unicode_utils 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.txt +11 -5
 - data/cdata/case_ignorable_set +1 -0
 - data/cdata/cat_set_titlecase +1 -0
 - data/cdata/combining_class_map +1 -0
 - data/cdata/cond_lc_map +16 -0
 - data/cdata/cond_uc_map +16 -0
 - data/cdata/prop_set_lowercase +1 -0
 - data/cdata/prop_set_uppercase +1 -0
 - data/cdata/soft_dotted_set +1 -0
 - data/lib/unicode_utils.rb +8 -1
 - data/lib/unicode_utils/case_ignorable_char_q.rb +16 -0
 - data/lib/unicode_utils/cased_char_q.rb +18 -0
 - data/lib/unicode_utils/combining_class.rb +34 -0
 - data/lib/unicode_utils/conditional_casing.rb +164 -0
 - data/lib/unicode_utils/downcase.rb +17 -11
 - data/lib/unicode_utils/lowercase_char_q.rb +15 -0
 - data/lib/unicode_utils/name.rb +5 -4
 - data/lib/unicode_utils/read_codepoint_map.rb +3 -2
 - data/lib/unicode_utils/read_codepoint_set.rb +22 -0
 - data/lib/unicode_utils/read_special_casing_map.rb +3 -2
 - data/lib/unicode_utils/simple_downcase.rb +2 -2
 - data/lib/unicode_utils/simple_upcase.rb +2 -2
 - data/lib/unicode_utils/soft_dotted_char_q.rb +16 -0
 - data/lib/unicode_utils/titlecase_char_q.rb +16 -0
 - data/lib/unicode_utils/upcase.rb +18 -10
 - data/lib/unicode_utils/uppercase_char_q.rb +15 -0
 - data/lib/unicode_utils/version.rb +2 -2
 - data/test/test_unicode_utils.rb +75 -0
 - metadata +19 -2
 
    
        data/README.txt
    CHANGED
    
    | 
         @@ -10,17 +10,21 @@ Install with RubyGems: 
     | 
|
| 
       10 
10 
     | 
    
         | 
| 
       11 
11 
     | 
    
         
             
            Or get the source from Github: http://github.com/lang/unicode_utils
         
     | 
| 
       12 
12 
     | 
    
         | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
      
 13 
     | 
    
         
            +
            UnicodeUtils works with Ruby 1.9.1-preview1 or later, although a bug
         
     | 
| 
      
 14 
     | 
    
         
            +
            (http://redmine.ruby-lang.org/issues/show/692) in
         
     | 
| 
      
 15 
     | 
    
         
            +
            1.9.1-preview1 prevents UnicodeUtils from loading when
         
     | 
| 
      
 16 
     | 
    
         
            +
            Encoding.default_internal is set (e.g. with -U or -E).
         
     | 
| 
       14 
17 
     | 
    
         | 
| 
       15 
18 
     | 
    
         
             
            == Synopsis
         
     | 
| 
       16 
19 
     | 
    
         | 
| 
       17 
20 
     | 
    
         
             
                require "unicode_utils"
         
     | 
| 
       18 
21 
     | 
    
         | 
| 
       19 
     | 
    
         
            -
                UnicodeUtils.name 
     | 
| 
      
 22 
     | 
    
         
            +
                UnicodeUtils.name("æ") => "LATIN SMALL LETTER AE"
         
     | 
| 
       20 
23 
     | 
    
         | 
| 
       21 
     | 
    
         
            -
                UnicodeUtils.upcase 
     | 
| 
      
 24 
     | 
    
         
            +
                UnicodeUtils.upcase("Straße") => "STRASSE"
         
     | 
| 
      
 25 
     | 
    
         
            +
                UnicodeUtils.upcase("i", :tr) => "İ"
         
     | 
| 
       22 
26 
     | 
    
         | 
| 
       23 
     | 
    
         
            -
                UnicodeUtils.downcase 
     | 
| 
      
 27 
     | 
    
         
            +
                UnicodeUtils.downcase("Ümit") => "ümit"
         
     | 
| 
       24 
28 
     | 
    
         | 
| 
       25 
29 
     | 
    
         
             
            Start with the UnicodeUtils module in the API documentation for
         
     | 
| 
       26 
30 
     | 
    
         
             
            complete documentation.
         
     | 
| 
         @@ -30,7 +34,9 @@ at require time, the library is split up into separate files for 
     | 
|
| 
       30 
34 
     | 
    
         
             
            each function. The +unicode_utils+ library loads them all. If you
         
     | 
| 
       31 
35 
     | 
    
         
             
            need only a specific function, e.g. +upcase+, you can require only
         
     | 
| 
       32 
36 
     | 
    
         
             
            the file <tt>unicode_utils/upcase</tt> to save memory and reduce
         
     | 
| 
       33 
     | 
    
         
            -
            startup time.
         
     | 
| 
      
 37 
     | 
    
         
            +
            startup time. Methods whose names end in ? are in a file suffixed with
         
     | 
| 
      
 38 
     | 
    
         
            +
            +_q+, e.g. <tt>lowercase_char?</tt> can be required with
         
     | 
| 
      
 39 
     | 
    
         
            +
            <tt>unicode_utils/lowercase_char_q</tt>.
         
     | 
| 
       34 
40 
     | 
    
         | 
| 
       35 
41 
     | 
    
         
             
            == License
         
     | 
| 
       36 
42 
     | 
    
         | 
| 
         @@ -0,0 +1 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            
         
     | 
| 
         @@ -0,0 +1 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            0001c50001c80001cb0001f2001f88001f89001f8a001f8b001f8c001f8d001f8e001f8f001f98001f99001f9a001f9b001f9c001f9d001f9e001f9f001fa8001fa9001faa001fab001fac001fad001fae001faf001fbc001fcc001ffc
         
     | 
| 
         @@ -0,0 +1 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            00033401000335010003360100033701000338010020d2010020d3010020d8010020d9010020da010020e5010020e6010020ea010020eb01010a390101d1670101d1680101d1690100093c070009bc07000a3c07000abc07000b3c07000cbc0700103707001b3407001c37070030990800309a0800094d090009cd09000a4d09000acd09000b4d09000bcd09000c4d09000ccd09000d4d09000dca09000e3a09000f84090010390900103a0900171409001734090017d209001b4409001baa0900a8060900a8c40900a95309010a3f090005b00a0005b10b0005b20c0005b30d0005b40e0005b50f0005b6100005b7110005b8120005c7120005b9130005ba130005bb140005bc150005bd160005bf170005c1180005c21900fb1e1a00064b1b00064c1c00064d1d0006181e00064e1e0006191f00064f1f00061a200006502000065121000652220006702300071124000c5554000c565b000e3867000e3967000e486b000e496b000e4a6b000e4b6b000eb876000eb976000ec87a000ec97a000eca7a000ecb7a000f7181000f7282000f7a82000f7b82000f7c82000f7d82000f8082000f7484000321ca000322ca000327ca000328ca001dd0ca001dced600031bd8000f39d801d165d801d166d801d16ed801d16fd801d170d801d171d801d172d800302ada000316dc000317dc000318dc000319dc00031cdc00031ddc00031edc00031fdc000320dc000323dc000324dc000325dc000326dc000329dc00032adc00032bdc00032cdc00032ddc00032edc00032fdc000330dc000331dc000332dc000333dc000339dc00033adc00033bdc00033cdc000347dc000348dc000349dc00034ddc00034edc000353dc000354dc000355dc000356dc000359dc00035adc000591dc000596dc00059bdc0005a2dc0005a3dc0005a4dc0005a5dc0005a6dc0005a7dc0005aadc0005c5dc000655dc000656dc00065cdc0006e3dc0006eadc0006eddc000731dc000734dc000737dc000738dc000739dc00073bdc00073cdc00073edc000742dc000744dc000746dc000748dc0007f2dc000952dc000f18dc000f19dc000f35dc000f37dc000fc6dc00108ddc00193bdc001a18dc001b6cdc001dc2dc001dcadc001dcfdc001dffdc0020e8dc0020ecdc0020eddc0020eedc0020efdc00a92bdc00a92cdc00a92ddc0101fddc010a0ddc010a3adc01d17bdc01d17cdc01d17ddc01d17edc01d17fdc01d180dc01d181dc01d182dc01d18adc01d18bdc00059ade0005adde001939de00302dde00302ee000302fe001d16de20005aee40018a9e400302be4000300e6000301e6000302e6000303e6000304e6000305e6000306e6000307e6000308e6000309e600030ae600030be60003
0ce600030de600030ee600030fe6000310e6000311e6000312e6000313e6000314e600033de600033ee600033fe6000340e6000341e6000342e6000343e6000344e6000346e600034ae600034be600034ce6000350e6000351e6000352e6000357e600035be6000363e6000364e6000365e6000366e6000367e6000368e6000369e600036ae600036be600036ce600036de600036ee600036fe6000483e6000484e6000485e6000486e6000487e6000592e6000593e6000594e6000595e6000597e6000598e6000599e600059ce600059de600059ee600059fe60005a0e60005a1e60005a8e60005a9e60005abe60005ace60005afe60005c4e6000610e6000611e6000612e6000613e6000614e6000615e6000616e6000617e6000653e6000654e6000657e6000658e6000659e600065ae600065be600065de600065ee60006d6e60006d7e60006d8e60006d9e60006dae60006dbe60006dce60006dfe60006e0e60006e1e60006e2e60006e4e60006e7e60006e8e60006ebe60006ece6000730e6000732e6000733e6000735e6000736e600073ae600073de600073fe6000740e6000741e6000743e6000745e6000747e6000749e600074ae60007ebe60007ece60007ede60007eee60007efe60007f0e60007f1e60007f3e6000951e6000953e6000954e6000f82e6000f83e6000f86e6000f87e600135fe60017dde600193ae6001a17e6001b6be6001b6de6001b6ee6001b6fe6001b70e6001b71e6001b72e6001b73e6001dc0e6001dc1e6001dc3e6001dc4e6001dc5e6001dc6e6001dc7e6001dc8e6001dc9e6001dcbe6001dcce6001dd1e6001dd2e6001dd3e6001dd4e6001dd5e6001dd6e6001dd7e6001dd8e6001dd9e6001ddae6001ddbe6001ddce6001ddde6001ddee6001ddfe6001de0e6001de1e6001de2e6001de3e6001de4e6001de5e6001de6e6001dfee60020d0e60020d1e60020d4e60020d5e60020d6e60020d7e60020dbe60020dce60020e1e60020e7e60020e9e60020f0e6002de0e6002de1e6002de2e6002de3e6002de4e6002de5e6002de6e6002de7e6002de8e6002de9e6002deae6002debe6002dece6002dede6002deee6002defe6002df0e6002df1e6002df2e6002df3e6002df4e6002df5e6002df6e6002df7e6002df8e6002df9e6002dfae6002dfbe6002dfce6002dfde6002dfee6002dffe600a66fe600a67ce600a67de600fe20e600fe21e600fe22e600fe23e600fe24e600fe25e600fe26e6010a0fe6010a38e601d185e601d186e601d187e601d188e601d189e601d1aae601d1abe601d1ace601d1ade601d242e601d243e601d244e6000315e800031ae8000358e800302ce800035ce900035fe9000362e900035dea00035eea000360ea0003
61ea001dcdea000345f0
         
     | 
    
        data/cdata/cond_lc_map
    ADDED
    
    | 
         @@ -0,0 +1,16 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            000049;000069,000307;lt;More_Above
         
     | 
| 
      
 2 
     | 
    
         
            +
            000049;000131;az;Not_Before_Dot
         
     | 
| 
      
 3 
     | 
    
         
            +
            000049;000131;tr;Not_Before_Dot
         
     | 
| 
      
 4 
     | 
    
         
            +
            00004a;00006a,000307;lt;More_Above
         
     | 
| 
      
 5 
     | 
    
         
            +
            000069;000069;az;
         
     | 
| 
      
 6 
     | 
    
         
            +
            000069;000069;tr;
         
     | 
| 
      
 7 
     | 
    
         
            +
            0000cc;000069,000307,000300;lt;
         
     | 
| 
      
 8 
     | 
    
         
            +
            0000cd;000069,000307,000301;lt;
         
     | 
| 
      
 9 
     | 
    
         
            +
            000128;000069,000307,000303;lt;
         
     | 
| 
      
 10 
     | 
    
         
            +
            00012e;00012f,000307;lt;More_Above
         
     | 
| 
      
 11 
     | 
    
         
            +
            000130;000069;az;
         
     | 
| 
      
 12 
     | 
    
         
            +
            000130;000069;tr;
         
     | 
| 
      
 13 
     | 
    
         
            +
            000307;000307;lt;After_Soft_Dotted
         
     | 
| 
      
 14 
     | 
    
         
            +
            000307;;az;After_I
         
     | 
| 
      
 15 
     | 
    
         
            +
            000307;;tr;After_I
         
     | 
| 
      
 16 
     | 
    
         
            +
            0003a3;0003c2;;Final_Sigma
         
     | 
    
        data/cdata/cond_uc_map
    ADDED
    
    | 
         @@ -0,0 +1,16 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            000049;000049;az;Not_Before_Dot
         
     | 
| 
      
 2 
     | 
    
         
            +
            000049;000049;lt;More_Above
         
     | 
| 
      
 3 
     | 
    
         
            +
            000049;000049;tr;Not_Before_Dot
         
     | 
| 
      
 4 
     | 
    
         
            +
            00004a;00004a;lt;More_Above
         
     | 
| 
      
 5 
     | 
    
         
            +
            000069;000130;az;
         
     | 
| 
      
 6 
     | 
    
         
            +
            000069;000130;tr;
         
     | 
| 
      
 7 
     | 
    
         
            +
            0000cc;0000cc;lt;
         
     | 
| 
      
 8 
     | 
    
         
            +
            0000cd;0000cd;lt;
         
     | 
| 
      
 9 
     | 
    
         
            +
            000128;000128;lt;
         
     | 
| 
      
 10 
     | 
    
         
            +
            00012e;00012e;lt;More_Above
         
     | 
| 
      
 11 
     | 
    
         
            +
            000130;000130;az;
         
     | 
| 
      
 12 
     | 
    
         
            +
            000130;000130;tr;
         
     | 
| 
      
 13 
     | 
    
         
            +
            000307;000307;az;After_I
         
     | 
| 
      
 14 
     | 
    
         
            +
            000307;000307;tr;After_I
         
     | 
| 
      
 15 
     | 
    
         
            +
            000307;;lt;After_Soft_Dotted
         
     | 
| 
      
 16 
     | 
    
         
            +
            0003a3;0003a3;;Final_Sigma
         
     | 
| 
         @@ -0,0 +1 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            00006100006200006300006400006500006600006700006800006900006a00006b00006c00006d00006e00006f00007000007100007200007300007400007500007600007700007800007900007a0000aa0000b50000ba0000df0000e00000e10000e20000e30000e40000e50000e60000e70000e80000e90000ea0000eb0000ec0000ed0000ee0000ef0000f00000f10000f20000f30000f40000f50000f60000f80000f90000fa0000fb0000fc0000fd0000fe0000ff00010100010300010500010700010900010b00010d00010f00011100011300011500011700011900011b00011d00011f00012100012300012500012700012900012b00012d00012f00013100013300013500013700013800013a00013c00013e00014000014200014400014600014800014900014b00014d00014f00015100015300015500015700015900015b00015d00015f00016100016300016500016700016900016b00016d00016f00017100017300017500017700017a00017c00017e00017f00018000018300018500018800018c00018d00019200019500019900019a00019b00019e0001a10001a30001a50001a80001aa0001ab0001ad0001b00001b40001b60001b90001ba0001bd0001be0001bf0001c60001c90001cc0001ce0001d00001d20001d40001d60001d80001da0001dc0001dd0001df0001e10001e30001e50001e70001e90001eb0001ed0001ef0001f00001f30001f50001f90001fb0001fd0001ff00020100020300020500020700020900020b00020d00020f00021100021300021500021700021900021b00021d00021f00022100022300022500022700022900022b00022d00022f00023100023300023400023500023600023700023800023900023c00023f00024000024200024700024900024b00024d00024f00025000025100025200025300025400025500025600025700025800025900025a00025b00025c00025d00025e00025f00026000026100026200026300026400026500026600026700026800026900026a00026b00026c00026d00026e00026f00027000027100027200027300027400027500027600027700027800027900027a00027b00027c00027d00027e00027f00028000028100028200028300028400028500028600028700028800028900028a00028b00028c00028d00028e00028f00029000029100029200029300029500029600029700029800029900029a00029b00029c00029d00029e00029f0002a00002a10002a20002a30002a40002a50002a60002a70002a80002a90002aa0002ab0002ac0002ad0002ae0002af0002b00002b10002b20002b30002b40002b50002b60002b70002b80002c00002c10002e00002e10002e200
02e30002e400034500037100037300037700037a00037b00037c00037d0003900003ac0003ad0003ae0003af0003b00003b10003b20003b30003b40003b50003b60003b70003b80003b90003ba0003bb0003bc0003bd0003be0003bf0003c00003c10003c20003c30003c40003c50003c60003c70003c80003c90003ca0003cb0003cc0003cd0003ce0003d00003d10003d50003d60003d70003d90003db0003dd0003df0003e10003e30003e50003e70003e90003eb0003ed0003ef0003f00003f10003f20003f30003f50003f80003fb0003fc00043000043100043200043300043400043500043600043700043800043900043a00043b00043c00043d00043e00043f00044000044100044200044300044400044500044600044700044800044900044a00044b00044c00044d00044e00044f00045000045100045200045300045400045500045600045700045800045900045a00045b00045c00045d00045e00045f00046100046300046500046700046900046b00046d00046f00047100047300047500047700047900047b00047d00047f00048100048b00048d00048f00049100049300049500049700049900049b00049d00049f0004a10004a30004a50004a70004a90004ab0004ad0004af0004b10004b30004b50004b70004b90004bb0004bd0004bf0004c20004c40004c60004c80004ca0004cc0004ce0004cf0004d10004d30004d50004d70004d90004db0004dd0004df0004e10004e30004e50004e70004e90004eb0004ed0004ef0004f10004f30004f50004f70004f90004fb0004fd0004ff00050100050300050500050700050900050b00050d00050f00051100051300051500051700051900051b00051d00051f00052100052300056100056200056300056400056500056600056700056800056900056a00056b00056c00056d00056e00056f00057000057100057200057300057400057500057600057700057800057900057a00057b00057c00057d00057e00057f000580000581000582000583000584000585000586000587001d00001d01001d02001d03001d04001d05001d06001d07001d08001d09001d0a001d0b001d0c001d0d001d0e001d0f001d10001d11001d12001d13001d14001d15001d16001d17001d18001d19001d1a001d1b001d1c001d1d001d1e001d1f001d20001d21001d22001d23001d24001d25001d26001d27001d28001d29001d2a001d2b001d2c001d2d001d2e001d2f001d30001d31001d32001d33001d34001d35001d36001d37001d38001d39001d3a001d3b001d3c001d3d001d3e001d3f001d40001d41001d42001d43001d44001d45001d46001d47001d48001d49001d4a001d4b001d4c001d4d001d4e001d4f001d50001d
51001d52001d53001d54001d55001d56001d57001d58001d59001d5a001d5b001d5c001d5d001d5e001d5f001d60001d61001d62001d63001d64001d65001d66001d67001d68001d69001d6a001d6b001d6c001d6d001d6e001d6f001d70001d71001d72001d73001d74001d75001d76001d77001d78001d79001d7a001d7b001d7c001d7d001d7e001d7f001d80001d81001d82001d83001d84001d85001d86001d87001d88001d89001d8a001d8b001d8c001d8d001d8e001d8f001d90001d91001d92001d93001d94001d95001d96001d97001d98001d99001d9a001d9b001d9c001d9d001d9e001d9f001da0001da1001da2001da3001da4001da5001da6001da7001da8001da9001daa001dab001dac001dad001dae001daf001db0001db1001db2001db3001db4001db5001db6001db7001db8001db9001dba001dbb001dbc001dbd001dbe001dbf001e01001e03001e05001e07001e09001e0b001e0d001e0f001e11001e13001e15001e17001e19001e1b001e1d001e1f001e21001e23001e25001e27001e29001e2b001e2d001e2f001e31001e33001e35001e37001e39001e3b001e3d001e3f001e41001e43001e45001e47001e49001e4b001e4d001e4f001e51001e53001e55001e57001e59001e5b001e5d001e5f001e61001e63001e65001e67001e69001e6b001e6d001e6f001e71001e73001e75001e77001e79001e7b001e7d001e7f001e81001e83001e85001e87001e89001e8b001e8d001e8f001e91001e93001e95001e96001e97001e98001e99001e9a001e9b001e9c001e9d001e9f001ea1001ea3001ea5001ea7001ea9001eab001ead001eaf001eb1001eb3001eb5001eb7001eb9001ebb001ebd001ebf001ec1001ec3001ec5001ec7001ec9001ecb001ecd001ecf001ed1001ed3001ed5001ed7001ed9001edb001edd001edf001ee1001ee3001ee5001ee7001ee9001eeb001eed001eef001ef1001ef3001ef5001ef7001ef9001efb001efd001eff001f00001f01001f02001f03001f04001f05001f06001f07001f10001f11001f12001f13001f14001f15001f20001f21001f22001f23001f24001f25001f26001f27001f30001f31001f32001f33001f34001f35001f36001f37001f40001f41001f42001f43001f44001f45001f50001f51001f52001f53001f54001f55001f56001f57001f60001f61001f62001f63001f64001f65001f66001f67001f70001f71001f72001f73001f74001f75001f76001f77001f78001f79001f7a001f7b001f7c001f7d001f80001f81001f82001f83001f84001f85001f86001f87001f90001f91001f92001f93001f94001f95001f96001f97001fa0001fa1001fa2001fa3001fa4001fa5001fa6001fa7001fb0
001fb1001fb2001fb3001fb4001fb6001fb7001fbe001fc2001fc3001fc4001fc6001fc7001fd0001fd1001fd2001fd3001fd6001fd7001fe0001fe1001fe2001fe3001fe4001fe5001fe6001fe7001ff2001ff3001ff4001ff6001ff700207100207f00209000209100209200209300209400210a00210e00210f00211300212f00213400213900213c00213d00214600214700214800214900214e00217000217100217200217300217400217500217600217700217800217900217a00217b00217c00217d00217e00217f0021840024d00024d10024d20024d30024d40024d50024d60024d70024d80024d90024da0024db0024dc0024dd0024de0024df0024e00024e10024e20024e30024e40024e50024e60024e70024e80024e9002c30002c31002c32002c33002c34002c35002c36002c37002c38002c39002c3a002c3b002c3c002c3d002c3e002c3f002c40002c41002c42002c43002c44002c45002c46002c47002c48002c49002c4a002c4b002c4c002c4d002c4e002c4f002c50002c51002c52002c53002c54002c55002c56002c57002c58002c59002c5a002c5b002c5c002c5d002c5e002c61002c65002c66002c68002c6a002c6c002c71002c73002c74002c76002c77002c78002c79002c7a002c7b002c7c002c7d002c81002c83002c85002c87002c89002c8b002c8d002c8f002c91002c93002c95002c97002c99002c9b002c9d002c9f002ca1002ca3002ca5002ca7002ca9002cab002cad002caf002cb1002cb3002cb5002cb7002cb9002cbb002cbd002cbf002cc1002cc3002cc5002cc7002cc9002ccb002ccd002ccf002cd1002cd3002cd5002cd7002cd9002cdb002cdd002cdf002ce1002ce3002ce4002d00002d01002d02002d03002d04002d05002d06002d07002d08002d09002d0a002d0b002d0c002d0d002d0e002d0f002d10002d11002d12002d13002d14002d15002d16002d17002d18002d19002d1a002d1b002d1c002d1d002d1e002d1f002d20002d21002d22002d23002d24002d2500a64100a64300a64500a64700a64900a64b00a64d00a64f00a65100a65300a65500a65700a65900a65b00a65d00a65f00a66300a66500a66700a66900a66b00a66d00a68100a68300a68500a68700a68900a68b00a68d00a68f00a69100a69300a69500a69700a72300a72500a72700a72900a72b00a72d00a72f00a73000a73100a73300a73500a73700a73900a73b00a73d00a73f00a74100a74300a74500a74700a74900a74b00a74d00a74f00a75100a75300a75500a75700a75900a75b00a75d00a75f00a76100a76300a76500a76700a76900a76b00a76d00a76f00a77000a77100a77200a77300a77400a77500a77600a77700a77800a77a00a77c00
a77f00a78100a78300a78500a78700a78c00fb0000fb0100fb0200fb0300fb0400fb0500fb0600fb1300fb1400fb1500fb1600fb1700ff4100ff4200ff4300ff4400ff4500ff4600ff4700ff4800ff4900ff4a00ff4b00ff4c00ff4d00ff4e00ff4f00ff5000ff5100ff5200ff5300ff5400ff5500ff5600ff5700ff5800ff5900ff5a01042801042901042a01042b01042c01042d01042e01042f01043001043101043201043301043401043501043601043701043801043901043a01043b01043c01043d01043e01043f01044001044101044201044301044401044501044601044701044801044901044a01044b01044c01044d01044e01044f01d41a01d41b01d41c01d41d01d41e01d41f01d42001d42101d42201d42301d42401d42501d42601d42701d42801d42901d42a01d42b01d42c01d42d01d42e01d42f01d43001d43101d43201d43301d44e01d44f01d45001d45101d45201d45301d45401d45601d45701d45801d45901d45a01d45b01d45c01d45d01d45e01d45f01d46001d46101d46201d46301d46401d46501d46601d46701d48201d48301d48401d48501d48601d48701d48801d48901d48a01d48b01d48c01d48d01d48e01d48f01d49001d49101d49201d49301d49401d49501d49601d49701d49801d49901d49a01d49b01d4b601d4b701d4b801d4b901d4bb01d4bd01d4be01d4bf01d4c001d4c101d4c201d4c301d4c501d4c601d4c701d4c801d4c901d4ca01d4cb01d4cc01d4cd01d4ce01d4cf01d4ea01d4eb01d4ec01d4ed01d4ee01d4ef01d4f001d4f101d4f201d4f301d4f401d4f501d4f601d4f701d4f801d4f901d4fa01d4fb01d4fc01d4fd01d4fe01d4ff01d50001d50101d50201d50301d51e01d51f01d52001d52101d52201d52301d52401d52501d52601d52701d52801d52901d52a01d52b01d52c01d52d01d52e01d52f01d53001d53101d53201d53301d53401d53501d53601d53701d55201d55301d55401d55501d55601d55701d55801d55901d55a01d55b01d55c01d55d01d55e01d55f01d56001d56101d56201d56301d56401d56501d56601d56701d56801d56901d56a01d56b01d58601d58701d58801d58901d58a01d58b01d58c01d58d01d58e01d58f01d59001d59101d59201d59301d59401d59501d59601d59701d59801d59901d59a01d59b01d59c01d59d01d59e01d59f01d5ba01d5bb01d5bc01d5bd01d5be01d5bf01d5c001d5c101d5c201d5c301d5c401d5c501d5c601d5c701d5c801d5c901d5ca01d5cb01d5cc01d5cd01d5ce01d5cf01d5d001d5d101d5d201d5d301d5ee01d5ef01d5f001d5f101d5f201d5f301d5f401d5f501d5f601d5f701d5f801d5f901d5fa01d5fb01d5fc01d5fd01d5fe01d5ff01d60001d6
0101d60201d60301d60401d60501d60601d60701d62201d62301d62401d62501d62601d62701d62801d62901d62a01d62b01d62c01d62d01d62e01d62f01d63001d63101d63201d63301d63401d63501d63601d63701d63801d63901d63a01d63b01d65601d65701d65801d65901d65a01d65b01d65c01d65d01d65e01d65f01d66001d66101d66201d66301d66401d66501d66601d66701d66801d66901d66a01d66b01d66c01d66d01d66e01d66f01d68a01d68b01d68c01d68d01d68e01d68f01d69001d69101d69201d69301d69401d69501d69601d69701d69801d69901d69a01d69b01d69c01d69d01d69e01d69f01d6a001d6a101d6a201d6a301d6a401d6a501d6c201d6c301d6c401d6c501d6c601d6c701d6c801d6c901d6ca01d6cb01d6cc01d6cd01d6ce01d6cf01d6d001d6d101d6d201d6d301d6d401d6d501d6d601d6d701d6d801d6d901d6da01d6dc01d6dd01d6de01d6df01d6e001d6e101d6fc01d6fd01d6fe01d6ff01d70001d70101d70201d70301d70401d70501d70601d70701d70801d70901d70a01d70b01d70c01d70d01d70e01d70f01d71001d71101d71201d71301d71401d71601d71701d71801d71901d71a01d71b01d73601d73701d73801d73901d73a01d73b01d73c01d73d01d73e01d73f01d74001d74101d74201d74301d74401d74501d74601d74701d74801d74901d74a01d74b01d74c01d74d01d74e01d75001d75101d75201d75301d75401d75501d77001d77101d77201d77301d77401d77501d77601d77701d77801d77901d77a01d77b01d77c01d77d01d77e01d77f01d78001d78101d78201d78301d78401d78501d78601d78701d78801d78a01d78b01d78c01d78d01d78e01d78f01d7aa01d7ab01d7ac01d7ad01d7ae01d7af01d7b001d7b101d7b201d7b301d7b401d7b501d7b601d7b701d7b801d7b901d7ba01d7bb01d7bc01d7bd01d7be01d7bf01d7c001d7c101d7c201d7c401d7c501d7c601d7c701d7c801d7c901d7cb
         
     | 
| 
         @@ -0,0 +1 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            00004100004200004300004400004500004600004700004800004900004a00004b00004c00004d00004e00004f00005000005100005200005300005400005500005600005700005800005900005a0000c00000c10000c20000c30000c40000c50000c60000c70000c80000c90000ca0000cb0000cc0000cd0000ce0000cf0000d00000d10000d20000d30000d40000d50000d60000d80000d90000da0000db0000dc0000dd0000de00010000010200010400010600010800010a00010c00010e00011000011200011400011600011800011a00011c00011e00012000012200012400012600012800012a00012c00012e00013000013200013400013600013900013b00013d00013f00014100014300014500014700014a00014c00014e00015000015200015400015600015800015a00015c00015e00016000016200016400016600016800016a00016c00016e00017000017200017400017600017800017900017b00017d00018100018200018400018600018700018900018a00018b00018e00018f00019000019100019300019400019600019700019800019c00019d00019f0001a00001a20001a40001a60001a70001a90001ac0001ae0001af0001b10001b20001b30001b50001b70001b80001bc0001c40001c70001ca0001cd0001cf0001d10001d30001d50001d70001d90001db0001de0001e00001e20001e40001e60001e80001ea0001ec0001ee0001f10001f40001f60001f70001f80001fa0001fc0001fe00020000020200020400020600020800020a00020c00020e00021000021200021400021600021800021a00021c00021e00022000022200022400022600022800022a00022c00022e00023000023200023a00023b00023d00023e00024100024300024400024500024600024800024a00024c00024e00037000037200037600038600038800038900038a00038c00038e00038f00039100039200039300039400039500039600039700039800039900039a00039b00039c00039d00039e00039f0003a00003a10003a30003a40003a50003a60003a70003a80003a90003aa0003ab0003cf0003d20003d30003d40003d80003da0003dc0003de0003e00003e20003e40003e60003e80003ea0003ec0003ee0003f40003f70003f90003fa0003fd0003fe0003ff00040000040100040200040300040400040500040600040700040800040900040a00040b00040c00040d00040e00040f00041000041100041200041300041400041500041600041700041800041900041a00041b00041c00041d00041e00041f00042000042100042200042300042400042500042600042700042800042900042a00042b00042c00042d00042e00042f00046000046200
046400046600046800046a00046c00046e00047000047200047400047600047800047a00047c00047e00048000048a00048c00048e00049000049200049400049600049800049a00049c00049e0004a00004a20004a40004a60004a80004aa0004ac0004ae0004b00004b20004b40004b60004b80004ba0004bc0004be0004c00004c10004c30004c50004c70004c90004cb0004cd0004d00004d20004d40004d60004d80004da0004dc0004de0004e00004e20004e40004e60004e80004ea0004ec0004ee0004f00004f20004f40004f60004f80004fa0004fc0004fe00050000050200050400050600050800050a00050c00050e00051000051200051400051600051800051a00051c00051e00052000052200053100053200053300053400053500053600053700053800053900053a00053b00053c00053d00053e00053f00054000054100054200054300054400054500054600054700054800054900054a00054b00054c00054d00054e00054f0005500005510005520005530005540005550005560010a00010a10010a20010a30010a40010a50010a60010a70010a80010a90010aa0010ab0010ac0010ad0010ae0010af0010b00010b10010b20010b30010b40010b50010b60010b70010b80010b90010ba0010bb0010bc0010bd0010be0010bf0010c00010c10010c20010c30010c40010c5001e00001e02001e04001e06001e08001e0a001e0c001e0e001e10001e12001e14001e16001e18001e1a001e1c001e1e001e20001e22001e24001e26001e28001e2a001e2c001e2e001e30001e32001e34001e36001e38001e3a001e3c001e3e001e40001e42001e44001e46001e48001e4a001e4c001e4e001e50001e52001e54001e56001e58001e5a001e5c001e5e001e60001e62001e64001e66001e68001e6a001e6c001e6e001e70001e72001e74001e76001e78001e7a001e7c001e7e001e80001e82001e84001e86001e88001e8a001e8c001e8e001e90001e92001e94001e9e001ea0001ea2001ea4001ea6001ea8001eaa001eac001eae001eb0001eb2001eb4001eb6001eb8001eba001ebc001ebe001ec0001ec2001ec4001ec6001ec8001eca001ecc001ece001ed0001ed2001ed4001ed6001ed8001eda001edc001ede001ee0001ee2001ee4001ee6001ee8001eea001eec001eee001ef0001ef2001ef4001ef6001ef8001efa001efc001efe001f08001f09001f0a001f0b001f0c001f0d001f0e001f0f001f18001f19001f1a001f1b001f1c001f1d001f28001f29001f2a001f2b001f2c001f2d001f2e001f2f001f38001f39001f3a001f3b001f3c001f3d001f3e001f3f001f48001f49001f4a001f4b001f4c001f4d001f59001f5b001f5d001f5f001f68001f
69001f6a001f6b001f6c001f6d001f6e001f6f001fb8001fb9001fba001fbb001fc8001fc9001fca001fcb001fd8001fd9001fda001fdb001fe8001fe9001fea001feb001fec001ff8001ff9001ffa001ffb00210200210700210b00210c00210d00211000211100211200211500211900211a00211b00211c00211d00212400212600212800212a00212b00212c00212d00213000213100213200213300213e00213f00214500216000216100216200216300216400216500216600216700216800216900216a00216b00216c00216d00216e00216f0021830024b60024b70024b80024b90024ba0024bb0024bc0024bd0024be0024bf0024c00024c10024c20024c30024c40024c50024c60024c70024c80024c90024ca0024cb0024cc0024cd0024ce0024cf002c00002c01002c02002c03002c04002c05002c06002c07002c08002c09002c0a002c0b002c0c002c0d002c0e002c0f002c10002c11002c12002c13002c14002c15002c16002c17002c18002c19002c1a002c1b002c1c002c1d002c1e002c1f002c20002c21002c22002c23002c24002c25002c26002c27002c28002c29002c2a002c2b002c2c002c2d002c2e002c60002c62002c63002c64002c67002c69002c6b002c6d002c6e002c6f002c72002c75002c80002c82002c84002c86002c88002c8a002c8c002c8e002c90002c92002c94002c96002c98002c9a002c9c002c9e002ca0002ca2002ca4002ca6002ca8002caa002cac002cae002cb0002cb2002cb4002cb6002cb8002cba002cbc002cbe002cc0002cc2002cc4002cc6002cc8002cca002ccc002cce002cd0002cd2002cd4002cd6002cd8002cda002cdc002cde002ce0002ce200a64000a64200a64400a64600a64800a64a00a64c00a64e00a65000a65200a65400a65600a65800a65a00a65c00a65e00a66200a66400a66600a66800a66a00a66c00a68000a68200a68400a68600a68800a68a00a68c00a68e00a69000a69200a69400a69600a72200a72400a72600a72800a72a00a72c00a72e00a73200a73400a73600a73800a73a00a73c00a73e00a74000a74200a74400a74600a74800a74a00a74c00a74e00a75000a75200a75400a75600a75800a75a00a75c00a75e00a76000a76200a76400a76600a76800a76a00a76c00a76e00a77900a77b00a77d00a77e00a78000a78200a78400a78600a78b00ff2100ff2200ff2300ff2400ff2500ff2600ff2700ff2800ff2900ff2a00ff2b00ff2c00ff2d00ff2e00ff2f00ff3000ff3100ff3200ff3300ff3400ff3500ff3600ff3700ff3800ff3900ff3a01040001040101040201040301040401040501040601040701040801040901040a01040b01040c01040d01040e01040f010410010411010412
01041301041401041501041601041701041801041901041a01041b01041c01041d01041e01041f01042001042101042201042301042401042501042601042701d40001d40101d40201d40301d40401d40501d40601d40701d40801d40901d40a01d40b01d40c01d40d01d40e01d40f01d41001d41101d41201d41301d41401d41501d41601d41701d41801d41901d43401d43501d43601d43701d43801d43901d43a01d43b01d43c01d43d01d43e01d43f01d44001d44101d44201d44301d44401d44501d44601d44701d44801d44901d44a01d44b01d44c01d44d01d46801d46901d46a01d46b01d46c01d46d01d46e01d46f01d47001d47101d47201d47301d47401d47501d47601d47701d47801d47901d47a01d47b01d47c01d47d01d47e01d47f01d48001d48101d49c01d49e01d49f01d4a201d4a501d4a601d4a901d4aa01d4ab01d4ac01d4ae01d4af01d4b001d4b101d4b201d4b301d4b401d4b501d4d001d4d101d4d201d4d301d4d401d4d501d4d601d4d701d4d801d4d901d4da01d4db01d4dc01d4dd01d4de01d4df01d4e001d4e101d4e201d4e301d4e401d4e501d4e601d4e701d4e801d4e901d50401d50501d50701d50801d50901d50a01d50d01d50e01d50f01d51001d51101d51201d51301d51401d51601d51701d51801d51901d51a01d51b01d51c01d53801d53901d53b01d53c01d53d01d53e01d54001d54101d54201d54301d54401d54601d54a01d54b01d54c01d54d01d54e01d54f01d55001d56c01d56d01d56e01d56f01d57001d57101d57201d57301d57401d57501d57601d57701d57801d57901d57a01d57b01d57c01d57d01d57e01d57f01d58001d58101d58201d58301d58401d58501d5a001d5a101d5a201d5a301d5a401d5a501d5a601d5a701d5a801d5a901d5aa01d5ab01d5ac01d5ad01d5ae01d5af01d5b001d5b101d5b201d5b301d5b401d5b501d5b601d5b701d5b801d5b901d5d401d5d501d5d601d5d701d5d801d5d901d5da01d5db01d5dc01d5dd01d5de01d5df01d5e001d5e101d5e201d5e301d5e401d5e501d5e601d5e701d5e801d5e901d5ea01d5eb01d5ec01d5ed01d60801d60901d60a01d60b01d60c01d60d01d60e01d60f01d61001d61101d61201d61301d61401d61501d61601d61701d61801d61901d61a01d61b01d61c01d61d01d61e01d61f01d62001d62101d63c01d63d01d63e01d63f01d64001d64101d64201d64301d64401d64501d64601d64701d64801d64901d64a01d64b01d64c01d64d01d64e01d64f01d65001d65101d65201d65301d65401d65501d67001d67101d67201d67301d67401d67501d67601d67701d67801d67901d67a01d67b01d67c01d67d01d67e01d67f01d68001d68101d68201d68301
d68401d68501d68601d68701d68801d68901d6a801d6a901d6aa01d6ab01d6ac01d6ad01d6ae01d6af01d6b001d6b101d6b201d6b301d6b401d6b501d6b601d6b701d6b801d6b901d6ba01d6bb01d6bc01d6bd01d6be01d6bf01d6c001d6e201d6e301d6e401d6e501d6e601d6e701d6e801d6e901d6ea01d6eb01d6ec01d6ed01d6ee01d6ef01d6f001d6f101d6f201d6f301d6f401d6f501d6f601d6f701d6f801d6f901d6fa01d71c01d71d01d71e01d71f01d72001d72101d72201d72301d72401d72501d72601d72701d72801d72901d72a01d72b01d72c01d72d01d72e01d72f01d73001d73101d73201d73301d73401d75601d75701d75801d75901d75a01d75b01d75c01d75d01d75e01d75f01d76001d76101d76201d76301d76401d76501d76601d76701d76801d76901d76a01d76b01d76c01d76d01d76e01d79001d79101d79201d79301d79401d79501d79601d79701d79801d79901d79a01d79b01d79c01d79d01d79e01d79f01d7a001d7a101d7a201d7a301d7a401d7a501d7a601d7a701d7a801d7ca
         
     | 
| 
         @@ -0,0 +1 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            00006900006a00012f00024900026800029d0002b20003f3000456000458001d62001d96001da4001da8001e2d001ecb002071002148002149002c7c01d42201d42301d45601d45701d48a01d48b01d4be01d4bf01d4f201d4f301d52601d52701d55a01d55b01d58e01d58f01d5c201d5c301d5f601d5f701d62a01d62b01d65e01d65f01d69201d693
         
     | 
    
        data/lib/unicode_utils.rb
    CHANGED
    
    | 
         @@ -1,4 +1,4 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            # encoding: utf-8
         
     | 
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            require "unicode_utils/version"
         
     | 
| 
       4 
4 
     | 
    
         
             
            require "unicode_utils/name"
         
     | 
| 
         @@ -6,3 +6,10 @@ require "unicode_utils/simple_upcase" 
     | 
|
| 
       6 
6 
     | 
    
         
             
            require "unicode_utils/simple_downcase"
         
     | 
| 
       7 
7 
     | 
    
         
             
            require "unicode_utils/upcase"
         
     | 
| 
       8 
8 
     | 
    
         
             
            require "unicode_utils/downcase"
         
     | 
| 
      
 9 
     | 
    
         
            +
            require "unicode_utils/titlecase_char_q"
         
     | 
| 
      
 10 
     | 
    
         
            +
            require "unicode_utils/lowercase_char_q"
         
     | 
| 
      
 11 
     | 
    
         
            +
            require "unicode_utils/uppercase_char_q"
         
     | 
| 
      
 12 
     | 
    
         
            +
            require "unicode_utils/cased_char_q"
         
     | 
| 
      
 13 
     | 
    
         
            +
            require "unicode_utils/case_ignorable_char_q"
         
     | 
| 
      
 14 
     | 
    
         
            +
            require "unicode_utils/soft_dotted_char_q"
         
     | 
| 
      
 15 
     | 
    
         
            +
            require "unicode_utils/combining_class"
         
     | 
| 
         @@ -0,0 +1,16 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require "unicode_utils/read_codepoint_set"
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            module UnicodeUtils

              # Collection of case-ignorable code points, obtained from
              # Impl.read_codepoint_set for the data set "case_ignorable_set".
              CASE_IGNORABLE_SET = Impl.read_codepoint_set("case_ignorable_set") # :nodoc:

              module_function

              # Tells whether +char+ is case-ignorable in the sense of
              # Unicode 5.0, section 3.13.
              #
              # The code point of +char+ (<tt>char.ord</tt>) is tested for
              # membership in CASE_IGNORABLE_SET and the outcome of that
              # test is returned.
              def case_ignorable_char?(char)
                codepoint = char.ord
                CASE_IGNORABLE_SET.include?(codepoint)
              end

            end
         
     | 
| 
         @@ -0,0 +1,18 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require "unicode_utils/lowercase_char_q"
         
     | 
| 
      
 4 
     | 
    
         
            +
            require "unicode_utils/uppercase_char_q"
         
     | 
| 
      
 5 
     | 
    
         
            +
            require "unicode_utils/titlecase_char_q"
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            module UnicodeUtils

              module_function

              # A character counts as cased when it carries the Unicode
              # property Lowercase or Uppercase, or belongs to the general
              # category Titlecase_Letter.
              #
              # The three predicates are consulted in that order and the
              # first truthy answer is returned; if none is truthy, the
              # answer of titlecase_char? is returned.
              #
              # See also: lowercase_char?, uppercase_char?, titlecase_char?
              def cased_char?(char)
                verdict = lowercase_char?(char)
                verdict ||= uppercase_char?(char)
                verdict ||= titlecase_char?(char)
                verdict
              end

            end
         
     | 
| 
         @@ -0,0 +1,34 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module UnicodeUtils

              module Impl # :nodoc:

                # Builds a Hash from code point (Integer) to combining class
                # (Integer) out of the file cdata/combining_class_map, located
                # two directories above this source file.
                #
                # The file is consumed as a stream of fixed-width records:
                # six hexadecimal digits for the code point immediately
                # followed by two hexadecimal digits for the combining class.
                def self.read_combining_class_map
                  data_file = File.join(File.dirname(__FILE__),
                                        "..", "..", "cdata", "combining_class_map")
                  classes = {}
                  File.open(data_file, "r:US-ASCII:-") do |io|
                    # Both buffers are handed to IO#read so every record is
                    # read into the same two strings.
                    cp_hex = ("x" * 6).force_encoding(Encoding::US_ASCII)
                    cc_hex = ("x" * 2).force_encoding(Encoding::US_ASCII)
                    # IO#read with a positive length yields nil at end of file.
                    until io.read(6, cp_hex).nil?
                      codepoint = cp_hex.to_i(16)
                      classes[codepoint] = io.read(2, cc_hex).to_i(16)
                    end
                  end
                  classes
                end

              end

              COMBINING_CLASS_MAP = Impl.read_combining_class_map # :nodoc:

              module_function

              # Returns the combining class of +char+ as an integer in the
              # range 0..255. Characters without an entry in
              # COMBINING_CLASS_MAP yield 0.
              def combining_class(char)
                COMBINING_CLASS_MAP.fetch(char.ord, 0)
              end

            end
         
     | 
| 
         @@ -0,0 +1,164 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require "unicode_utils/cased_char_q"
         
     | 
| 
      
 4 
     | 
    
         
            +
            require "unicode_utils/case_ignorable_char_q"
         
     | 
| 
      
 5 
     | 
    
         
            +
            require "unicode_utils/soft_dotted_char_q"
         
     | 
| 
      
 6 
     | 
    
         
            +
            require "unicode_utils/combining_class"
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            module UnicodeUtils
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
              module Impl # :nodoc:
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
                # Base class for a conditional case mapping: it stores the
                # replacement handed to the constructor and matches in every
                # context. Subclasses override context_match? to restrict
                # where the mapping applies.
                class ConditionalCasing # :nodoc:

                  # +mapping+ is stored as given and exposed through #mapping.
                  def initialize(mapping)
                    @mapping = mapping
                  end

                  # The object passed to the constructor.
                  def mapping
                    @mapping
                  end

                  # Unconditional: true for any +str+ and +pos+.
                  def context_match?(str, pos)
                    true
                  end

                end
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
                # Matches when a combining dot above (U+0307) follows the
                # character at +pos+, with only characters whose combining
                # class is neither 0 nor 230 in between.
                class BeforeDotConditionalCasing < ConditionalCasing # :nodoc:

                  def context_match?(str, pos)
                    index = pos + 1
                    last = str.length - 1
                    while index <= last
                      following = str[index]
                      return true if following.ord == 0x0307
                      # A character of combining class 0 or 230 ends the scan.
                      return false if [0, 230].include?(UnicodeUtils.combining_class(following))
                      index += 1
                    end
                    false # "combining dot above" not found
                  end

                end
         
     | 
| 
      
 39 
     | 
    
         
            +
             
     | 
| 
      
 40 
     | 
    
         
            +
                # Logical negation of BeforeDotConditionalCasing: matches
                # exactly where the superclass does not.
                class NotBeforeDotConditionalCasing < BeforeDotConditionalCasing # :nodoc:

                  def context_match?(str, pos)
                    before_dot = super
                    before_dot ? false : true
                  end

                end
         
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
| 
      
 48 
     | 
    
         
            +
                # Matches when a character of combining class 230 follows the
                # character at +pos+ before any character of combining class 0
                # is encountered.
                class MoreAboveConditionalCasing < ConditionalCasing # :nodoc:

                  def context_match?(str, pos)
                    index = pos + 1
                    last = str.length - 1
                    while index <= last
                      case UnicodeUtils.combining_class(str[index])
                      when 230 then return true
                      when 0 then return false
                      end
                      index += 1
                    end
                    false
                  end

                end
         
     | 
| 
      
 61 
     | 
    
         
            +
             
     | 
| 
      
 62 
     | 
    
         
            +
                # Matches when an uppercase I (U+0049) precedes the character
                # at +pos+, with only characters whose combining class is
                # neither 0 nor 230 in between.
                class AfterIConditionalCasing < ConditionalCasing # :nodoc:

                  def context_match?(str, pos)
                    index = pos - 1
                    while index >= 0
                      preceding = str[index]
                      return true if preceding.ord == 0x49 # uppercase I
                      # A character of combining class 0 or 230 ends the scan.
                      return false if [0, 230].include?(UnicodeUtils.combining_class(preceding))
                      index -= 1
                    end
                    false # uppercase I not found
                  end

                end
         
     | 
| 
      
 75 
     | 
    
         
            +
             
     | 
| 
      
 76 
     | 
    
         
            +
                # Matches when a soft-dotted character (as reported by
                # UnicodeUtils.soft_dotted_char?) precedes the character at
                # +pos+, with only characters whose combining class is
                # neither 0 nor 230 in between.
                class AfterSoftDottedConditionalCasing < ConditionalCasing # :nodoc:

                  def context_match?(str, pos)
                    index = pos - 1
                    while index >= 0
                      preceding = str[index]
                      return true if UnicodeUtils.soft_dotted_char?(preceding)
                      # A character of combining class 0 or 230 ends the scan.
                      return false if [0, 230].include?(UnicodeUtils.combining_class(preceding))
                      index -= 1
                    end
                    false
                  end

                end
         
     | 
| 
      
 89 
     | 
    
         
            +
             
     | 
| 
      
 90 
     | 
    
         
            +
                # Matches when the character at +pos+ is preceded by a cased
                # character (optionally separated from it by case-ignorable
                # characters) and is not followed by such a sequence.
                class FinalSigmaConditionalCasing < ConditionalCasing # :nodoc:

                  def context_match?(str, pos)
                    return false unless before_match?(str, pos)
                    !after_match?(str, pos)
                  end

                  private

                  # Scans backwards from the character just before +pos+.
                  def before_match?(str, pos)
                    cased_char_reached?(str, (pos - 1).downto(0))
                  end

                  # Scans forwards from the character just after +pos+.
                  def after_match?(str, pos)
                    cased_char_reached?(str, (pos + 1).upto(str.length - 1))
                  end

                  # Visits the characters of +str+ at +indices+ in the given
                  # order. Returns true on the first cased character, false
                  # on the first character that is neither cased nor
                  # case-ignorable, and false when the indices run out.
                  def cased_char_reached?(str, indices)
                    indices.each { |i|
                      c = str[i]
                      return true if UnicodeUtils.cased_char?(c)
                      return false unless UnicodeUtils.case_ignorable_char?(c)
                    }
                    false # no cased char
                  end

                end
         
     | 
| 
      
 117 
     | 
    
         
            +
             
     | 
| 
      
 118 
     | 
    
         
            +
                # Reads the conditional casing table +filename+ from the cdata
                # directory two levels above this source file.
                #
                # Every line is split on ";" into up to four fields:
                # code point (hex), mapping (comma separated hex code points),
                # language id (may be empty) and context name (may be absent).
                # The context name, with underscores removed, selects the
                # class "<Context>ConditionalCasing" inside Impl; an absent
                # context selects ConditionalCasing itself.
                #
                # Returns a Hash of the form
                # <tt>{ code_point => { language_id_or_nil => casing } }</tt>.
                def self.read_conditional_casings(filename)
                  data_file = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
                  casings = {}
                  File.open(data_file, "r:US-ASCII:-") do |io|
                    io.each_line do |raw|
                      cp_field, mapping_field, lang_field, context_field = raw.chomp.split(";")
                      cp = cp_field.to_i(16)
                      mapping = mapping_field.split(",").map { |hex| hex.to_i(16) }
                      # An empty language field marks a language independent rule.
                      language_id = lang_field.to_sym unless lang_field.empty?
                      context = context_field && context_field.gsub('_', '')
                      casing_class = Impl.const_get("#{context}ConditionalCasing")
                      by_language = (casings[cp] ||= {})
                      by_language[language_id] = casing_class.new(mapping)
                    end
                  end
                  casings
                end

                # Conditional uppercase rules, read from cdata/cond_uc_map.
                CONDITIONAL_UPCASE_MAP = read_conditional_casings("cond_uc_map")

                # Conditional lowercase rules, read from cdata/cond_lc_map.
                CONDITIONAL_DOWNCASE_MAP = read_conditional_casings("cond_lc_map")
         
     | 
| 
      
 141 
     | 
    
         
            +
             
     | 
| 
      
 142 
     | 
    
         
            +
                # Looks up the conditional uppercase mapping for code point
                # +cp+, which sits at character index +pos+ of +str+.
                #
                # Returns the mapping of the first applicable rule, or nil if
                # no conditional rule applies. See conditional_mapping_for
                # for the lookup order.
                def self.conditional_upcase_mapping(cp, str, pos, language_id)
                  conditional_mapping_for(CONDITIONAL_UPCASE_MAP, cp, str, pos, language_id)
                end

                # Looks up the conditional lowercase mapping for code point
                # +cp+, which sits at character index +pos+ of +str+.
                #
                # Returns the mapping of the first applicable rule, or nil if
                # no conditional rule applies. See conditional_mapping_for
                # for the lookup order.
                def self.conditional_downcase_mapping(cp, str, pos, language_id)
                  conditional_mapping_for(CONDITIONAL_DOWNCASE_MAP, cp, str, pos, language_id)
                end

                # Shared lookup behind conditional_upcase_mapping and
                # conditional_downcase_mapping.
                #
                # +casing_map+ has the shape
                # <tt>{ code_point => { language_id_or_nil => casing } }</tt>.
                # The rule registered for +language_id+ is tried first, then
                # the language independent rule (key nil). A rule applies
                # when its context_match?(str, pos) is true.
                #
                # Previously the language specific rule was selected before
                # its context was checked, so a failing context hid the
                # language independent rule. Each candidate is now tested in
                # turn.
                #
                # Returns the mapping of the first applicable rule, or nil.
                def self.conditional_mapping_for(casing_map, cp, str, pos, language_id)
                  lang_map = casing_map[cp]
                  return nil unless lang_map
                  # compact drops missing rules; uniq avoids testing the same
                  # rule twice when language_id is nil.
                  candidates = [lang_map[language_id], lang_map[nil]].compact.uniq
                  candidates.each { |casing|
                    return casing.mapping if casing.context_match?(str, pos)
                  }
                  nil
                end
         
     | 
| 
      
 161 
     | 
    
         
            +
             
     | 
| 
      
 162 
     | 
    
         
            +
              end
         
     | 
| 
      
 163 
     | 
    
         
            +
             
     | 
| 
      
 164 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -1,34 +1,40 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            # encoding: utf-8
         
     | 
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            require "unicode_utils/simple_downcase"
         
     | 
| 
       4 
4 
     | 
    
         
             
            require "unicode_utils/read_special_casing_map"
         
     | 
| 
      
 5 
     | 
    
         
            +
            require "unicode_utils/conditional_casing"
         
     | 
| 
       5 
6 
     | 
    
         | 
| 
       6 
7 
     | 
    
         
             
            module UnicodeUtils
         
     | 
| 
       7 
8 
     | 
    
         | 
| 
       8 
     | 
    
         
            -
              SPECIAL_DOWNCASE_MAP = Impl.read_special_casing_map("special_lc_map")
         
     | 
| 
      
 9 
     | 
    
         
            +
              SPECIAL_DOWNCASE_MAP = Impl.read_special_casing_map("special_lc_map") # :nodoc:
         
     | 
| 
       9 
10 
     | 
    
         | 
| 
       10 
11 
     | 
    
         
             
              # Perform a full case-conversion of +str+ to lowercase according to
         
     | 
| 
       11 
12 
     | 
    
         
             
              # the Unicode standard.
         
     | 
| 
       12 
13 
     | 
    
         
             
              #
         
     | 
| 
       13 
     | 
    
         
            -
              #  
     | 
| 
      
 14 
     | 
    
         
            +
              # Some conversion rules are language dependent; these are in effect
         
     | 
| 
      
 15 
     | 
    
         
            +
              # when a non-nil +language_id+ is given. If non-nil, the
         
     | 
| 
      
 16 
     | 
    
         
            +
              # +language_id+ must be a two letter language code as defined in BCP
         
     | 
| 
      
 17 
     | 
    
         
            +
              # 47 (http://tools.ietf.org/rfc/bcp/bcp47.txt) as a symbol. If a
         
     | 
| 
      
 18 
     | 
    
         
            +
              # language doesn't have a two letter code, the three letter code is
         
     | 
| 
      
 19 
     | 
    
         
            +
              # to be used.
         
     | 
| 
       14 
20 
     | 
    
         
             
              #
         
     | 
| 
       15 
     | 
    
         
            -
              # 
     | 
| 
      
 21 
     | 
    
         
            +
              # Examples:
         
     | 
| 
       16 
22 
     | 
    
         
             
              #
         
     | 
| 
       17 
     | 
    
         
            -
              #  
     | 
| 
       18 
     | 
    
         
            -
              #  
     | 
| 
       19 
     | 
    
         
            -
              # cases. This affects text in the languages Lithuanian, Turkish and
         
     | 
| 
       20 
     | 
    
         
            -
              # Azeri and the greek letter sigma in a special position. A future
         
     | 
| 
       21 
     | 
    
         
            -
              # version of UnicodeUtils will fix this. All other languages are
         
     | 
| 
       22 
     | 
    
         
            -
              # fully supported according to the Unicode standard.
         
     | 
| 
      
 23 
     | 
    
         
            +
              #     UnicodeUtils.downcase("ᾈ") => "ᾀ"
         
     | 
| 
      
 24 
     | 
    
         
            +
              #     UnicodeUtils.downcase("aBI\u{307}", :tr) => "abi"
         
     | 
| 
       23 
25 
     | 
    
         
             
              def downcase(str, language_id = nil)
                # The result carries the same encoding as the input.
                lowered = String.new.force_encoding(str.encoding)
                str.each_codepoint.with_index { |cp, index|
                  # Conditional (context/language dependent) rules take
                  # precedence over the unconditional special casings.
                  replacement = Impl.conditional_downcase_mapping(cp, str, index, language_id)
                  replacement ||= SPECIAL_DOWNCASE_MAP[cp]
                  if replacement
                    # A special mapping is a list of code points; it may be empty.
                    replacement.each { |mapped| lowered << mapped }
                  else
                    # Fall back to the simple one-to-one mapping, or keep the
                    # code point unchanged when there is none.
                    lowered << (SIMPLE_DOWNCASE_MAP[cp] || cp)
                  end
                }
                lowered
              end
         
     | 
| 
         @@ -0,0 +1,15 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require "unicode_utils/read_codepoint_set"
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            module UnicodeUtils
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
              PROP_LOWERCASE_SET = Impl.read_codepoint_set("prop_set_lowercase") # :nodoc:
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
              # True if the given character has the Unicode property Lowercase.
         
     | 
| 
      
 10 
     | 
    
         
            +
              def lowercase_char?(char)
         
     | 
| 
      
 11 
     | 
    
         
            +
                PROP_LOWERCASE_SET.include?(char.ord)
         
     | 
| 
      
 12 
     | 
    
         
            +
              end
         
     | 
| 
      
 13 
     | 
    
         
            +
              module_function :lowercase_char?
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/unicode_utils/name.rb
    CHANGED
    
    | 
         @@ -1,4 +1,4 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            # encoding: utf-8
         
     | 
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            module UnicodeUtils
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
         @@ -7,8 +7,9 @@ module UnicodeUtils 
     | 
|
| 
       7 
7 
     | 
    
         
             
                def self.read_names
         
     | 
| 
       8 
8 
     | 
    
         
             
                  path = File.join(File.dirname(__FILE__), "..", "..", "cdata", "names")
         
     | 
| 
       9 
9 
     | 
    
         
             
                  Hash.new.tap { |map|
         
     | 
| 
       10 
     | 
    
         
            -
                    File.open(path, "r:US-ASCII") do |input|
         
     | 
| 
      
 10 
     | 
    
         
            +
                    File.open(path, "r:US-ASCII:-") do |input|
         
     | 
| 
       11 
11 
     | 
    
         
             
                      buffer = "x" * 6
         
     | 
| 
      
 12 
     | 
    
         
            +
                      buffer.force_encoding(Encoding::US_ASCII)
         
     | 
| 
       12 
13 
     | 
    
         
             
                      while input.read(6, buffer)
         
     | 
| 
       13 
14 
     | 
    
         
             
                        map[buffer.to_i(16)] = input.gets.tap { |x| x.chomp! }
         
     | 
| 
       14 
15 
     | 
    
         
             
                      end
         
     | 
| 
         @@ -18,7 +19,7 @@ module UnicodeUtils 
     | 
|
| 
       18 
19 
     | 
    
         | 
| 
       19 
20 
     | 
    
         
             
              end
         
     | 
| 
       20 
21 
     | 
    
         | 
| 
       21 
     | 
    
         
            -
              NAME_MAP = Impl.read_names
         
     | 
| 
      
 22 
     | 
    
         
            +
              NAME_MAP = Impl.read_names # :nodoc:
         
     | 
| 
       22 
23 
     | 
    
         | 
| 
       23 
24 
     | 
    
         
             
              # Get the Unicode name of the single codepoint in str.
         
     | 
| 
       24 
25 
     | 
    
         
             
              #
         
     | 
| 
         @@ -26,7 +27,7 @@ module UnicodeUtils 
     | 
|
| 
       26 
27 
     | 
    
         
             
              #
         
     | 
| 
       27 
28 
     | 
    
         
             
              #     UnicodeUtils.name "ᾀ" => "GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI"
         
     | 
| 
       28 
29 
     | 
    
         
             
              def name(str)
         
     | 
| 
       29 
     | 
    
         
            -
                NAME_MAP[str.codepoints.first] 
     | 
| 
      
 30 
     | 
    
         
            +
                NAME_MAP[str.codepoints.first]
         
     | 
| 
       30 
31 
     | 
    
         
             
              end
         
     | 
| 
       31 
32 
     | 
    
         
             
              module_function :name
         
     | 
| 
       32 
33 
     | 
    
         | 
| 
         @@ -1,4 +1,4 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            # encoding: utf-8
         
     | 
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            module UnicodeUtils
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
         @@ -7,8 +7,9 @@ module UnicodeUtils 
     | 
|
| 
       7 
7 
     | 
    
         
             
                def self.read_codepoint_map(filename)
         
     | 
| 
       8 
8 
     | 
    
         
             
                  path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
         
     | 
| 
       9 
9 
     | 
    
         
             
                  Hash.new.tap { |map|
         
     | 
| 
       10 
     | 
    
         
            -
                    File.open(path, "r:US-ASCII") do |input|
         
     | 
| 
      
 10 
     | 
    
         
            +
                    File.open(path, "r:US-ASCII:-") do |input|
         
     | 
| 
       11 
11 
     | 
    
         
             
                      buffer = "x" * 6
         
     | 
| 
      
 12 
     | 
    
         
            +
                      buffer.force_encoding(Encoding::US_ASCII)
         
     | 
| 
       12 
13 
     | 
    
         
             
                      while input.read(6, buffer)
         
     | 
| 
       13 
14 
     | 
    
         
             
                        map[buffer.to_i(16)] = input.read(6, buffer).to_i(16)
         
     | 
| 
       14 
15 
     | 
    
         
             
                      end
         
     | 
| 
         @@ -0,0 +1,22 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module UnicodeUtils
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
              module Impl # :nodoc:
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
                def self.read_codepoint_set(filename)
         
     | 
| 
      
 8 
     | 
    
         
            +
                  path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
         
     | 
| 
      
 9 
     | 
    
         
            +
                  Hash.new.tap { |set|
         
     | 
| 
      
 10 
     | 
    
         
            +
                    File.open(path, "r:US-ASCII:-") do |input|
         
     | 
| 
      
 11 
     | 
    
         
            +
                      buffer = "x" * 6
         
     | 
| 
      
 12 
     | 
    
         
            +
                      buffer.force_encoding(Encoding::US_ASCII)
         
     | 
| 
      
 13 
     | 
    
         
            +
                      while input.read(6, buffer)
         
     | 
| 
      
 14 
     | 
    
         
            +
                        set[buffer.to_i(16)] = true
         
     | 
| 
      
 15 
     | 
    
         
            +
                      end
         
     | 
| 
      
 16 
     | 
    
         
            +
                    end
         
     | 
| 
      
 17 
     | 
    
         
            +
                  }
         
     | 
| 
      
 18 
     | 
    
         
            +
                end
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
              end
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -1,4 +1,4 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            # encoding: utf-8
         
     | 
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            module UnicodeUtils
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
         @@ -7,8 +7,9 @@ module UnicodeUtils 
     | 
|
| 
       7 
7 
     | 
    
         
             
                def self.read_special_casing_map(filename)
         
     | 
| 
       8 
8 
     | 
    
         
             
                  path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
         
     | 
| 
       9 
9 
     | 
    
         
             
                  Hash.new.tap { |map|
         
     | 
| 
       10 
     | 
    
         
            -
                    File.open(path, "r:US-ASCII") do |input|
         
     | 
| 
      
 10 
     | 
    
         
            +
                    File.open(path, "r:US-ASCII:-") do |input|
         
     | 
| 
       11 
11 
     | 
    
         
             
                      buffer = "x" * 6
         
     | 
| 
      
 12 
     | 
    
         
            +
                      buffer.force_encoding(Encoding::US_ASCII)
         
     | 
| 
       12 
13 
     | 
    
         
             
                      while input.read(6, buffer)
         
     | 
| 
       13 
14 
     | 
    
         
             
                        cp = buffer.to_i(16)
         
     | 
| 
       14 
15 
     | 
    
         
             
                        mapping = []
         
     | 
| 
         @@ -1,10 +1,10 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            # encoding: utf-8
         
     | 
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            require "unicode_utils/read_codepoint_map"
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
       5 
5 
     | 
    
         
             
            module UnicodeUtils
         
     | 
| 
       6 
6 
     | 
    
         | 
| 
       7 
     | 
    
         
            -
              SIMPLE_DOWNCASE_MAP = Impl.read_codepoint_map("simple_lc_map")
         
     | 
| 
      
 7 
     | 
    
         
            +
              SIMPLE_DOWNCASE_MAP = Impl.read_codepoint_map("simple_lc_map") # :nodoc:
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
9 
     | 
    
         
             
              # Map each codepoint in +str+ that has a single codepoint
         
     | 
| 
       10 
10 
     | 
    
         
             
              # lowercase-mapping to that lowercase mapping. +str+ is assumed to be
         
     | 
| 
         @@ -1,10 +1,10 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            # encoding: utf-8
         
     | 
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            require "unicode_utils/read_codepoint_map"
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
       5 
5 
     | 
    
         
             
            module UnicodeUtils
         
     | 
| 
       6 
6 
     | 
    
         | 
| 
       7 
     | 
    
         
            -
              SIMPLE_UPCASE_MAP = Impl.read_codepoint_map("simple_uc_map")
         
     | 
| 
      
 7 
     | 
    
         
            +
              SIMPLE_UPCASE_MAP = Impl.read_codepoint_map("simple_uc_map") # :nodoc:
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
9 
     | 
    
         
             
              # Map each codepoint in +str+ that has a single codepoint
         
     | 
| 
       10 
10 
     | 
    
         
             
              # uppercase-mapping to that uppercase mapping. +str+ is assumed to be
         
     | 
| 
         @@ -0,0 +1,16 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require "unicode_utils/read_codepoint_set"
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            module UnicodeUtils
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
              SOFT_DOTTED_SET = Impl.read_codepoint_set("soft_dotted_set") # :nodoc:
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
              # Returns true if the given character has the Unicode property
         
     | 
| 
      
 10 
     | 
    
         
            +
              # Soft_Dotted.
         
     | 
| 
      
 11 
     | 
    
         
            +
              def soft_dotted_char?(char)
         
     | 
| 
      
 12 
     | 
    
         
            +
                SOFT_DOTTED_SET.include?(char.ord)
         
     | 
| 
      
 13 
     | 
    
         
            +
              end
         
     | 
| 
      
 14 
     | 
    
         
            +
              module_function :soft_dotted_char?
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,16 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require "unicode_utils/read_codepoint_set"
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            module UnicodeUtils
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
              TITLECASE_LETTER_SET = Impl.read_codepoint_set("cat_set_titlecase") # :nodoc:
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
              # True if the given character has the General_Category
         
     | 
| 
      
 10 
     | 
    
         
            +
              # Titlecase_Letter (Lt).
         
     | 
| 
      
 11 
     | 
    
         
            +
              def titlecase_char?(char)
         
     | 
| 
      
 12 
     | 
    
         
            +
                TITLECASE_LETTER_SET.include?(char.ord)
         
     | 
| 
      
 13 
     | 
    
         
            +
              end
         
     | 
| 
      
 14 
     | 
    
         
            +
              module_function :titlecase_char?
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/unicode_utils/upcase.rb
    CHANGED
    
    | 
         @@ -1,33 +1,41 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            # encoding: utf-8
         
     | 
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            require "unicode_utils/simple_upcase"
         
     | 
| 
       4 
4 
     | 
    
         
             
            require "unicode_utils/read_special_casing_map"
         
     | 
| 
      
 5 
     | 
    
         
            +
            require "unicode_utils/conditional_casing"
         
     | 
| 
       5 
6 
     | 
    
         | 
| 
       6 
7 
     | 
    
         
             
            module UnicodeUtils
         
     | 
| 
       7 
8 
     | 
    
         | 
| 
       8 
     | 
    
         
            -
              SPECIAL_UPCASE_MAP = Impl.read_special_casing_map("special_uc_map")
         
     | 
| 
      
 9 
     | 
    
         
            +
              SPECIAL_UPCASE_MAP = Impl.read_special_casing_map("special_uc_map") # :nodoc:
         
     | 
| 
       9 
10 
     | 
    
         | 
| 
       10 
11 
     | 
    
         
             
              # Perform a full case-conversion of +str+ to uppercase according to
         
     | 
| 
       11 
12 
     | 
    
         
             
              # the Unicode standard.
         
     | 
| 
       12 
13 
     | 
    
         
             
              #
         
     | 
| 
       13 
     | 
    
         
            -
              #  
     | 
| 
      
 14 
     | 
    
         
            +
              # Some conversion rules are language dependent; these are in effect
         
     | 
| 
      
 15 
     | 
    
         
            +
              # when a non-nil +language_id+ is given. If non-nil, the
         
     | 
| 
      
 16 
     | 
    
         
            +
              # +language_id+ must be a two letter language code as defined in BCP
         
     | 
| 
      
 17 
     | 
    
         
            +
              # 47 (http://tools.ietf.org/rfc/bcp/bcp47.txt) as a symbol. If a
         
     | 
| 
      
 18 
     | 
    
         
            +
              # language doesn't have a two letter code, the three letter code is
         
     | 
| 
      
 19 
     | 
    
         
            +
              # to be used.
         
     | 
| 
       14 
20 
     | 
    
         
             
              #
         
     | 
| 
       15 
     | 
    
         
            -
              # 
     | 
| 
      
 21 
     | 
    
         
            +
              # Examples:
         
     | 
| 
       16 
22 
     | 
    
         
             
              #
         
     | 
| 
       17 
     | 
    
         
            -
              #  
     | 
| 
       18 
     | 
    
         
            -
              #  
     | 
| 
       19 
     | 
    
         
            -
              # 
     | 
| 
       20 
     | 
    
         
            -
              # Azeri. A future version of UnicodeUtils will fix this. All other
         
     | 
| 
       21 
     | 
    
         
            -
              # languages are fully supported according to the Unicode standard.
         
     | 
| 
      
 23 
     | 
    
         
            +
              #     UnicodeUtils.upcase("weiß") => "WEISS"
         
     | 
| 
      
 24 
     | 
    
         
            +
              #     UnicodeUtils.upcase("i", :en) => "I"
         
     | 
| 
      
 25 
     | 
    
         
            +
              #     UnicodeUtils.upcase("i", :tr) => "İ"
         
     | 
| 
       22 
26 
     | 
    
         
             
              def upcase(str, language_id = nil)
         
     | 
| 
       23 
27 
     | 
    
         
             
                String.new.force_encoding(str.encoding).tap { |res|
         
     | 
| 
      
 28 
     | 
    
         
            +
                  pos = 0
         
     | 
| 
       24 
29 
     | 
    
         
             
                  str.each_codepoint { |cp|
         
     | 
| 
       25 
     | 
    
         
            -
                    special_mapping = 
     | 
| 
      
 30 
     | 
    
         
            +
                    special_mapping =
         
     | 
| 
      
 31 
     | 
    
         
            +
                      Impl.conditional_upcase_mapping(cp, str, pos, language_id) ||
         
     | 
| 
      
 32 
     | 
    
         
            +
                      SPECIAL_UPCASE_MAP[cp]
         
     | 
| 
       26 
33 
     | 
    
         
             
                    if special_mapping
         
     | 
| 
       27 
34 
     | 
    
         
             
                      special_mapping.each { |m| res << m }
         
     | 
| 
       28 
35 
     | 
    
         
             
                    else
         
     | 
| 
       29 
36 
     | 
    
         
             
                      res << (SIMPLE_UPCASE_MAP[cp] || cp)
         
     | 
| 
       30 
37 
     | 
    
         
             
                    end
         
     | 
| 
      
 38 
     | 
    
         
            +
                    pos += 1
         
     | 
| 
       31 
39 
     | 
    
         
             
                  }
         
     | 
| 
       32 
40 
     | 
    
         
             
                }
         
     | 
| 
       33 
41 
     | 
    
         
             
              end
         
     | 
| 
         @@ -0,0 +1,15 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require "unicode_utils/read_codepoint_set"
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            module UnicodeUtils
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
              PROP_UPPERCASE_SET = Impl.read_codepoint_set("prop_set_uppercase") # :nodoc:
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
              # True if the given character has the Unicode property Uppercase.
         
     | 
| 
      
 10 
     | 
    
         
            +
              def uppercase_char?(char)
         
     | 
| 
      
 11 
     | 
    
         
            +
                PROP_UPPERCASE_SET.include?(char.ord)
         
     | 
| 
      
 12 
     | 
    
         
            +
              end
         
     | 
| 
      
 13 
     | 
    
         
            +
              module_function :uppercase_char?
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            end
         
     | 
    
        data/test/test_unicode_utils.rb
    CHANGED
    
    | 
         @@ -8,6 +8,7 @@ class TestUnicodeUtils < Test::Unit::TestCase 
     | 
|
| 
       8 
8 
     | 
    
         | 
| 
       9 
9 
     | 
    
         
             
              def test_name
         
     | 
| 
       10 
10 
     | 
    
         
             
                assert_equal "LATIN SMALL LETTER F", UnicodeUtils.name("f")
         
     | 
| 
      
 11 
     | 
    
         
            +
                assert_equal Encoding::US_ASCII, UnicodeUtils.name("f").encoding
         
     | 
| 
       11 
12 
     | 
    
         
             
              end
         
     | 
| 
       12 
13 
     | 
    
         | 
| 
       13 
14 
     | 
    
         
             
              def test_simple_upcase
         
     | 
| 
         @@ -22,11 +23,85 @@ class TestUnicodeUtils < Test::Unit::TestCase 
     | 
|
| 
       22 
23 
     | 
    
         | 
| 
       23 
24 
     | 
    
         
             
              def test_upcase
         
     | 
| 
       24 
25 
     | 
    
         
             
                assert_equal "WEISS 123", UnicodeUtils.upcase("Weiß 123")
         
     | 
| 
      
 26 
     | 
    
         
            +
                assert_equal "WEISS 123", UnicodeUtils.upcase("Weiß 123", :de)
         
     | 
| 
      
 27 
     | 
    
         
            +
                assert_equal "I", UnicodeUtils.upcase("i")
         
     | 
| 
      
 28 
     | 
    
         
            +
                assert_equal "I", UnicodeUtils.upcase("i", :de)
         
     | 
| 
      
 29 
     | 
    
         
            +
                assert_equal "\u{130}", UnicodeUtils.upcase("i", :tr)
         
     | 
| 
      
 30 
     | 
    
         
            +
                assert_equal "\u{130}", UnicodeUtils.upcase("i", :az)
         
     | 
| 
      
 31 
     | 
    
         
            +
                assert_equal "ABI\u{3a3}SS\u{3a3}/FFI\u{5ffff}\u{10405}",
         
     | 
| 
      
 32 
     | 
    
         
            +
                  UnicodeUtils.upcase("aBi\u{3c3}\u{df}\u{3c2}/\u{fb03}\u{5ffff}\u{1042d}")
         
     | 
| 
      
 33 
     | 
    
         
            +
                assert_equal "AB\u{130}\u{3a3}SS\u{3a3}/FFI\u{5ffff}\u{10405}",
         
     | 
| 
      
 34 
     | 
    
         
            +
                  UnicodeUtils.upcase("aBi\u{3c3}\u{df}\u{3c2}/\u{fb03}\u{5ffff}\u{1042d}", :az)
         
     | 
| 
      
 35 
     | 
    
         
            +
                assert_equal "I\u{307}", UnicodeUtils.upcase("i\u{307}")
         
     | 
| 
      
 36 
     | 
    
         
            +
                assert_equal "I", UnicodeUtils.upcase("i\u{307}", :lt)
         
     | 
| 
       25 
37 
     | 
    
         
             
              end
         
     | 
| 
       26 
38 
     | 
    
         | 
| 
       27 
39 
     | 
    
         
             
              def test_downcase
         
     | 
| 
       28 
40 
     | 
    
         
             
                # LATIN CAPITAL LETTER I WITH DOT ABOVE
         
     | 
| 
       29 
41 
     | 
    
         
             
                assert_equal "\u0069\u0307", UnicodeUtils.downcase("\u0130")
         
     | 
| 
      
 42 
     | 
    
         
            +
                assert_equal "\u0069\u0307", UnicodeUtils.downcase("\u0130", :de)
         
     | 
| 
      
 43 
     | 
    
         
            +
                assert_equal "\u0069", UnicodeUtils.downcase("\u0130", :tr)
         
     | 
| 
      
 44 
     | 
    
         
            +
                assert_equal "\u0069", UnicodeUtils.downcase("\u0130", :az)
         
     | 
| 
      
 45 
     | 
    
         
            +
                assert_equal "ab\u{131}\u{3c3}\u{df}\u{3c2}/\u{5ffff}\u{1042d}",
         
     | 
| 
      
 46 
     | 
    
         
            +
                  UnicodeUtils.downcase("aBI\u{3a3}\u{df}\u{3a3}/\u{5ffff}\u{10405}", :tr)
         
     | 
| 
      
 47 
     | 
    
         
            +
                # tests After_I and Not_Before_Dot
         
     | 
| 
      
 48 
     | 
    
         
            +
                assert_equal "abi", UnicodeUtils.downcase("aBI\u{307}", :tr)
         
     | 
| 
      
 49 
     | 
    
         
            +
                assert_equal "ia\u{300}", UnicodeUtils.downcase("Ia\u{300}", :lt)
         
     | 
| 
      
 50 
     | 
    
         
            +
                # this is probably unrealistic, because I don't understand a word of Lithuanian
         
     | 
| 
      
 51 
     | 
    
         
            +
                assert_equal "i\u{307}\u{300}", UnicodeUtils.downcase("I\u{300}", :lt)
         
     | 
| 
      
 52 
     | 
    
         
            +
              end
         
     | 
| 
      
 53 
     | 
    
         
            +
             
     | 
| 
      
 54 
     | 
    
         
            +
              def test_downcase_final_sigma
         
     | 
| 
      
 55 
     | 
    
         
            +
                assert_equal "abi\u{3c3}\u{df}\u{3c2}/\u{5ffff}\u{1042d}",
         
     | 
| 
      
 56 
     | 
    
         
            +
                  UnicodeUtils.downcase("aBI\u{3a3}\u{df}\u{3a3}/\u{5ffff}\u{10405}")
         
     | 
| 
      
 57 
     | 
    
         
            +
              end
         
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
      
 59 
     | 
    
         
            +
              def test_titlecase?
         
     | 
| 
      
 60 
     | 
    
         
            +
                assert_equal true, UnicodeUtils.titlecase_char?("\u{01F2}")
         
     | 
| 
      
 61 
     | 
    
         
            +
                assert_equal false, UnicodeUtils.titlecase_char?("\u{0041}")
         
     | 
| 
      
 62 
     | 
    
         
            +
              end
         
     | 
| 
      
 63 
     | 
    
         
            +
             
     | 
| 
      
 64 
     | 
    
         
            +
              def test_lowercase_char?
         
     | 
| 
      
 65 
     | 
    
         
            +
                assert_equal true, UnicodeUtils.lowercase_char?("c")
         
     | 
| 
      
 66 
     | 
    
         
            +
                assert_equal true, UnicodeUtils.lowercase_char?("ö")
         
     | 
| 
      
 67 
     | 
    
         
            +
                assert_equal false, UnicodeUtils.lowercase_char?("C")
         
     | 
| 
      
 68 
     | 
    
         
            +
                assert_equal false, UnicodeUtils.lowercase_char?("2")
         
     | 
| 
      
 69 
     | 
    
         
            +
              end
         
     | 
| 
      
 70 
     | 
    
         
            +
             
     | 
| 
      
 71 
     | 
    
         
            +
              def test_uppercase_char?
         
     | 
| 
      
 72 
     | 
    
         
            +
                assert_equal true, UnicodeUtils.uppercase_char?("C")
         
     | 
| 
      
 73 
     | 
    
         
            +
                assert_equal true, UnicodeUtils.uppercase_char?("Ö")
         
     | 
| 
      
 74 
     | 
    
         
            +
                assert_equal false, UnicodeUtils.uppercase_char?("2")
         
     | 
| 
      
 75 
     | 
    
         
            +
                assert_equal false, UnicodeUtils.uppercase_char?("c")
         
     | 
| 
      
 76 
     | 
    
         
            +
              end
         
     | 
| 
      
 77 
     | 
    
         
            +
             
     | 
| 
      
 78 
     | 
    
         
            +
              def test_cased_char?
         
     | 
| 
      
 79 
     | 
    
         
            +
                assert_equal true, UnicodeUtils.cased_char?("a")
         
     | 
| 
      
 80 
     | 
    
         
            +
                assert_equal true, UnicodeUtils.cased_char?("Ä")
         
     | 
| 
      
 81 
     | 
    
         
            +
                assert_equal true, UnicodeUtils.cased_char?("ß")
         
     | 
| 
      
 82 
     | 
    
         
            +
                assert_equal false, UnicodeUtils.cased_char?("2")
         
     | 
| 
      
 83 
     | 
    
         
            +
              end
         
     | 
| 
      
 84 
     | 
    
         
            +
             
     | 
| 
      
 85 
     | 
    
         
            +
              def test_case_ignorable_char?
         
     | 
| 
      
 86 
     | 
    
         
            +
                assert_equal true, UnicodeUtils.case_ignorable_char?(":")
         
     | 
| 
      
 87 
     | 
    
         
            +
                assert_equal true, UnicodeUtils.case_ignorable_char?("\u{302}")
         
     | 
| 
      
 88 
     | 
    
         
            +
                assert_equal true, UnicodeUtils.case_ignorable_char?("\u{20dd}")
         
     | 
| 
      
 89 
     | 
    
         
            +
                assert_equal true, UnicodeUtils.case_ignorable_char?("\u{600}")
         
     | 
| 
      
 90 
     | 
    
         
            +
                assert_equal true, UnicodeUtils.case_ignorable_char?("\u{2b0}")
         
     | 
| 
      
 91 
     | 
    
         
            +
                assert_equal true, UnicodeUtils.case_ignorable_char?("\u{2c2}")
         
     | 
| 
      
 92 
     | 
    
         
            +
                assert_equal false, UnicodeUtils.case_ignorable_char?("a")
         
     | 
| 
      
 93 
     | 
    
         
            +
                assert_equal false, UnicodeUtils.case_ignorable_char?("1")
         
     | 
| 
      
 94 
     | 
    
         
            +
              end
         
     | 
| 
      
 95 
     | 
    
         
            +
             
     | 
| 
      
 96 
     | 
    
         
            +
              def test_combining_class
         
     | 
| 
      
 97 
     | 
    
         
            +
                assert_equal 0, UnicodeUtils.combining_class("a")
         
     | 
| 
      
 98 
     | 
    
         
            +
                assert_equal 230, UnicodeUtils.combining_class("\u{1b6e}")
         
     | 
| 
      
 99 
     | 
    
         
            +
              end
         
     | 
| 
      
 100 
     | 
    
         
            +
             
     | 
| 
      
 101 
     | 
    
         
            +
              def test_soft_dotted_char?
         
     | 
| 
      
 102 
     | 
    
         
            +
                assert_equal true, UnicodeUtils.soft_dotted_char?("j")
         
     | 
| 
      
 103 
     | 
    
         
            +
                assert_equal true, UnicodeUtils.soft_dotted_char?("\u{2c7c}")
         
     | 
| 
      
 104 
     | 
    
         
            +
                assert_equal false, UnicodeUtils.soft_dotted_char?("a")
         
     | 
| 
       30 
105 
     | 
    
         
             
              end
         
     | 
| 
       31 
106 
     | 
    
         | 
| 
       32 
107 
     | 
    
         
             
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification 
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: unicode_utils
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version 
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.1.0
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.2.0
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors: 
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Stefan Lang
         
     | 
| 
         @@ -9,7 +9,7 @@ autorequire: 
     | 
|
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
11 
     | 
    
         | 
| 
       12 
     | 
    
         
            -
            date: 2008- 
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2008-11-02 00:00:00 +01:00
         
     | 
| 
       13 
13 
     | 
    
         
             
            default_executable: 
         
     | 
| 
       14 
14 
     | 
    
         
             
            dependencies: []
         
     | 
| 
       15 
15 
     | 
    
         | 
| 
         @@ -23,19 +23,36 @@ extra_rdoc_files: 
     | 
|
| 
       23 
23 
     | 
    
         
             
            - README.txt
         
     | 
| 
       24 
24 
     | 
    
         
             
            files: 
         
     | 
| 
       25 
25 
     | 
    
         
             
            - lib/unicode_utils/read_special_casing_map.rb
         
     | 
| 
      
 26 
     | 
    
         
            +
            - lib/unicode_utils/conditional_casing.rb
         
     | 
| 
       26 
27 
     | 
    
         
             
            - lib/unicode_utils/simple_downcase.rb
         
     | 
| 
       27 
28 
     | 
    
         
             
            - lib/unicode_utils/read_codepoint_map.rb
         
     | 
| 
      
 29 
     | 
    
         
            +
            - lib/unicode_utils/read_codepoint_set.rb
         
     | 
| 
      
 30 
     | 
    
         
            +
            - lib/unicode_utils/titlecase_char_q.rb
         
     | 
| 
      
 31 
     | 
    
         
            +
            - lib/unicode_utils/cased_char_q.rb
         
     | 
| 
       28 
32 
     | 
    
         
             
            - lib/unicode_utils/downcase.rb
         
     | 
| 
       29 
33 
     | 
    
         
             
            - lib/unicode_utils/name.rb
         
     | 
| 
      
 34 
     | 
    
         
            +
            - lib/unicode_utils/uppercase_char_q.rb
         
     | 
| 
       30 
35 
     | 
    
         
             
            - lib/unicode_utils/upcase.rb
         
     | 
| 
      
 36 
     | 
    
         
            +
            - lib/unicode_utils/case_ignorable_char_q.rb
         
     | 
| 
       31 
37 
     | 
    
         
             
            - lib/unicode_utils/simple_upcase.rb
         
     | 
| 
      
 38 
     | 
    
         
            +
            - lib/unicode_utils/lowercase_char_q.rb
         
     | 
| 
      
 39 
     | 
    
         
            +
            - lib/unicode_utils/combining_class.rb
         
     | 
| 
       32 
40 
     | 
    
         
             
            - lib/unicode_utils/version.rb
         
     | 
| 
      
 41 
     | 
    
         
            +
            - lib/unicode_utils/soft_dotted_char_q.rb
         
     | 
| 
       33 
42 
     | 
    
         
             
            - lib/unicode_utils.rb
         
     | 
| 
      
 43 
     | 
    
         
            +
            - cdata/combining_class_map
         
     | 
| 
      
 44 
     | 
    
         
            +
            - cdata/cond_lc_map
         
     | 
| 
      
 45 
     | 
    
         
            +
            - cdata/prop_set_lowercase
         
     | 
| 
      
 46 
     | 
    
         
            +
            - cdata/cat_set_titlecase
         
     | 
| 
       34 
47 
     | 
    
         
             
            - cdata/special_lc_map
         
     | 
| 
       35 
48 
     | 
    
         
             
            - cdata/names
         
     | 
| 
      
 49 
     | 
    
         
            +
            - cdata/cond_uc_map
         
     | 
| 
       36 
50 
     | 
    
         
             
            - cdata/special_uc_map
         
     | 
| 
      
 51 
     | 
    
         
            +
            - cdata/soft_dotted_set
         
     | 
| 
       37 
52 
     | 
    
         
             
            - cdata/simple_lc_map
         
     | 
| 
      
 53 
     | 
    
         
            +
            - cdata/case_ignorable_set
         
     | 
| 
       38 
54 
     | 
    
         
             
            - cdata/simple_uc_map
         
     | 
| 
      
 55 
     | 
    
         
            +
            - cdata/prop_set_uppercase
         
     | 
| 
       39 
56 
     | 
    
         
             
            - test/test_unicode_utils.rb
         
     | 
| 
       40 
57 
     | 
    
         
             
            - README.txt
         
     | 
| 
       41 
58 
     | 
    
         
             
            - LICENSE.txt
         
     |