unicode 0.4.1-x86-mingw32 → 0.4.2-x86-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +15 -2
- data/ext/unicode/unicode.c +119 -1
- data/lib/unicode/1.8/unicode_native.so +0 -0
- data/lib/unicode/1.9/unicode_native.so +0 -0
- data/unicode.gemspec +1 -1
- metadata +4 -4
    
        data/README
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            		   Unicode Library for Ruby
         | 
| 2 | 
            -
            			Version 0.4. | 
| 2 | 
            +
            			Version 0.4.2
         | 
| 3 3 |  | 
| 4 4 | 
             
            		       Yoshida Masato
         | 
| 5 5 |  | 
| @@ -15,7 +15,7 @@ | |
| 15 15 | 
             
            - Install
         | 
| 16 16 |  | 
| 17 17 | 
             
              This can work with ruby-1.8 or later. I recommend you to
         | 
| 18 | 
            -
              use ruby-1.9. | 
| 18 | 
            +
              use ruby-1.9.3 or later.
         | 
| 19 19 |  | 
| 20 20 | 
             
              Make and install usually.
         | 
| 21 21 | 
             
              For example, when Ruby supports dynamic linking on your OS,
         | 
| @@ -59,6 +59,11 @@ | |
| 59 59 | 
             
                mapping in UnicodeData.txt and the Hangul decomposition
         | 
| 60 60 | 
             
                algorithm.
         | 
| 61 61 |  | 
| 62 | 
            +
              Unicode::decompose_safe(str)
         | 
| 63 | 
            +
                Decompose Unicode string with a non-standard mapping.
         | 
| 64 | 
            +
                It does not decompose the characters in
         | 
| 65 | 
            +
                CompositionExclusions.txt.
         | 
| 66 | 
            +
             | 
| 62 67 | 
             
              Unicode::compose(str)
         | 
| 63 68 | 
             
                Compose Unicode string. Before composing, the trailing
         | 
| 64 69 | 
             
                characters are sorted in canonical order.
         | 
| @@ -73,12 +78,19 @@ | |
| 73 78 | 
             
                Normalize Unicode string in form D or form KD.
         | 
| 74 79 | 
             
                These are aliases of decompose/decompose_compat.
         | 
| 75 80 |  | 
| 81 | 
            +
              Unicode::normalize_D_safe(str)  (Unicode::nfd_safe(str))
         | 
| 82 | 
            +
                This is an alias of decompose_safe.
         | 
| 83 | 
            +
             | 
| 76 84 | 
             
              Unicode::normalize_C(str) (Unicode::nfc(str))
         | 
| 77 85 | 
             
              Unicode::normalize_KC(str) (Unicode::nfkc(str))
         | 
| 78 86 | 
             
                Normalize Unicode string in form C or form KC.
         | 
| 79 87 | 
             
                  normalize_C  = decompose + compose
         | 
| 80 88 | 
             
                  normalize_KC = decompose_compat + compose
         | 
| 81 89 |  | 
| 90 | 
            +
              Unicode::normalize_C_safe(str) (Unicode::nfc_safe(str))
         | 
| 91 | 
            +
                Normalize Unicode string with decompose_safe.
         | 
| 92 | 
            +
                  normalize_C_safe  = decompose_safe + compose
         | 
| 93 | 
            +
             | 
| 82 94 | 
             
              Unicode::upcase(str)
         | 
| 83 95 | 
             
              Unicode::downcase(str)
         | 
| 84 96 | 
             
              Unicode::capitalize(str)
         | 
| @@ -111,6 +123,7 @@ | |
| 111 123 |  | 
| 112 124 | 
             
            - History
         | 
| 113 125 |  | 
| 126 | 
            +
              Feb 29, 2012 version 0.4.2 add decompose_safe
         | 
| 114 127 | 
             
              Feb  3, 2012 version 0.4.1 update unidata.map for Unicode 6.1
         | 
| 115 128 | 
             
              Oct 14, 2010 version 0.4.0 fix the composition algorithm, and support Unicode 6.0
         | 
| 116 129 | 
             
              Feb 26, 2010 version 0.3.0 fix a capitalize bug and support SpecialCasing
         | 
    
        data/ext/unicode/unicode.c
    CHANGED
    
    | @@ -7,6 +7,8 @@ | |
| 7 7 | 
             
             *
         | 
| 8 8 | 
             
             */
         | 
| 9 9 |  | 
| 10 | 
            +
            #define UNICODE_VERSION "0.4.2"
         | 
| 11 | 
            +
             | 
| 10 12 | 
             
            #include "ruby.h"
         | 
| 11 13 | 
             
            #ifdef HAVE_RUBY_IO_H
         | 
| 12 14 | 
             
            #  include "ruby/io.h"
         | 
| @@ -86,6 +88,19 @@ get_canon(int ucs) | |
| 86 88 | 
             
              return NULL;
         | 
| 87 89 | 
             
            }
         | 
| 88 90 |  | 
| 91 | 
            +
            static const char*
         | 
| 92 | 
            +
            get_canon_ex(int ucs)
         | 
| 93 | 
            +
            {
         | 
| 94 | 
            +
              VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
         | 
| 95 | 
            +
             | 
| 96 | 
            +
              if (!NIL_P(ch)) {
         | 
| 97 | 
            +
                int i = FIX2INT(ch);
         | 
| 98 | 
            +
                if (!unidata[i].exclusion)
         | 
| 99 | 
            +
                  return unidata[i].canon;
         | 
| 100 | 
            +
              }
         | 
| 101 | 
            +
              return NULL;
         | 
| 102 | 
            +
            }
         | 
| 103 | 
            +
             | 
| 89 104 | 
             
            static const char*
         | 
| 90 105 | 
             
            get_compat(int ucs)
         | 
| 91 106 | 
             
            {
         | 
| @@ -216,7 +231,41 @@ decompose_internal(WString* ustr, WString* result) | |
| 216 231 | 
             
            }
         | 
| 217 232 |  | 
| 218 233 | 
             
            /*
         | 
| 219 | 
            -
             * push  | 
| 234 | 
            +
             * push decomposed str into result 
         | 
| 235 | 
            +
             */
         | 
| 236 | 
            +
            static WString*
         | 
| 237 | 
            +
            decompose_safe_internal(WString* ustr, WString* result)
         | 
| 238 | 
            +
            {
         | 
| 239 | 
            +
              int i;
         | 
| 240 | 
            +
              int len = ustr->len;
         | 
| 241 | 
            +
             | 
| 242 | 
            +
              for (i = 0; i < len; i++) {
         | 
| 243 | 
            +
                int ucs = ustr->str[i];
         | 
| 244 | 
            +
                if (ucs >= SBASE && ucs < SBASE + SCOUNT) {
         | 
| 245 | 
            +
                  int l, v, t;
         | 
| 246 | 
            +
                  decompose_hangul(ucs, &l, &v, &t);
         | 
| 247 | 
            +
                  WStr_addWChar(result, l);
         | 
| 248 | 
            +
                  if (v) WStr_addWChar(result, v);
         | 
| 249 | 
            +
                  if (t) WStr_addWChar(result, t);
         | 
| 250 | 
            +
                }
         | 
| 251 | 
            +
                else {
         | 
| 252 | 
            +
                  const char* dc = get_canon_ex(ucs);
         | 
| 253 | 
            +
                  if (!dc) {
         | 
| 254 | 
            +
            	WStr_addWChar(result, ucs);
         | 
| 255 | 
            +
                  }
         | 
| 256 | 
            +
                  else {
         | 
| 257 | 
            +
            	WString wdc;
         | 
| 258 | 
            +
            	WStr_allocWithUTF8(&wdc, dc);
         | 
| 259 | 
            +
            	decompose_safe_internal(&wdc, result);
         | 
| 260 | 
            +
            	WStr_free(&wdc);
         | 
| 261 | 
            +
                  }
         | 
| 262 | 
            +
                }
         | 
| 263 | 
            +
              }
         | 
| 264 | 
            +
              return result;
         | 
| 265 | 
            +
            }
         | 
| 266 | 
            +
             | 
| 267 | 
            +
            /*
         | 
| 268 | 
            +
             * push compatibility decomposed str into result 
         | 
| 220 269 | 
             
             */
         | 
| 221 270 | 
             
            static WString*
         | 
| 222 271 | 
             
            decompose_compat_internal(WString* ustr, WString* result)
         | 
| @@ -582,6 +631,32 @@ unicode_decompose(VALUE obj, VALUE str) | |
| 582 631 | 
             
              return vret;
         | 
| 583 632 | 
             
            }
         | 
| 584 633 |  | 
| 634 | 
            +
            static VALUE
         | 
| 635 | 
            +
            unicode_decompose_safe(VALUE obj, VALUE str)
         | 
| 636 | 
            +
            {
         | 
| 637 | 
            +
              WString ustr;
         | 
| 638 | 
            +
              WString result;
         | 
| 639 | 
            +
              UString ret;
         | 
| 640 | 
            +
              VALUE vret;
         | 
| 641 | 
            +
             | 
| 642 | 
            +
              Check_Type(str, T_STRING);
         | 
| 643 | 
            +
            #ifdef HAVE_RUBY_ENCODING_H
         | 
| 644 | 
            +
              CONVERT_TO_UTF8(str);
         | 
| 645 | 
            +
            #endif
         | 
| 646 | 
            +
              WStr_allocWithUTF8(&ustr, RSTRING_PTR(str));
         | 
| 647 | 
            +
              WStr_alloc(&result);
         | 
| 648 | 
            +
              decompose_safe_internal(&ustr, &result);
         | 
| 649 | 
            +
              WStr_free(&ustr);
         | 
| 650 | 
            +
              sort_canonical(&result);
         | 
| 651 | 
            +
              UniStr_alloc(&ret);
         | 
| 652 | 
            +
              WStr_convertIntoUString(&result, &ret);
         | 
| 653 | 
            +
              WStr_free(&result);
         | 
| 654 | 
            +
              vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
         | 
| 655 | 
            +
              UniStr_free(&ret);
         | 
| 656 | 
            +
             | 
| 657 | 
            +
              return vret;
         | 
| 658 | 
            +
            }
         | 
| 659 | 
            +
             | 
| 585 660 | 
             
            static VALUE
         | 
| 586 661 | 
             
            unicode_decompose_compat(VALUE obj, VALUE str)
         | 
| 587 662 | 
             
            {
         | 
| @@ -664,6 +739,36 @@ unicode_normalize_C(VALUE obj, VALUE str) | |
| 664 739 | 
             
              return vret;
         | 
| 665 740 | 
             
            }
         | 
| 666 741 |  | 
| 742 | 
            +
            static VALUE
         | 
| 743 | 
            +
            unicode_normalize_safe(VALUE obj, VALUE str)
         | 
| 744 | 
            +
            {
         | 
| 745 | 
            +
              WString ustr1;
         | 
| 746 | 
            +
              WString ustr2;
         | 
| 747 | 
            +
              WString result;
         | 
| 748 | 
            +
              UString ret;
         | 
| 749 | 
            +
              VALUE vret;
         | 
| 750 | 
            +
             | 
| 751 | 
            +
              Check_Type(str, T_STRING);
         | 
| 752 | 
            +
            #ifdef HAVE_RUBY_ENCODING_H
         | 
| 753 | 
            +
              CONVERT_TO_UTF8(str);
         | 
| 754 | 
            +
            #endif
         | 
| 755 | 
            +
              WStr_allocWithUTF8(&ustr1, RSTRING_PTR(str));
         | 
| 756 | 
            +
              WStr_alloc(&ustr2);
         | 
| 757 | 
            +
              decompose_safe_internal(&ustr1, &ustr2);
         | 
| 758 | 
            +
              WStr_free(&ustr1);
         | 
| 759 | 
            +
              sort_canonical(&ustr2);
         | 
| 760 | 
            +
              WStr_alloc(&result);
         | 
| 761 | 
            +
              compose_internal(&ustr2, &result);
         | 
| 762 | 
            +
              WStr_free(&ustr2);
         | 
| 763 | 
            +
              UniStr_alloc(&ret);
         | 
| 764 | 
            +
              WStr_convertIntoUString(&result, &ret);
         | 
| 765 | 
            +
              WStr_free(&result);
         | 
| 766 | 
            +
              vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
         | 
| 767 | 
            +
              UniStr_free(&ret);
         | 
| 768 | 
            +
             | 
| 769 | 
            +
              return vret;
         | 
| 770 | 
            +
            }
         | 
| 771 | 
            +
             | 
| 667 772 | 
             
            static VALUE
         | 
| 668 773 | 
             
            unicode_normalize_KC(VALUE obj, VALUE str)
         | 
| 669 774 | 
             
            {
         | 
| @@ -811,6 +916,8 @@ Init_unicode_native() | |
| 811 916 |  | 
| 812 917 | 
             
              rb_define_module_function(mUnicode, "decompose",
         | 
| 813 918 | 
             
            			    unicode_decompose, 1);
         | 
| 919 | 
            +
              rb_define_module_function(mUnicode, "decompose_safe",
         | 
| 920 | 
            +
            			    unicode_decompose_safe, 1);
         | 
| 814 921 | 
             
              rb_define_module_function(mUnicode, "decompose_compat",
         | 
| 815 922 | 
             
            			    unicode_decompose_compat, 1);
         | 
| 816 923 | 
             
              rb_define_module_function(mUnicode, "compose",
         | 
| @@ -818,20 +925,28 @@ Init_unicode_native() | |
| 818 925 |  | 
| 819 926 | 
             
              rb_define_module_function(mUnicode, "normalize_D",
         | 
| 820 927 | 
             
            			    unicode_decompose, 1);
         | 
| 928 | 
            +
              rb_define_module_function(mUnicode, "normalize_D_safe",
         | 
| 929 | 
            +
            			    unicode_decompose_safe, 1);
         | 
| 821 930 | 
             
              rb_define_module_function(mUnicode, "normalize_KD",
         | 
| 822 931 | 
             
            			    unicode_decompose_compat, 1);
         | 
| 823 932 | 
             
              rb_define_module_function(mUnicode, "normalize_C",
         | 
| 824 933 | 
             
            			    unicode_normalize_C, 1);
         | 
| 934 | 
            +
              rb_define_module_function(mUnicode, "normalize_C_safe",
         | 
| 935 | 
            +
            			    unicode_normalize_safe, 1);
         | 
| 825 936 | 
             
              rb_define_module_function(mUnicode, "normalize_KC",
         | 
| 826 937 | 
             
            			    unicode_normalize_KC, 1);
         | 
| 827 938 |  | 
| 828 939 | 
             
              /* aliases */
         | 
| 829 940 | 
             
              rb_define_module_function(mUnicode, "nfd",
         | 
| 830 941 | 
             
            			    unicode_decompose, 1);
         | 
| 942 | 
            +
              rb_define_module_function(mUnicode, "nfd_safe",
         | 
| 943 | 
            +
            			    unicode_decompose_safe, 1);
         | 
| 831 944 | 
             
              rb_define_module_function(mUnicode, "nfkd",
         | 
| 832 945 | 
             
            			    unicode_decompose_compat, 1);
         | 
| 833 946 | 
             
              rb_define_module_function(mUnicode, "nfc",
         | 
| 834 947 | 
             
            			    unicode_normalize_C, 1);
         | 
| 948 | 
            +
              rb_define_module_function(mUnicode, "nfc_safe",
         | 
| 949 | 
            +
            			    unicode_normalize_safe, 1);
         | 
| 835 950 | 
             
              rb_define_module_function(mUnicode, "nfkc",
         | 
| 836 951 | 
             
            			    unicode_normalize_KC, 1);
         | 
| 837 952 |  | 
| @@ -841,4 +956,7 @@ Init_unicode_native() | |
| 841 956 | 
             
            			    unicode_downcase, 1);
         | 
| 842 957 | 
             
              rb_define_module_function(mUnicode, "capitalize",
         | 
| 843 958 | 
             
            			    unicode_capitalize, 1);
         | 
| 959 | 
            +
             | 
| 960 | 
            +
              rb_define_const(mUnicode, "VERSION",
         | 
| 961 | 
            +
            		  rb_str_new2(UNICODE_VERSION));
         | 
| 844 962 | 
             
            }
         | 
| Binary file | 
| Binary file | 
    
        data/unicode.gemspec
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,13 +1,13 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification 
         | 
| 2 2 | 
             
            name: unicode
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version 
         | 
| 4 | 
            -
              hash:  | 
| 4 | 
            +
              hash: 11
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
              segments: 
         | 
| 7 7 | 
             
              - 0
         | 
| 8 8 | 
             
              - 4
         | 
| 9 | 
            -
              -  | 
| 10 | 
            -
              version: 0.4. | 
| 9 | 
            +
              - 2
         | 
| 10 | 
            +
              version: 0.4.2
         | 
| 11 11 | 
             
            platform: x86-mingw32
         | 
| 12 12 | 
             
            authors: 
         | 
| 13 13 | 
             
            - Yoshida Masato
         | 
| @@ -73,7 +73,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 73 73 | 
             
            requirements: []
         | 
| 74 74 |  | 
| 75 75 | 
             
            rubyforge_project: 
         | 
| 76 | 
            -
            rubygems_version: 1.8. | 
| 76 | 
            +
            rubygems_version: 1.8.17
         | 
| 77 77 | 
             
            signing_key: 
         | 
| 78 78 | 
             
            specification_version: 3
         | 
| 79 79 | 
             
            summary: Unicode normalization library.
         |