RubyGems - unicode - Versions diffs - 0.4.1-x86-mingw32 → 0.4.2-x86-mingw32 - Mend

unicode 0.4.1-x86-mingw32 → 0.4.2-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

data/README +15 -2
data/ext/unicode/unicode.c +119 -1
data/lib/unicode/1.8/unicode_native.so +0 -0
data/lib/unicode/1.9/unicode_native.so +0 -0
data/unicode.gemspec +1 -1
metadata +4 -4

data/README CHANGED

@@ -1,5 +1,5 @@
 		   Unicode Library for Ruby
-			Version 0.4.1
+			Version 0.4.2
 		       Yoshida Masato
@@ -15,7 +15,7 @@
 - Install
   This can work with ruby-1.8 or later. I recommend you to
-  use ruby-1.9.2 or later.
+  use ruby-1.9.3 or later.
   Make and install usually.
   For example, when Ruby supports dynamic linking on your OS,
@@ -59,6 +59,11 @@
     mapping in UnicodeData.txt and the Hangul decomposition
     algorithm.
+  Unicode::decompose_safe(str)
+    Decompose Unicode string with a non-standard mapping.
+    It does not decompose the characters in
+    CompositionExclusions.txt.
   Unicode::compose(str)
     Compose Unicode string. Before composing, the trailing
     characters are sorted in canonical order.
@@ -73,12 +78,19 @@
     Normalize Unicode string in form D or form KD.
     These are aliases of decompose/decompose_compat.
+  Unicode::normalize_D_safe(str)  (Unicode::nfd_safe(str))
+    This is an alias of decompose_safe.
   Unicode::normalize_C(str) (Unicode::nfc(str))
   Unicode::normalize_KC(str) (Unicode::nfkc(str))
     Normalize Unicode string in form C or form KC.
       normalize_C  = decompose + compose
       normalize_KC = decompose_compat + compose
+  Unicode::normalize_C_safe(str) (Unicode::nfc_safe(str))
+    Normalize Unicode string with decompose_safe.
+      normalize_C_safe  = decompose_safe + compose
   Unicode::upcase(str)
   Unicode::downcase(str)
   Unicode::capitalize(str)
@@ -111,6 +123,7 @@
 - History
+  Feb 29, 2012 version 0.4.2 add decompose_safe
   Feb  3, 2012 version 0.4.1 update unidata.map for Unicode 6.1
   Oct 14, 2010 version 0.4.0 fix the composition algorithm, and support Unicode 6.0
   Feb 26, 2010 version 0.3.0 fix a capitalize bug and support SpecialCasing

data/ext/unicode/unicode.c CHANGED

@@ -7,6 +7,8 @@
  *
  */
+#define UNICODE_VERSION "0.4.2"
 #include "ruby.h"
 #ifdef HAVE_RUBY_IO_H
 #  include "ruby/io.h"
@@ -86,6 +88,19 @@ get_canon(int ucs)
   return NULL;
 }
+static const char*
+get_canon_ex(int ucs)
+{
+  VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
+  if (!NIL_P(ch)) {
+    int i = FIX2INT(ch);
+    if (!unidata[i].exclusion)
+      return unidata[i].canon;
+  }
+  return NULL;
+}
 static const char*
 get_compat(int ucs)
 {
@@ -216,7 +231,41 @@ decompose_internal(WString* ustr, WString* result)
 }
 /*
- * push compatibility decomposed str into result
+ * push decomposed str into result
+ */
+static WString*
+decompose_safe_internal(WString* ustr, WString* result)
+{
+  int i;
+  int len = ustr->len;
+  for (i = 0; i < len; i++) {
+    int ucs = ustr->str[i];
+    if (ucs >= SBASE && ucs < SBASE + SCOUNT) {
+      int l, v, t;
+      decompose_hangul(ucs, &l, &v, &t);
+      WStr_addWChar(result, l);
+      if (v) WStr_addWChar(result, v);
+      if (t) WStr_addWChar(result, t);
+    }
+    else {
+      const char* dc = get_canon_ex(ucs);
+      if (!dc) {
+	WStr_addWChar(result, ucs);
+      }
+      else {
+	WString wdc;
+	WStr_allocWithUTF8(&wdc, dc);
+	decompose_safe_internal(&wdc, result);
+	WStr_free(&wdc);
+      }
+    }
+  }
+  return result;
+}
+/*
+ * push compatibility decomposed str into result
  */
 static WString*
 decompose_compat_internal(WString* ustr, WString* result)
@@ -582,6 +631,32 @@ unicode_decompose(VALUE obj, VALUE str)
   return vret;
 }
+static VALUE
+unicode_decompose_safe(VALUE obj, VALUE str)
+{
+  WString ustr;
+  WString result;
+  UString ret;
+  VALUE vret;
+  Check_Type(str, T_STRING);
+#ifdef HAVE_RUBY_ENCODING_H
+  CONVERT_TO_UTF8(str);
+#endif
+  WStr_allocWithUTF8(&ustr, RSTRING_PTR(str));
+  WStr_alloc(&result);
+  decompose_safe_internal(&ustr, &result);
+  WStr_free(&ustr);
+  sort_canonical(&result);
+  UniStr_alloc(&ret);
+  WStr_convertIntoUString(&result, &ret);
+  WStr_free(&result);
+  vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
+  UniStr_free(&ret);
+  return vret;
+}
 static VALUE
 unicode_decompose_compat(VALUE obj, VALUE str)
 {
@@ -664,6 +739,36 @@ unicode_normalize_C(VALUE obj, VALUE str)
   return vret;
 }
+static VALUE
+unicode_normalize_safe(VALUE obj, VALUE str)
+{
+  WString ustr1;
+  WString ustr2;
+  WString result;
+  UString ret;
+  VALUE vret;
+  Check_Type(str, T_STRING);
+#ifdef HAVE_RUBY_ENCODING_H
+  CONVERT_TO_UTF8(str);
+#endif
+  WStr_allocWithUTF8(&ustr1, RSTRING_PTR(str));
+  WStr_alloc(&ustr2);
+  decompose_safe_internal(&ustr1, &ustr2);
+  WStr_free(&ustr1);
+  sort_canonical(&ustr2);
+  WStr_alloc(&result);
+  compose_internal(&ustr2, &result);
+  WStr_free(&ustr2);
+  UniStr_alloc(&ret);
+  WStr_convertIntoUString(&result, &ret);
+  WStr_free(&result);
+  vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
+  UniStr_free(&ret);
+  return vret;
+}
 static VALUE
 unicode_normalize_KC(VALUE obj, VALUE str)
 {
@@ -811,6 +916,8 @@ Init_unicode_native()
   rb_define_module_function(mUnicode, "decompose",
 			    unicode_decompose, 1);
+  rb_define_module_function(mUnicode, "decompose_safe",
+			    unicode_decompose_safe, 1);
   rb_define_module_function(mUnicode, "decompose_compat",
 			    unicode_decompose_compat, 1);
   rb_define_module_function(mUnicode, "compose",
@@ -818,20 +925,28 @@ Init_unicode_native()
   rb_define_module_function(mUnicode, "normalize_D",
 			    unicode_decompose, 1);
+  rb_define_module_function(mUnicode, "normalize_D_safe",
+			    unicode_decompose_safe, 1);
   rb_define_module_function(mUnicode, "normalize_KD",
 			    unicode_decompose_compat, 1);
   rb_define_module_function(mUnicode, "normalize_C",
 			    unicode_normalize_C, 1);
+  rb_define_module_function(mUnicode, "normalize_C_safe",
+			    unicode_normalize_safe, 1);
   rb_define_module_function(mUnicode, "normalize_KC",
 			    unicode_normalize_KC, 1);
   /* aliases */
   rb_define_module_function(mUnicode, "nfd",
 			    unicode_decompose, 1);
+  rb_define_module_function(mUnicode, "nfd_safe",
+			    unicode_decompose_safe, 1);
   rb_define_module_function(mUnicode, "nfkd",
 			    unicode_decompose_compat, 1);
   rb_define_module_function(mUnicode, "nfc",
 			    unicode_normalize_C, 1);
+  rb_define_module_function(mUnicode, "nfc_safe",
+			    unicode_normalize_safe, 1);
   rb_define_module_function(mUnicode, "nfkc",
 			    unicode_normalize_KC, 1);
@@ -841,4 +956,7 @@ Init_unicode_native()
 			    unicode_downcase, 1);
   rb_define_module_function(mUnicode, "capitalize",
 			    unicode_capitalize, 1);
+  rb_define_const(mUnicode, "VERSION",
+		  rb_str_new2(UNICODE_VERSION));
 }

data/lib/unicode/1.8/unicode_native.so CHANGED

Binary file

data/lib/unicode/1.9/unicode_native.so CHANGED

Binary file

data/unicode.gemspec CHANGED

@@ -2,7 +2,7 @@
 Gem::Specification.new do |s|
   s.name = %q{unicode}
-  s.version = "0.4.1"
+  s.version = "0.4.2"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = [%q{Yoshida Masato}]

metadata CHANGED

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: unicode
 version: !ruby/object:Gem::Version
-  hash: 13
+  hash: 11
   prerelease:
   segments:
   - 0
   - 4
-  - 1
-  version: 0.4.1
+  - 2
+  version: 0.4.2
 platform: x86-mingw32
 authors:
 - Yoshida Masato
@@ -73,7 +73,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.15
+rubygems_version: 1.8.17
 signing_key:
 specification_version: 3
 summary: Unicode normalization library.