RubyGems - icu4r_19 - Versions diffs - 1.0 - Mend

icu4r_19 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

data/ChangeLog +87 -0
data/MIT-LICENSE +20 -0
data/README +156 -0
data/Rakefile +32 -0
data/calendar.c +636 -0
data/collator.c +233 -0
data/converter.c +322 -0
data/docs/FORMATTING +131 -0
data/docs/UNICODE_REGEXPS +204 -0
data/extconf.rb +17 -0
data/fmt.cpp +156 -0
data/icu4r.c +18 -0
data/icu_common.h +45 -0
data/lib/dummy +0 -0
data/samples/demo_each.rb +23 -0
data/samples/demo_locales.rb +16 -0
data/samples/demo_regexp.rb +11 -0
data/samples/resbundle/appmsg/root.res +0 -0
data/samples/resbundle/appmsg/ru.res +0 -0
data/samples/resbundle/demo_bundle.rb +4 -0
data/samples/resbundle/mkres.sh +4 -0
data/samples/resbundle/root.txt +10 -0
data/samples/resbundle/ru.txt +4 -0
data/test/test_calendar.rb +123 -0
data/test/test_collator.rb +33 -0
data/test/test_converter.rb +72 -0
data/test/test_ustring.rb +508 -0
data/tools/doc.sh +2 -0
data/tools/km.rb +425 -0
data/ubundle.c +223 -0
data/ucore_ext.c +168 -0
data/uregex.c +697 -0
data/uregex.h +27 -0
data/ustring.c +3039 -0
metadata +164 -0

data/uregex.h ADDED

@@ -0,0 +1,27 @@
+ extern  void 	icu_regex_free (ICURegexp *ptr);
+ extern  VALUE 	icu_reg_s_alloc (VALUE klass);
+ extern  VALUE 	icu_reg_initialize_m (int argc, VALUE *argv, VALUE self);
+ extern  VALUE 	icu_reg_new (UChar *s, long len, int options) ;
+ extern  VALUE 	icu_reg_clone (VALUE obj);
+ extern  VALUE 	icu_reg_comp (VALUE str);
+ extern  VALUE 	icu_reg_from_rb_reg (VALUE re);
+ extern  VALUE 	icu_reg_to_u (VALUE self);
+ extern  VALUE 	icu_reg_split (VALUE self, VALUE str, VALUE limit);
+ extern  VALUE 	icu_reg_nth_match (VALUE re, long nth);
+ extern  VALUE 	icu_reg_range (VALUE re, int nth, long *start, long *end);
+ extern  VALUE 	icu_reg_match (VALUE re, VALUE str);
+ extern  VALUE 	icu_reg_eqq (VALUE re, VALUE str);
+ extern  int 	icu_reg_find_next (VALUE pat);
+ extern  VALUE 	icu_reg_get_replacement (VALUE pat, VALUE repl_text, long prev_end);
+ extern  VALUE 	icu_reg_get_prematch (VALUE pat, long prev_end);
+ extern  VALUE 	icu_reg_get_tail (VALUE pat, long prev_end);
+ extern  VALUE 	icu_reg_from_rb_str (int argc, VALUE *argv, VALUE obj);
+ extern  VALUE 	icu_umatch_range (VALUE match, VALUE index);
+ extern  VALUE 	icu_umatch_size (VALUE match);
+ extern  VALUE 	icu_umatch_init (VALUE self, VALUE re);
+ extern  VALUE 	icu_umatch_aref (VALUE match, VALUE idx);
+ extern  VALUE 	icu_umatch_new (VALUE re);
+ extern  long   icu_group_count(VALUE re);
+ extern  long   icu_reg_search(VALUE re, VALUE str, int pos, int reverse);
+ extern  void 	initialize_uregexp (void);

data/ustring.c ADDED

@@ -0,0 +1,3039 @@
+/**
+ *  ustring.c - ICU based Unicode string support.
+ *
+ * $Id: ustring.c,v 1.20 2006/01/23 14:26:45 meadow Exp $
+ *
+ * Copyright (c) 2006 Nikolai Lugovoi
+ *
+ * This code is based on original ruby String class source (string.c):
+ *
+ *  * string.c -
+ *  *
+ *  * Copyright (C) 1993-2003 Yukihiro Matsumoto
+ *  * Copyright (C) 2000  Network Applied Communication Laboratory, Inc.
+ *  * Copyright (C) 2000  Information-technology Promotion Agency, Japan
+ *  *
+ **/
+#include "icu_common.h"
+VALUE           icu_ustr_replace(VALUE str, VALUE str2);
+VALUE		ustr_gsub(int argc, VALUE * argv, VALUE str, int bang, int once);
+extern VALUE icu_from_rstr(int argc, VALUE * argv, VALUE str);
+ VALUE rb_cURegexp;
+ VALUE rb_cUString;
+ VALUE rb_cUMatch;
+ VALUE rb_cUResourceBundle;
+ VALUE rb_cULocale;
+ VALUE rb_cUCalendar;
+ VALUE rb_cUConverter;
+ VALUE rb_cUCollator;
+#include "uregex.h"
+/* shared collators: s_UCA_collator is used by <=> (icu_ustr_cmp), s_case_UCA_collator by casecmp */
+static UCollator * s_UCA_collator, * s_case_UCA_collator;
+/*
+ * Free callback handed to Data_Wrap_Struct for wrapped ICUString
+ * structs: releases the UChar buffer first, then the struct itself.
+ */
+static void
+free_ustr(ICUString *str)
+{
+    free(str->ptr);		/* free(NULL) is a no-op, no guard needed */
+    str->ptr = 0;
+    free(str);
+}
+/*
+ * Raise unless +str+ may be modified: always when it is frozen
+ * (rb_check_frozen) and, if +check_busy+ is non-zero, also when its
+ * busy counter is positive.
+ *
+ * Deliberately not declared `inline': a non-static inline definition
+ * provides no external definition under C99 and later, so calls that
+ * are not inlined could fail to link.
+ */
+void icu_check_frozen(int check_busy, VALUE str)
+{
+    rb_check_frozen(str);
+    if (check_busy && USTRING(str)->busy > 0) {
+        rb_raise(rb_eRuntimeError, "String is busy. Can't modify");
+    }
+}
+#define START_BUF_LEN  16
+/**
+ * Allocate ICUString struct with given +capa+ capacity,
+ * if mode == ICU_COPY - allocate a new buffer and, when src != 0, copy len UChars from src,
+ * else (ICU_SET) adopt the src pointer itself as the string buffer.
+ */
+#define   ICU_COPY   1
+#define   ICU_SET    0
+VALUE icu_ustr_alloc_and_wrap(UChar * src, long len, long capa, int mode)
+{
+    ICUString *n_str;
+    UChar     *buf;
+    long       buf_capa;
+
+    /* Validate everything before allocating, so that raising leaks nothing. */
+    if (len < 0) {
+        rb_raise(rb_eArgError, "negative string size (or size too big)");
+    }
+    if (mode == ICU_COPY) {
+        /* at least START_BUF_LEN units, and always room for the sentinel */
+        buf_capa = START_BUF_LEN > capa ? START_BUF_LEN : capa;
+        if (buf_capa <= len) buf_capa = len + 1;
+    } else {
+        /* ICU_SET: the caller's buffer is adopted as is */
+        if (!src) {
+            rb_raise(rb_eArgError, "NULL pointer given");
+        }
+        if (capa <= len) {
+            rb_raise(rb_eRuntimeError, "Capacity is not large then len, sentinel can't be set!");
+        }
+        buf_capa = capa;
+    }
+
+    if (mode == ICU_COPY) {
+        buf = ALLOC_N(UChar, buf_capa);
+        /* with src == 0 the first len units are left uninitialized */
+        if (src) u_memcpy(buf, src, len);
+    } else {
+        buf = src;
+    }
+
+    n_str = ALLOC_N(ICUString, 1);
+    n_str->ptr  = buf;
+    n_str->len  = len;
+    n_str->capa = buf_capa;
+    n_str->busy = 0;
+    n_str->ptr[len] = 0;	/* sentinel */
+    return Data_Wrap_Struct(rb_cUString, 0, free_ustr, n_str);
+}
+/*
+ * Create a new, empty wrapped string with the default start capacity.
+ * NOTE(review): +klass+ is not used; the object is always wrapped as
+ * rb_cUString -- confirm that subclass allocation is not expected.
+ */
+VALUE
+icu_ustr_alloc(VALUE klass)
+{
+    (void) klass;
+    return icu_ustr_alloc_and_wrap(NULL, 0, 0, ICU_COPY);
+}
+/*
+ * Bring the buffer of +str+ to +new_capa+ units.  The buffer always
+ * grows on demand, but shrinks only if more than 1024 units would be
+ * given back; it never gets smaller than START_BUF_LEN.
+ */
+void ustr_capa_resize(ICUString * str, long new_capa)
+{
+    const long current = str->capa;
+
+    if (new_capa == current)
+        return;
+    /* smaller request that would release too little: keep the buffer */
+    if (new_capa <= current && current - new_capa <= 1024)
+        return;
+
+    if (new_capa < START_BUF_LEN)
+        new_capa = START_BUF_LEN;
+    REALLOC_N(str->ptr, UChar, new_capa);
+    str->capa = new_capa;
+}
+/* delete +del_len+ units from string and insert replacement */
+void ustr_splice_units(ICUString * str, long start, long del_len, const UChar * replacement, long repl_len)
+{
+    long   new_len, tail_len;
+    UChar *temp = 0;
+
+    if (str->busy) {
+        rb_warn("Attempt to modify busy string. Ignored");
+        return;
+    }
+    if (repl_len < 0) return;
+    if (del_len == 0 && repl_len == 0) return;
+
+    new_len  = str->len - del_len + repl_len;
+    tail_len = str->len - (start + del_len);
+
+    /*
+     * The replacement may point anywhere into the string's own buffer,
+     * not only at its start.  Resizing can move that buffer and moving
+     * the tail can overwrite it, so work from a private copy.
+     */
+    if (repl_len > 0 &&
+        replacement >= str->ptr && replacement < str->ptr + str->capa) {
+        temp = ALLOC_N(UChar, repl_len);
+        u_memcpy(temp, replacement, repl_len);
+        replacement = temp;
+    }
+
+    /* grow before the tail is moved ... */
+    if (repl_len >= del_len) ustr_capa_resize(str, new_len + 1);
+    /* move tail */
+    if (tail_len > 0) {
+        u_memmove(str->ptr + start + repl_len, str->ptr + start + del_len, tail_len);
+    }
+    /* copy replacement */
+    if (repl_len > 0) u_memcpy(str->ptr + start, replacement, repl_len);
+    /* ... but shrink only after it has been moved */
+    if (repl_len < del_len) ustr_capa_resize(str, new_len + 1);
+
+    str->len = new_len;
+    str->ptr[new_len] = 0;	/* sentinel */
+    free(temp);
+}
+/*
+ * Raise RuntimeError unless +s+ still has the buffer pointer +p+ and
+ * the length +len+ that were recorded earlier.
+ */
+static inline void
+ustr_mod_check(VALUE s, UChar *p, long len)
+{
+    const int same_buffer = (ICU_PTR(s) == p);
+    const int same_length = (ICU_LEN(s) == len);
+
+    if (same_buffer && same_length)
+        return;
+    rb_raise(rb_eRuntimeError, "string modified");
+}
+/*
+ * Build a new wrapped string holding a copy of +len+ units from +ptr+.
+ * Raises ArgumentError for a negative +len+.
+ * NOTE(review): +klass+ is not used; the result is wrapped as rb_cUString.
+ */
+VALUE
+ustr_new(VALUE klass, UChar *ptr, long len)
+{
+    (void) klass;
+    if (len < 0) {
+        rb_raise(rb_eArgError, "negative string size (or size too big)");
+    }
+    /* len + 1 leaves room for the sentinel */
+    return icu_ustr_alloc_and_wrap(ptr, len, len + 1, ICU_COPY);
+}
+/*
+ * Build a new UString holding a copy of +len+ units from +ptr+.
+ */
+VALUE
+icu_ustr_new(const UChar *ptr, long len)
+{
+    /* ustr_new only copies from the buffer, so dropping const is safe */
+    return ustr_new(rb_cUString, (UChar *) ptr, len);
+}
+/*
+ * Wrap an existing buffer without copying it (ICU_SET mode): +ptr+
+ * becomes the string's buffer, holding +len+ units out of +capa+.
+ * The wrapped buffer is later released by free_ustr.
+ */
+VALUE
+icu_ustr_new_set(UChar *ptr, long len, long capa)
+{
+    return icu_ustr_alloc_and_wrap(ptr, len, capa, ICU_SET);
+}
+/*
+ * Build a new UString from a NUL-terminated UChar string.
+ * Raises ArgumentError when +ptr+ is NULL.
+ */
+VALUE
+icu_ustr_new2(const UChar *ptr)
+{
+    if (!ptr) {
+        rb_raise(rb_eArgError, "NULL pointer given");
+    }
+    return icu_ustr_new(ptr, (long) u_strlen(ptr));
+}
+/*
+ * Build a new UString with a copy of +len+ units from +ptr+ (no copy
+ * when +ptr+ is NULL) and room for at least +capa+ units.
+ *
+ * Deliberately not declared `inline': a non-static inline definition
+ * provides no external definition under C99 and later, so calls that
+ * are not inlined could fail to link.
+ */
+VALUE
+icu_ustr_new_capa(UChar * ptr, long len, long capa)
+{
+    return icu_ustr_alloc_and_wrap(ptr, len, capa, ICU_COPY);
+}
+/* ------------ */
+/**
+ *  call-seq:
+ *     UString.new(str="".u)   => new_str
+ *
+ *  Returns a new string object containing a copy of <i>str</i>.
+ */
+VALUE
+icu_ustr_init(int argc, VALUE *argv, VALUE str)
+{
+    VALUE orig;
+
+    /* zero or one argument; when given, take over its contents */
+    if (rb_scan_args(argc, argv, "01", &orig) == 1) {
+        icu_ustr_replace(str, orig);
+    }
+    return str;
+}
+/**
+ *  call-seq:
+ *     str.length  => integer
+ *
+ *  Returns the length of <i>str</i>.
+ */
+VALUE
+icu_ustr_length(VALUE str)
+{
+    /* the stored length, i.e. a count of UChar units */
+    const long units = ICU_LEN(str);
+
+    return LONG2NUM(units);
+}
+/**
+ *  call-seq:
+ *     str.empty?   => true or false
+ *
+ *  Returns <code>true</code> if <i>str</i> has a length of zero.
+ *
+ *     "hello".u.empty?   #=> false
+ *     "".u.empty?        #=> true
+ */
+VALUE
+icu_ustr_empty(VALUE str)
+{
+    if (ICU_LEN(str) == 0)
+        return Qtrue;
+    return Qfalse;
+}
+/*
+ * Set the length of +str+ to +len+ units, adjusting the buffer as
+ * needed, and terminate it.  Units added by growing the string are
+ * not initialized.  Raises ArgumentError for a negative +len+.
+ */
+VALUE
+icu_ustr_resize(VALUE str, long len)
+{
+    if (len < 0) {
+        rb_raise(rb_eArgError, "negative string size (or size too big)");
+    }
+    /* len + 1: the sentinel written below needs a slot of its own */
+    ustr_capa_resize(USTRING(str), len + 1);
+    ICU_LEN(str) = len;
+    ICU_PTR(str)[len] = 0;	/* sentinel */
+    return str;
+}
+/**
+ *  call-seq:
+ *     str.replace(other_str)   => str
+ *
+ *  Replaces the contents and taintedness of <i>str</i> with the corresponding
+ *  values in <i>other_str</i>.
+ *
+ *     s = "hello".u       #=> "hello"
+ *     s.replace "world".u   #=> "world"
+ */
+VALUE
+icu_ustr_replace(VALUE str, VALUE str2)
+{
+    /* replacing a string with itself changes nothing */
+    if (str == str2)
+        return str;
+    icu_check_frozen(1, str);
+    Check_Class(str2, rb_cUString);
+    /* drop the whole old content and insert the other string's units */
+    ustr_splice_units(USTRING(str), 0, ICU_LEN(str), ICU_PTR(str2), ICU_LEN(str2));
+    OBJ_INFECT(str, str2);
+    return str;
+}
+/**
+ *  call-seq:
+ *     string.clear    ->  string
+ *
+ *  Makes string empty.
+ *
+ *     a = "abcde".u
+ *     a.clear    #=> ""
+ */
+VALUE
+icu_ustr_clear(VALUE str)
+{
+    icu_check_frozen(1, str);
+    /* icu_ustr_resize hands back str itself */
+    return icu_ustr_resize(str, 0L);
+}
+/*
+ * Compare two UStrings with the given collator and map the ICU
+ * result to 1 (UCOL_GREATER), -1 (UCOL_LESS) or 0 (anything else).
+ */
+int icu_collator_cmp (UCollator * collator, VALUE str1, VALUE str2)
+{
+    int order = ucol_strcoll(collator,
+                             ICU_PTR(str1), ICU_LEN(str1),
+                             ICU_PTR(str2), ICU_LEN(str2));
+
+    if (order == UCOL_GREATER)
+        return 1;
+    if (order == UCOL_LESS)
+        return -1;
+    return 0;
+}
+/*
+ * Compare two UStrings with the shared collator s_UCA_collator.
+ * Returns -1, 0 or 1.
+ */
+int
+icu_ustr_cmp(VALUE str1, VALUE str2)
+{
+    return icu_collator_cmp(s_UCA_collator, str1, str2);
+}
+/**
+ *  call-seq:
+ *     str == obj   => true or false
+ *
+ *  Equality---If <i>obj</i> is not a <code>UString</code>, returns
+ *  <code>false</code>. Otherwise, returns <code>true</code> if
+ *  strings are of the same length and content
+ *
+ */
+VALUE
+icu_ustr_equal(VALUE str1, VALUE str2)
+{
+    if (str1 == str2)
+        return Qtrue;
+    if (CLASS_OF(str2) != rb_cUString)
+        return Qfalse;
+    if (ICU_LEN(str1) != ICU_LEN(str2))
+        return Qfalse;
+    /*
+     * u_memcmp compares all units; u_strncmp would stop at the first
+     * U+0000 and report strings differing after it as equal.
+     */
+    if (u_memcmp(ICU_PTR(str1), ICU_PTR(str2), ICU_LEN(str1)) == 0)
+        return Qtrue;
+    return Qfalse;
+}
+/**
+ *  call-seq:
+ *     str <=> other_str   => -1, 0, +1
+ *
+ *  Comparison---Returns -1 if <i>str</i> is less than, 0 if
+ *  <i>str</i> is equal to, and +1 if <i>str</i> is greater than
+ *  <i>other_str</i>.
+ *
+ *  <code><=></code> is the basis for the methods <code><</code>,
+ *  <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>,
+ *  included from module <code>Comparable</code>.  The method
+ *  <code>String#==</code> does not use <code>Comparable#==</code>.
+ *
+ *  This method uses UCA rules, see also #strcoll for locale-specific string collation.
+ *
+ *     "abcdef".u <=> "abcde".u     #=> 1
+ *     "abcdef".u <=> "abcdef".u    #=> 0
+ *     "abcdef".u <=> "abcdefg".u   #=> -1
+ *     "abcdef".u <=> "ABCDEF".u    #=> -1
+ */
+VALUE
+icu_ustr_cmp_m(VALUE str1, VALUE str2)
+{
+    /* anything that is not a UString is not comparable: nil */
+    if (CLASS_OF(str2) != rb_cUString)
+        return Qnil;
+    return LONG2NUM((long) icu_ustr_cmp(str1, str2));
+}
+/**
+ *  call-seq:
+ *     str.casecmp(other_str)   => -1, 0, +1
+ *
+ *  Case-insensitive version of <code>UString#<=></code> .
+ *  This method uses UCA collator with secondary strength, see #strcoll
+ *
+ *
+ *     "abcdef".u.casecmp("abcde".u)     #=> 1
+ *     "aBcDeF".u.casecmp("abcdef".u)    #=> 0
+ *     "abcdef".u.casecmp("abcdefg".u)   #=> -1
+ *     "abcdef".u.casecmp("ABCDEF".u)    #=> 0
+ */
+VALUE
+icu_ustr_casecmp(VALUE str1, VALUE str2)
+{
+    int order;
+
+    Check_Class(str2, rb_cUString);
+    order = icu_collator_cmp(s_case_UCA_collator, str1, str2);
+    return INT2FIX(order);
+}
+/**
+ *  call-seq:
+ *     str + other_str   => new_str
+ *
+ *  Concatenation---Returns a new <code>UString</code> containing
+ *  <i>other_str</i> concatenated to <i>str</i>.
+ *
+ *     "Hello from ".u + "main".u   #=> "Hello from main"
+ */
+VALUE
+icu_ustr_plus(VALUE str1, VALUE str2)
+{
+    VALUE joined;
+
+    Check_Class(str2, rb_cUString);
+    /* copy of str1 with room for both strings and the sentinel */
+    joined = icu_ustr_new_capa(ICU_PTR(str1), ICU_LEN(str1),
+                               ICU_LEN(str1) + ICU_LEN(str2) + 1);
+    /* append str2 at the end of the copy */
+    ustr_splice_units(USTRING(joined), ICU_LEN(joined), 0, ICU_PTR(str2), ICU_LEN(str2));
+    if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2))
+        OBJ_TAINT(joined);
+    return joined;
+}
+/**
+ *  call-seq:
+ *     str * integer   => new_str
+ *
+ *  Copy---Returns a new <code>UString</code> containing <i>integer</i> copies of
+ *  the receiver.
+ *
+ *     "Ho! ".u * 3   #=> "Ho! Ho! Ho! ".u
+ */
+VALUE
+icu_ustr_times(VALUE str, VALUE times)
+{
+    VALUE result;
+    long  count, unit, total, at;
+
+    Check_Type(times, T_FIXNUM);
+    count = NUM2LONG(times);
+    if (count < 0) {
+        rb_raise(rb_eArgError, "negative argument");
+    }
+    unit = ICU_LEN(str);
+    /* keep count * unit + 1 (sentinel) representable as a long */
+    if (count && (LONG_MAX - 1) / count < unit) {
+        rb_raise(rb_eArgError, "argument too big");
+    }
+    total = count * unit;
+
+    /*
+     * Allocate the final size once and fill it directly; splicing the
+     * copies in one at a time resized the buffer on every iteration.
+     */
+    result = icu_ustr_new_capa(0, 0, total + 1);
+    for (at = 0; at < total; at += unit) {
+        u_memcpy(ICU_PTR(result) + at, ICU_PTR(str), unit);
+    }
+    ICU_LEN(result) = total;
+    ICU_PTR(result)[total] = 0;	/* sentinel */
+    OBJ_INFECT(result, str);
+    return result;
+}
+/**
+ *  call-seq:
+ *     str << other_str           => str
+ *     str.concat(other_str)      => str
+ *
+ *  Append---Concatenates the given string object to <i>str</i>.
+ *
+ *     a = "hello ".u
+ *     a << "world".u   #=> "hello world"
+ */
+VALUE
+icu_ustr_concat(VALUE str1, VALUE str2)
+{
+    icu_check_frozen(1, str1);
+    Check_Class(str2, rb_cUString);
+    /* appending an empty string changes nothing, not even taintedness */
+    if (ICU_LEN(str2) <= 0)
+        return str1;
+    ustr_splice_units(USTRING(str1), ICU_LEN(str1), 0, ICU_PTR(str2), ICU_LEN(str2));
+    OBJ_INFECT(str1, str2);
+    return str1;
+}
+/*
+ * Hash the raw bytes of the string's UChar units with a shift/add/xor
+ * mixing loop and a final avalanche step.
+ *
+ * The arithmetic is kept exactly as before (plain `char' bytes, signed
+ * `int' accumulator) so that hash values do not change; note that the
+ * result therefore depends on the platform's `char' signedness.
+ */
+int
+icu_ustr_hash(VALUE str)
+{
+    long  remaining = ICU_LEN(str) * (sizeof(UChar));
+    char *p = (char *) ICU_PTR(str);
+    int   key = 0;
+
+    while (remaining--) {
+        key += *p++;
+        key += (key << 10);
+        key ^= (key >> 6);
+    }
+    key += (key << 3);
+    key ^= (key >> 11);
+    key += (key << 15);
+    return key;
+}
+/**
+ * call-seq:
+ *    str.hash   => fixnum
+ *
+ * Return a hash based on the string's length and content.
+ */
+VALUE
+icu_ustr_hash_m(VALUE str)
+{
+    /* icu_ustr_hash computes the value; here it is only boxed */
+    return INT2FIX(icu_ustr_hash(str));
+}
+/*
+ * Return a new UString holding a copy of the units of +str+.
+ */
+VALUE
+icu_ustr_dup(VALUE str)
+{
+    return icu_ustr_new(ICU_PTR(str), ICU_LEN(str));
+}
+/**
+ *  call-seq:
+ *     str.upcase!(locale = "")   => str or nil
+ *
+ *  Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
+ *  were made. This method is locale-sensitive.
+ */
+VALUE
+icu_ustr_upcase_bang(int argc, VALUE *argv, VALUE str)
+{
+    UErrorCode  error = U_ZERO_ERROR;
+    UChar      *buf;
+    long        len, capa;
+    VALUE       loc;
+    char       *locale = NULL;
+
+    icu_check_frozen(1, str);
+    /* parse the arguments first: a raise here must not leak the buffer */
+    if (rb_scan_args(argc, argv, "01", &loc) == 1 && loc != Qnil) {
+        Check_Type(loc, T_STRING);
+        locale = RSTRING_PTR(loc);
+    }
+
+    /* one unit is always held back for the sentinel */
+    capa = ICU_LEN(str) + 1;
+    buf = ALLOC_N(UChar, capa);
+    len = u_strToUpper(buf, capa - 1, ICU_PTR(str), ICU_LEN(str), locale, &error);
+    if (U_BUFFER_OVERFLOW_ERROR == error) {
+        /* the result is longer than the source: retry with the reported size */
+        capa = len + 1;
+        REALLOC_N(buf, UChar, capa);
+        error = U_ZERO_ERROR;
+        len = u_strToUpper(buf, capa - 1, ICU_PTR(str), ICU_LEN(str), locale, &error);
+    }
+    if (U_FAILURE(error)) {
+        free(buf);
+        rb_raise(rb_eRuntimeError, "%s", u_errorName(error));
+    }
+    /* unchanged only if both the length and every unit are the same */
+    if (len == ICU_LEN(str) && 0 == u_memcmp(buf, ICU_PTR(str), len)) {
+        free(buf);
+        return Qnil;
+    }
+    buf[len] = 0;		/* sentinel */
+    free(ICU_PTR(str));
+    ICU_PTR(str) = buf;
+    ICU_LEN(str) = len;
+    /* the new buffer may be smaller than the old one: record its real size */
+    USTRING(str)->capa = capa;
+    return str;
+}
+/**
+ *  call-seq:
+ *     str.upcase(locale = "")   => new_str
+ *
+ *  Returns a copy of <i>str</i> with all lowercase letters replaced with their
+ *  uppercase counterparts. The operation is locale sensitive.
+ *
+ *     "hEllO".u.upcase   #=> "HELLO"
+ */
+VALUE
+icu_ustr_upcase(int argc, VALUE *argv, VALUE str)
+{
+    VALUE copy = icu_ustr_dup(str);
+
+    /* the nil ("nothing changed") result of the bang method is dropped */
+    icu_ustr_upcase_bang(argc, argv, copy);
+    return copy;
+}
+/**
+ *  call-seq:
+ *     str.downcase!(locale = "")   => str or nil
+ *
+ *  Downcases the contents of <i>str</i>, returning <code>nil</code> if no
+ *  changes were made.
+ */
+VALUE
+icu_ustr_downcase_bang(int argc, VALUE *argv, VALUE str)
+{
+    UErrorCode  error = U_ZERO_ERROR;
+    UChar      *buf;
+    long        len, capa;
+    VALUE       loc;
+    char       *locale = NULL;
+
+    /* check and parse before allocating: a raise must not leak the buffer */
+    icu_check_frozen(1, str);
+    if (rb_scan_args(argc, argv, "01", &loc) == 1 && loc != Qnil) {
+        Check_Type(loc, T_STRING);
+        locale = RSTRING_PTR(loc);
+    }
+
+    /* one unit is always held back for the sentinel */
+    capa = ICU_LEN(str) + 1;
+    buf = ALLOC_N(UChar, capa);
+    len = u_strToLower(buf, capa - 1, ICU_PTR(str), ICU_LEN(str), locale, &error);
+    if (U_BUFFER_OVERFLOW_ERROR == error) {
+        /* the result is longer than the source: retry with the reported size */
+        capa = len + 1;
+        REALLOC_N(buf, UChar, capa);
+        error = U_ZERO_ERROR;
+        len = u_strToLower(buf, capa - 1, ICU_PTR(str), ICU_LEN(str), locale, &error);
+    }
+    if (U_FAILURE(error)) {
+        free(buf);
+        rb_raise(rb_eRuntimeError, "%s", u_errorName(error));
+    }
+    /* unchanged only if both the length and every unit are the same */
+    if (len == ICU_LEN(str) && 0 == u_memcmp(buf, ICU_PTR(str), len)) {
+        free(buf);
+        return Qnil;
+    }
+    buf[len] = 0;		/* sentinel */
+    free(ICU_PTR(str));
+    ICU_PTR(str) = buf;
+    ICU_LEN(str) = len;
+    /* the new buffer may be smaller than the old one: record its real size */
+    USTRING(str)->capa = capa;
+    return str;
+}
+/**
+ *  call-seq:
+ *     str.downcase(locale = "")   => new_str
+ *
+ *  Returns a copy of <i>str</i> with all uppercase letters replaced with their
+ *  lowercase counterparts. The operation is locale sensitive.
+ *
+ *     "hEllO".u.downcase   #=> "hello"
+ */
+VALUE
+icu_ustr_downcase(int argc, VALUE *argv, VALUE str)
+{
+    VALUE copy = icu_ustr_dup(str);
+
+    /* the nil ("nothing changed") result of the bang method is dropped */
+    icu_ustr_downcase_bang(argc, argv, copy);
+    return copy;
+}
+/**
+ * call-seq:
+ *     str.foldcase
+ *
+ * Case-fold the characters in a string.
+ * Case-folding is locale-independent and not context-sensitive.
+ *
+ */
+VALUE
+icu_ustr_foldcase(VALUE str)
+{
+    UErrorCode  error = U_ZERO_ERROR;
+    UChar      *buf;
+    long        len, capa;
+
+    capa = ICU_LEN(str) + 1;
+    buf = ALLOC_N(UChar, capa);
+    /* capa - 1: one unit stays reserved for the sentinel */
+    len = u_strFoldCase(buf, capa - 1, ICU_PTR(str), ICU_LEN(str), U_FOLD_CASE_DEFAULT, &error);
+    if (U_BUFFER_OVERFLOW_ERROR == error) {
+        /* retry with the size reported by the first call */
+        capa = len + 1;
+        REALLOC_N(buf, UChar, capa);
+        error = U_ZERO_ERROR;
+        len = u_strFoldCase(buf, capa, ICU_PTR(str), ICU_LEN(str), U_FOLD_CASE_DEFAULT, &error);
+    }
+    if (U_FAILURE(error)) {
+        free(buf);
+        rb_raise(rb_eRuntimeError, "%s", u_errorName(error));
+    }
+    /* the new string adopts buf, no further copy is made */
+    return icu_ustr_new_set(buf, len, capa);
+}
+/*
+ * Find the first occurrence of +sub+ in +str+ at or after +offset+
+ * (a negative offset counts from the end).  Returns the index of the
+ * match, +offset+ itself for an empty +sub+, or -1 if nothing is found
+ * or the offset is out of range.
+ */
+static long
+icu_ustr_index(VALUE str, VALUE sub, long offset)
+{
+    UChar *from, *found;
+
+    if (offset < 0) {
+        offset += ICU_LEN(str);
+        if (offset < 0)
+            return -1;
+    }
+    /* not enough units left for a match */
+    if (ICU_LEN(str) - offset < ICU_LEN(sub))
+        return -1;
+    if (ICU_LEN(sub) == 0)
+        return offset;
+
+    from = ICU_PTR(str) + offset;
+    found = u_strFindFirst(from, ICU_LEN(str) - offset, ICU_PTR(sub), ICU_LEN(sub));
+    if (NULL == found)
+        return -1;
+    return (found - from) + offset;
+}
+/**
+ *  call-seq:
+ *     str.index(substring [, offset])   => fixnum or nil
+ *     str.index(regexp [, offset])      => fixnum or nil
+ *
+ *  Returns the index of the first occurrence of the given <i>substring</i>,
+ *  or pattern (<i>regexp</i>) in <i>str</i>. Returns
+ *  <code>nil</code> if not found. If the second parameter is present, it
+ *  specifies the position in the string to begin the search.
+ *
+ *     "hello".u.index('e'.u)             #=> 1
+ *     "hello".u.index('lo'.u)            #=> 3
+ *     "hello".u.index('a'.u)             #=> nil
+ *     "hello".u.index(/[aeiou]/.U, -3)   #=> 4
+ */
+VALUE
+icu_ustr_index_m(int argc, VALUE *argv, VALUE str)
+{
+    VALUE sub, initpos;
+    long  pos = 0;
+    int   is_ustr, is_regexp;
+
+    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
+        pos = NUM2LONG(initpos);
+    }
+
+    is_ustr   = (CLASS_OF(sub) == rb_cUString);
+    is_regexp = (CLASS_OF(sub) == rb_cURegexp);
+    if (!is_ustr && !is_regexp) {
+        rb_raise(rb_eTypeError, "Wrong Type, expected UString or URegexp, got %s", rb_class2name(CLASS_OF(sub)));
+    }
+
+    /*
+     * A negative offset counts from the end.  If it still is negative
+     * after the adjustment it lies before the start of the string and
+     * nothing can match; passing it on would make icu_ustr_index
+     * adjust it a second time and search from a wrong position.
+     */
+    if (pos < 0) {
+        pos += ICU_LEN(str);
+        if (pos < 0)
+            return Qnil;
+    }
+
+    if (is_ustr) {
+        pos = icu_ustr_index(str, sub, pos);
+    } else {
+        pos = icu_reg_search(sub, str, (int) pos, 0);
+    }
+    if (pos == -1)
+        return Qnil;
+    return LONG2NUM(pos);
+}
+/*
+ * Find the last occurrence of +sub+ in +str+ that starts at or before
+ * +pos+.  Returns the index of the match, +pos+ (clamped) for an empty
+ * +sub+, or -1 if there is no match.
+ */
+static long
+icu_ustr_rindex(VALUE str, VALUE sub, long pos)
+{
+    long   len = ICU_LEN(sub);
+    UChar *found;
+
+    /* substring longer than string */
+    if (ICU_LEN(str) < len)
+        return -1;
+    /* the latest start position that still leaves room for a match */
+    if (ICU_LEN(str) - pos < len) {
+        pos = ICU_LEN(str) - len;
+    }
+    /* an empty substring matches right at pos, as in icu_ustr_index */
+    if (len == 0)
+        return pos;
+    /*
+     * A match starting at pos ends at pos + len, so the searched window
+     * has to cover pos + len units; a window of only pos units misses
+     * every match that starts at pos (e.g. the last char of the string).
+     */
+    found = u_strFindLast(ICU_PTR(str), pos + len, ICU_PTR(sub), len);
+    if (NULL == found)
+        return -1;
+    return found - ICU_PTR(str);
+}
+/**
+ *  call-seq:
+ *     str.rindex(substring [, fixnum])   => fixnum or nil
+ *     str.rindex(regexp [, fixnum])   => fixnum or nil
+ *
+ *  Returns the index of the last occurrence of the given <i>substring</i>,
+ *  or pattern (<i>regexp</i>) in <i>str</i>. Returns  <code>nil</code> if not
+ *  found. If the second parameter is present, it  specifies the position in the
+ *  string to end the search---characters beyond  this point will not be considered.
+ *
+ *     "hello".u.rindex('e'.u)             #=> 1
+ *     "hello".u.rindex('l'.u)             #=> 3
+ *     "hello".u.rindex('a'.u)             #=> nil
+ *     "hello".u.rindex(/[aeiou]/.U, -2)   #=> 1
+ */
+VALUE
+icu_ustr_rindex_m(int argc, VALUE *argv, VALUE str)
+{
+    VALUE sub, position;
+    long  pos;
+
+    if (rb_scan_args(argc, argv, "11", &sub, &position) == 2) {
+        pos = NUM2LONG(position);
+        /* a negative position counts from the end of the string */
+        if (pos < 0) {
+            pos += ICU_LEN(str);
+            if (pos < 0)
+                return Qnil;
+        }
+        if (pos > ICU_LEN(str))
+            pos = ICU_LEN(str);
+    } else {
+        pos = ICU_LEN(str);
+    }
+
+    /* only wrapped UString and URegexp objects are accepted */
+    if (TYPE(sub) == T_DATA && CLASS_OF(sub) == rb_cUString) {
+        pos = icu_ustr_rindex(str, sub, pos);
+    } else if (TYPE(sub) == T_DATA && CLASS_OF(sub) == rb_cURegexp) {
+        pos = icu_reg_search(sub, str, pos, 1);
+    } else {
+        rb_raise(rb_eTypeError, "type mismatch: %s given",
+                 rb_obj_classname(sub));
+    }
+    if (pos >= 0)
+        return LONG2NUM(pos);
+    return Qnil;
+}
+/**
+ *  call-seq:
+ *     str.lstrip!   => self or nil
+ *
+ *  Removes leading whitespace from <i>str</i>, returning <code>nil</code> if no
+ *  change was made. See also <code>UString#rstrip!</code> and
+ *  <code>UString#strip!</code>, in all these methods whitespace is an
+ *  Unicode char that has White_Space property.
+ *
+ *     "  hello  ".u.lstrip   #=> "hello  "
+ *     "hello".u.lstrip!      #=> nil
+ */
+VALUE
+icu_ustr_lstrip_bang(VALUE str)
+{
+    UChar   *s;
+    int32_t  n, keep_from, next, c;
+
+    icu_check_frozen(1, str);
+    s = ICU_PTR(str);
+    n = ICU_LEN(str);
+    if (!s || n == 0)
+        return Qnil;
+
+    /*
+     * Walk whole code points from the front.  keep_from only advances
+     * past a code point once it is known to be whitespace, so it always
+     * stays on a code point boundary, also in front of a surrogate pair.
+     */
+    keep_from = 0;
+    while (keep_from < n) {
+        next = keep_from;
+        U16_NEXT(s, next, n, c);
+        if (!u_isUWhiteSpace(c))
+            break;
+        keep_from = next;
+    }
+    if (keep_from == 0)
+        return Qnil;		/* no leading whitespace */
+
+    ICU_LEN(str) = n - keep_from;
+    u_memmove(s, s + keep_from, n - keep_from);
+    s[n - keep_from] = 0;	/* sentinel */
+    return str;
+}
+/**
+ *  call-seq:
+ *     str.lstrip   => new_str
+ *
+ *  Returns a copy of <i>str</i> with leading whitespace removed. See also
+ *  <code>UString#rstrip</code> and <code>UString#strip</code>.
+ *
+ *     "  hello  ".u.lstrip   #=> "hello  "
+ *     "hello".u.lstrip       #=> "hello"
+ */
+VALUE
+icu_ustr_lstrip(VALUE str)
+{
+    VALUE copy = icu_ustr_dup(str);
+
+    /* the nil ("nothing changed") result of the bang method is dropped */
+    icu_ustr_lstrip_bang(copy);
+    return copy;
+}
+/**
+ *  call-seq:
+ *     str.rstrip!   => self or nil
+ *
+ *  Removes trailing whitespace from <i>str</i>, returning <code>nil</code> if
+ *  no change was made. See also <code>UString#lstrip!</code> and
+ *  <code>UString#strip!</code>.
+ *
+ *     "  hello  ".u.rstrip   #=> "  hello"
+ *     "hello".u.rstrip!      #=> nil
+ */
+VALUE
+icu_ustr_rstrip_bang(VALUE str)
+{
+    UChar   *s;
+    int32_t  n, keep_to, prev, c;
+
+    icu_check_frozen(1, str);
+    s = ICU_PTR(str);
+    n = ICU_LEN(str);
+    if (!s || n == 0)
+        return Qnil;
+
+    /*
+     * Walk whole code points from the back.  keep_to only moves in
+     * front of a code point once it is known to be whitespace, so it
+     * always stays on a code point boundary, also behind a surrogate pair.
+     */
+    keep_to = n;
+    while (keep_to > 0) {
+        prev = keep_to;
+        U16_PREV(s, 0, prev, c);
+        if (!u_isUWhiteSpace(c))
+            break;
+        keep_to = prev;
+    }
+    if (keep_to == n)
+        return Qnil;		/* no trailing whitespace */
+
+    ICU_LEN(str) = keep_to;
+    s[keep_to] = 0;		/* sentinel */
+    return str;
+}
+/**
+ *  call-seq:
+ *     str.rstrip   => new_str
+ *
+ *  Returns a copy of <i>str</i> with trailing whitespace removed. See also
+ *  <code>UString#lstrip</code> and <code>UString#strip</code>.
+ *
+ *     "  hello  ".u.rstrip   #=> "  hello"
+ *     "hello".u.rstrip       #=> "hello"
+ */
+VALUE
+icu_ustr_rstrip(VALUE str)
+{
+    VALUE copy = icu_ustr_dup(str);
+
+    /* the nil ("nothing changed") result of the bang method is dropped */
+    icu_ustr_rstrip_bang(copy);
+    return copy;
+}
+/**
+ *  call-seq:
+ *     str.strip!   => str or nil
+ *
+ *  Removes leading and trailing whitespace from <i>str</i>. Returns
+ *  <code>nil</code> if <i>str</i> was not altered.
+ */
+VALUE
+icu_ustr_strip_bang(VALUE str)
+{
+    /* both sides are always processed, left side first */
+    VALUE left  = icu_ustr_lstrip_bang(str);
+    VALUE right = icu_ustr_rstrip_bang(str);
+
+    /* nil only if neither side removed anything */
+    if (!NIL_P(left) || !NIL_P(right))
+        return str;
+    return Qnil;
+}
+/**
+ *  call-seq:
+ *     str.strip   => new_str
+ *
+ *  Returns a copy of <i>str</i> with leading and trailing whitespace removed.
+ *
+ *     "    hello    ".u.strip   #=> "hello"
+ *     "\tgoodbye\r\n".u.strip   #=> "goodbye"
+ */
+VALUE
+icu_ustr_strip(VALUE str)
+{
+    VALUE copy = icu_ustr_dup(str);
+
+    /* the nil ("nothing changed") result of the bang method is dropped */
+    icu_ustr_strip_bang(copy);
+    return copy;
+}
+/* ----------------------------------- */
+/*
+ * Return a new UString holding +str+ normalized with the given
+ * unorm mode.  A string that the quick check reports as already
+ * normalized is simply duplicated.  Raises ArgumentError, carrying the
+ * ICU error name, when normalization fails.
+ */
+VALUE
+icu_ustr_normalize(VALUE str, int32_t mode)
+{
+    UErrorCode  error = U_ZERO_ERROR;
+    long        capa = ICU_LEN(str) + 20;
+    long        needed;
+    UChar      *buf;
+
+    if (UNORM_YES == unorm_quickCheck(ICU_PTR(str), ICU_LEN(str), mode, &error))
+        return icu_ustr_dup(str);
+
+    buf = ALLOC_N(UChar, capa);
+    for (;;) {
+        error = U_ZERO_ERROR;
+        needed = unorm_normalize(ICU_PTR(str), ICU_LEN(str), mode, 0, buf, capa, &error);
+        /*
+         * Grow and retry on overflow, and also when the result fills the
+         * buffer completely: the wrapped string needs one more unit for
+         * its sentinel.
+         */
+        if (error == U_BUFFER_OVERFLOW_ERROR || (U_SUCCESS(error) && needed >= capa)) {
+            capa = needed + 1;
+            REALLOC_N(buf, UChar, capa);
+            continue;
+        }
+        if (U_FAILURE(error)) {
+            free(buf);
+            /* "%s": the error name must never be used as a format string */
+            rb_raise(rb_eArgError, "%s", u_errorName(error));
+        }
+        /* the new string adopts buf */
+        return icu_ustr_new_set(buf, needed, capa);
+    }
+}
+/**
+ * UNORM_NFKC Compatibility decomposition followed by canonical
+ * composition.
+ */
+VALUE
+icu_ustr_normalize_KC(VALUE str)
+{
+    const int32_t mode = UNORM_NFKC;
+
+    return icu_ustr_normalize(str, mode);
+}
+/**
+ * UNORM_NFKD Compatibility decomposition.
+ */
+VALUE
+icu_ustr_normalize_KD(VALUE str)
+{
+    const int32_t mode = UNORM_NFKD;
+
+    return icu_ustr_normalize(str, mode);
+}
+/**
+ * UNORM_NFD Canonical decomposition.
+ */
+VALUE
+icu_ustr_normalize_D(VALUE str)
+{
+    const int32_t mode = UNORM_NFD;
+
+    return icu_ustr_normalize(str, mode);
+}
+/**
+ * UNORM_FCD
+ */
+VALUE
+icu_ustr_normalize_FCD(VALUE str)
+{
+    const int32_t mode = UNORM_FCD;
+
+    return icu_ustr_normalize(str, mode);
+}
+/**
+ * UNORM_NFC Canonical decomposition followed by canonical composition.
+ */
+VALUE
+icu_ustr_normalize_C(VALUE str)
+{
+    const int32_t mode = UNORM_NFC;
+
+    return icu_ustr_normalize(str, mode);
+}
+/*
+ * Rescue handler used while yielding pieces: closes the break iterator,
+ * clears the caller's pointer to it and raises a RuntimeError naming
+ * the class of the rescued exception.
+ */
+VALUE my_ubrk_close(UBreakIterator ** boundary, VALUE errorinfo)
+{
+    ubrk_close(*boundary);
+    *boundary = NULL;
+    rb_raise(rb_eRuntimeError, "Unhandled exception: %s", rb_obj_classname(errorinfo));
+    return Qnil;
+}
+/* state shared between the iteration body and its cleanup */
+struct ustr_each_ctx {
+    VALUE           str;
+    UBreakIterator *boundary;
+};
+/* rb_ensure body: yield every piece between two adjacent boundaries */
+static VALUE
+ustr_each_iterate(VALUE arg)
+{
+    struct ustr_each_ctx *ctx = (struct ustr_each_ctx *) arg;
+    int32_t start, end;
+    VALUE   piece;
+
+    start = ubrk_first(ctx->boundary);
+    for (end = ubrk_next(ctx->boundary); end != UBRK_DONE;
+         start = end, end = ubrk_next(ctx->boundary)) {
+        piece = icu_ustr_new(ICU_PTR(ctx->str) + start, (long) (end - start));
+        rb_rescue(rb_yield, piece, my_ubrk_close, (VALUE) &ctx->boundary);
+    }
+    return Qnil;
+}
+/* rb_ensure cleanup: runs on normal return, on raise and on non-local exit */
+static VALUE
+ustr_each_cleanup(VALUE arg)
+{
+    struct ustr_each_ctx *ctx = (struct ustr_each_ctx *) arg;
+
+    --(USTRING(ctx->str)->busy);
+    /* my_ubrk_close may already have closed the iterator and cleared it */
+    if (ctx->boundary) {
+        ubrk_close(ctx->boundary);
+        ctx->boundary = NULL;
+    }
+    return Qnil;
+}
+/* UBRK_CHARACTER, UBRK_WORD, UBRK_LINE, UBRK_SENTENCE */
+/*
+ * Yield each piece of +str+ delimited by the break iterator of the
+ * given +mode+; an optional String argument selects the locale.
+ * The string is marked busy while the block runs.  The busy mark and
+ * the iterator are released through rb_ensure, so a block that raises
+ * or leaves with break no longer keeps the string busy for good.
+ */
+VALUE
+icu_ustr_each_mode(int argc, VALUE *argv, VALUE str, int32_t mode)
+{
+    UErrorCode  error = U_ZERO_ERROR;
+    VALUE       loc;
+    char       *locale = "";
+    struct ustr_each_ctx ctx;
+
+    if (rb_scan_args(argc, argv, "01", &loc) == 1) {
+        Check_Type(loc, T_STRING);
+        locale = RSTRING_PTR(loc);
+    }
+    ctx.str = str;
+    ctx.boundary = ubrk_open(mode, locale, ICU_PTR(str), ICU_LEN(str), &error);
+    if (U_FAILURE(error))
+        rb_raise(rb_eArgError, "Error %s", u_errorName(error));
+
+    ++(USTRING(str)->busy);
+    rb_ensure(ustr_each_iterate, (VALUE) &ctx, ustr_each_cleanup, (VALUE) &ctx);
+    return str;
+}
+/**
+ *  call-seq:
+ *     str.each_word(locale = "")       {|substr| block }        => str
+ *
+ * Word boundary analysis is used by search and replace functions, as well as within text editing
+ * applications that allow the user to select words with a double click. Word selection provides
+ * correct interpretation of punctuation marks within and following words. Characters that are not
+ * part of a word, such as symbols or punctuation marks, have word-breaks on both sides.
+ *
+ */
+VALUE
+icu_ustr_each_word(int argc, VALUE *argv, VALUE str)
+{
+    const int32_t mode = UBRK_WORD;
+
+    return icu_ustr_each_mode(argc, argv, str, mode);
+}
+/**
+ *  call-seq:
+ *     str.each_char(locale = "")       {|substr| block }        => str
+ *
+ * Character boundary analysis allows users to interact with characters as they expect to,
+ * for example, when moving the cursor through a text string. Character boundary analysis provides
+ * correct navigation through character strings, regardless of how the character is stored.
+ * For example, an accented character might be stored as a base character and a diacritical mark.
+ * What users consider to be a character can differ between languages.
+ *
+ */
+VALUE
+icu_ustr_each_char(int argc, VALUE *argv, VALUE str)
+{
+    /* Thin wrapper: run the shared boundary iteration in character mode. */
+    return icu_ustr_each_mode(argc, argv, str, UBRK_CHARACTER);
+}
+/**
+ *  call-seq:
+ *     str.each_line_break(locale = "") {|substr| block }        => str
+ *
+ * Line boundary analysis determines where a text string can be broken when line-wrapping.
+ * The mechanism correctly handles punctuation and hyphenated words.
+ *
+ */
+VALUE
+icu_ustr_each_line(int argc, VALUE *argv, VALUE str)
+{
+    /* Thin wrapper: run the shared boundary iteration in line-break mode. */
+    return icu_ustr_each_mode(argc, argv, str, UBRK_LINE);
+}
+/**
+ *  call-seq:
+ *     str.each_sentence(locale = "")   {|substr| block }        => str
+ *
+ * Sentence boundary analysis allows selection with correct interpretation of periods
+ * within numbers and abbreviations, and trailing punctuation marks such as quotation marks and parentheses.
+ *
+ */
+VALUE
+icu_ustr_each_sentence(int argc, VALUE *argv, VALUE str)
+{
+    /* Thin wrapper: run the shared boundary iteration in sentence mode. */
+    return icu_ustr_each_mode(argc, argv, str, UBRK_SENTENCE);
+}
+/**
+ * call-seq:
+ *    str.to_u(encoding = 'utf8') => UString
+ *
+ * Returns self.
+ */
+VALUE
+icu_ustr_to_ustr(int argc, VALUE *argv, VALUE str)
+{
+    /* The receiver is returned as is; any arguments are ignored. */
+    (void)argc;
+    (void)argv;
+    return str;
+}
+/**
+ * call-seq:
+ *    str.to_s(encoding = 'utf8') => String
+ *
+ * Converts to Ruby String (byte-oriented) value in  given encoding.
+ * When no encoding is given, assumes UTF-8.
+ */
+VALUE
+icu_ustr_to_rstr(int argc, VALUE *argv, VALUE str)
+{
+    /*
+     * Converts the UTF-16 contents of +str+ into a byte-oriented Ruby String.
+     * With no argument, or an encoding name starting with "utf8", ICU's
+     * u_strToUTF8 is used; any other name is opened as an ICU converter.
+     * Raises ArgumentError carrying the ICU error name on failure.
+     */
+    VALUE           enc;
+    VALUE           s;
+    char           *encoding = NULL;	/* NULL selects the UTF-8 path */
+    char           *buf;
+    UErrorCode      error = U_ZERO_ERROR;
+    UConverter     *conv = NULL;
+    int             enclen, needed = 0;
+    if (rb_scan_args(argc, argv, "01", &enc) == 1) {
+        Check_Type(enc, T_STRING);
+        encoding = RSTRING_PTR(enc);
+    }
+    /* First guess at the output size; both paths retry once on overflow. */
+    enclen = ICU_LEN(str) + 1;
+    buf = ALLOC_N(char, enclen);
+    if (!encoding || !strncmp(encoding, "utf8", 4)) {
+        u_strToUTF8(buf, enclen, &needed, ICU_PTR(str), ICU_LEN(str), &error);
+        if (U_BUFFER_OVERFLOW_ERROR == error) {
+            /* `needed` now holds the required byte count. */
+            REALLOC_N(buf, char, needed + 1);
+            error = U_ZERO_ERROR;
+            u_strToUTF8(buf, needed, &needed, ICU_PTR(str), ICU_LEN(str), &error);
+        }
+        if (U_FAILURE(error)) {
+            xfree(buf);
+            rb_raise(rb_eArgError, "%s", u_errorName(error));
+        }
+        s = rb_str_new(buf, needed);
+    } else {
+        conv = ucnv_open(encoding, &error);
+        if (U_FAILURE(error)) {
+            ucnv_close(conv);
+            xfree(buf);
+            rb_raise(rb_eArgError, "%s", u_errorName(error));
+        }
+        enclen =
+            ucnv_fromUChars(conv, buf, enclen, ICU_PTR(str), ICU_LEN(str),
+                            &error);
+        if (U_BUFFER_OVERFLOW_ERROR == error) {
+            /* `enclen` now holds the required byte count. */
+            REALLOC_N(buf, char, enclen + 1);
+            error = U_ZERO_ERROR;
+            ucnv_fromUChars(conv, buf, enclen, ICU_PTR(str), ICU_LEN(str),
+                            &error);
+        }
+        if (U_FAILURE(error)) {
+            /* Release the converter too; rb_raise does not return. */
+            ucnv_close(conv);
+            xfree(buf);
+            rb_raise(rb_eArgError, "%s", u_errorName(error));
+        }
+        s = rb_str_new(buf, enclen);
+        ucnv_close(conv);
+    }
+    /* buf came from ALLOC_N, so it is released with xfree. */
+    xfree(buf);
+    return s;
+}
+/* -------------- */
+extern VALUE    icu_format(UChar * pattern, int32_t len, VALUE args,
+			   int32_t arg_len, char *locale);
+/**
+ * call-seq:
+ *     str.format(locale, [*args])
+ *
+ * Powerful locale-sensitive message formatting. see [./docs/FORMATTING]
+ *
+ * Valid argument types are: +Fixnum+, +UString+, +Float+, +Time+ .
+ *
+ * */
+VALUE
+icu_ustr_format(VALUE str, VALUE args)
+{
+    VALUE           locale_name;
+    Check_Type(args, T_ARRAY);
+    /* The first array element is the locale; shifting removes it from args. */
+    locale_name = rb_ary_shift(args);
+    Check_Type(locale_name, T_STRING);
+    /* The remaining elements are passed on as the format arguments. */
+    return icu_format(ICU_PTR(str), ICU_LEN(str),
+                      args, RARRAY_LEN(args),
+                      RSTRING_PTR(locale_name));
+}
+/* ------ UString regexp related functions ---- */
+/**
+ *  call-seq:
+ *     str =~ uregexp         => UMatch or nil
+ *     str =~ other_str       => integer or nil
+ *
+ *  Match---If <code>URegexp</code> is given, use it as a pattern to
+ *  match against <i>str</i> and return UMatch or +nil+.
+ *
+ *  If <code>UString</code> is given, returns index of it
+ *  (similar to <code>UString#index</code>).
+ *
+ *  Otherwise returns +nil+
+ *
+ *     "cat o' 9 tails".u =~ '\d'    #=> nil
+ *     "cat o' 9 tails".u =~ /\d/.U  #=> #<UMatch:0xf6fb7d5c @cg=[<U000039>]>
+ *     "cat o' 9 tails".u =~ 9       #=> nil
+ *     "cat o' 9 tails".u =~ '9'.u   #=> 7
+ */
+VALUE
+icu_ustr_match(VALUE x, VALUE y)
+{
+    long            found_at;
+    VALUE           klass;
+    /* Plain Ruby Regexp objects are rejected outright. */
+    if (TYPE(y) == T_REGEXP) {
+        rb_raise(rb_eTypeError, "Wrong type: can't match against Regexp. Use URegexp instead");
+    }
+    klass = CLASS_OF(y);
+    if (klass == rb_cURegexp) {
+        return icu_reg_match(y, x);
+    }
+    if (klass != rb_cUString) {
+        /* Anything that is neither URegexp nor UString never matches. */
+        return Qnil;
+    }
+    /* UString operand: report the index of the substring, or nil. */
+    found_at = icu_ustr_index(x, y, 0);
+    if (found_at != -1) {
+        return LONG2NUM(found_at);
+    }
+    return Qnil;
+}
+/*
+ * Returns +pat+ unchanged when it is a URegexp, or the result of
+ * icu_reg_comp when it is a UString.  Any other object is handed to
+ * Check_Class (presumably raising -- confirm against the macro), and
+ * nil is returned if that check returns.  +quote+ is not used.
+ */
+VALUE
+get_pat(VALUE pat, int quote)
+{
+    VALUE           klass = CLASS_OF(pat);
+    (void)quote;
+    if (klass == rb_cUString) {
+        return icu_reg_comp(pat);
+    }
+    if (klass != rb_cURegexp) {
+        Check_Class(pat, rb_cURegexp);
+        return Qnil;
+    }
+    return pat;
+}
+/**
+ *  call-seq:
+ *     str.match(pattern)   => matchdata or nil
+ *
+ *  Converts <i>pattern</i> to a <code>URegexp</code> (if it isn't already one),
+ *  then invokes its <code>match</code> method on <i>str</i>.
+ *
+ *     'hello'.u.match('(.)\1'.u)      #=> #<UMatch:0x401b3d30>
+ *     'hello'.u.match('(.)\1'.u)[0]   #=> "ll"
+ *     'hello'.u.match(/(.)\1/.U)[0]   #=> "ll"
+ *     'hello'.u.match('xx'.u)       #=> nil
+ */
+VALUE
+icu_ustr_match_m(VALUE str, VALUE re)
+{
+    /* Coerce the argument to a URegexp, then dispatch to its #match. */
+    VALUE           regexp = get_pat(re, 0);
+    return rb_funcall(regexp, rb_intern("match"), 1, str);
+}
+/*
+ * Performs one step of a scan: searches +str+ for +pat+ from *start and
+ * advances *start past the match (one extra unit for an empty match, so
+ * the scan always makes progress).  Returns nil when nothing matches,
+ * the whole match when icu_group_count reports at most one, and
+ * otherwise an array filled from groups 1..count.
+ */
+VALUE
+ustr_scan_once(VALUE str, VALUE pat, long *start)
+{
+    VALUE           groups;
+    long            idx;
+    long            match_beg, match_end, group_total;
+    if (icu_reg_search(pat, str, *start, 0) < 0) {
+        return Qnil;
+    }
+    icu_reg_range(pat, 0, &match_beg, &match_end);
+    *start = (match_beg == match_end) ? match_end + 1 : match_end;
+    group_total = icu_group_count(pat);
+    if (group_total <= 1) {
+        return icu_reg_nth_match(pat, 0);
+    }
+    groups = rb_ary_new2(group_total);
+    for (idx = 0; idx < group_total; idx++) {
+        rb_ary_store(groups, idx, icu_reg_nth_match(pat, idx + 1));
+    }
+    return groups;
+}
+/* Scan state shared between icu_ustr_scan and its rb_ensure callbacks. */
+struct ustr_scan_state {
+    VALUE           str;	/* string being scanned */
+    VALUE           pat;	/* pattern returned by get_pat */
+    long            start;	/* next search offset, advanced by ustr_scan_once */
+};
+/* rb_ensure body: yield each successive match to the block. */
+static VALUE
+ustr_scan_yield_matches(VALUE arg)
+{
+    struct ustr_scan_state *st = (struct ustr_scan_state *) arg;
+    VALUE           result;
+    while (!NIL_P(result = ustr_scan_once(st->str, st->pat, &st->start))) {
+        rb_yield(result);
+    }
+    return Qnil;
+}
+/* rb_ensure cleanup: undo the busy increment taken by icu_ustr_scan. */
+static VALUE
+ustr_scan_release(VALUE str)
+{
+    --(USTRING(str)->busy);
+    return Qnil;
+}
+/**
+ *  call-seq:
+ *     str.scan(pattern)                         => array
+ *     str.scan(pattern) {|match, ...| block }   => str
+ *
+ *  Both forms iterate through <i>str</i>, matching the pattern (which may be a
+ *  <code>URegexp</code> or a <code>UString</code>). For each match, a result is
+ *  generated and either added to the result array or passed to the block. If
+ *  the pattern contains no groups, each individual result consists of the
+ *  matched string.  If the pattern contains groups, each
+ *  individual result is itself an array containing one entry per group.
+ *
+ *     a = "cruel world".u
+ *     a.scan(/\w+/.U)        #=> ["cruel", "world"]
+ *     a.scan(/.../.U)        #=> ["cru", "el ", "wor"]
+ *     a.scan(/(...)/.U)      #=> [["cru"], ["el "], ["wor"]]
+ *     a.scan(/(..)(..)/.U)   #=> [["cr", "ue"], ["l ", "wo"]]
+ *
+ *  And the block form:
+ *
+ *     a.scan(/\w+/.U) {|w| print "<<#{w}>> " }
+ *     print "\n"
+ *     a.scan(/(.)(.)/.U) {|a,b| print b, a }
+ *     print "\n"
+ *
+ *  <em>produces:</em>
+ *
+ *     <<cruel>> <<world>>
+ *     rceu lowlr
+ */
+VALUE
+icu_ustr_scan(VALUE str, VALUE pat)
+{
+    struct ustr_scan_state st;
+    VALUE           result;
+    st.str = str;
+    st.pat = get_pat(pat, 1);
+    st.start = 0;
+    if (!rb_block_given_p()) {
+        /* Array form: no user code runs, so no busy marking is needed. */
+        VALUE           ary = rb_ary_new();
+        while (!NIL_P(result = ustr_scan_once(st.str, st.pat, &st.start))) {
+            rb_ary_push(ary, result);
+        }
+        return ary;
+    }
+    /*
+     * Block form: the string is marked busy while the block runs.  The
+     * loop runs under rb_ensure so the mark is also dropped when the
+     * block raises or breaks out of the iteration.
+     */
+    ++(USTRING(str)->busy);
+    rb_ensure(ustr_scan_yield_matches, (VALUE) &st, ustr_scan_release, str);
+    return str;
+}
+/**
+ * call-seq:
+ *     str.char_span(start[, len, [locale]])
+ *
+ * Returns substring starting at <code>start</code>-th char, with <code>len</code> chars length.
+ * Here "char" means "grapheme cluster", so start index and len are measured in terms of "graphemes"
+ * locale parameter is optional.
+ * Negative len can be supplied to receive to end of string.
+ *
+ * String is transformed to NFC before extract.
+ */
+VALUE
+icu_ustr_char_span(int argc, VALUE * argv, VALUE str)
+{
+    UErrorCode      status = 0;
+    UBreakIterator *iter;
+    VALUE           v_start, v_len, v_locale;
+    char           *locale_id = NULL;
+    int32_t         want_start = 0, want_len = -1;	/* measured in graphemes */
+    int32_t         seen = 0;				/* graphemes counted so far */
+    int32_t         first_unit = -1, last_unit = -1;	/* code unit offsets of the span */
+    int32_t         seg_beg, seg_end, given;
+    given = rb_scan_args(argc, argv, "12", &v_start, &v_len, &v_locale);
+    Check_Type(v_start, T_FIXNUM);
+    want_start = FIX2INT(v_start);
+    if (want_start < 0) {
+        rb_raise(rb_eArgError, "Negative offset aren't allowed!");
+    }
+    if (given > 1) {
+        Check_Type(v_len, T_FIXNUM);
+        want_len = FIX2INT(v_len);
+        /* Zero or negative length means "to the end of the string". */
+        if (want_len <= 0) {
+            want_len = -1;
+        }
+    }
+    if (given > 2) {
+        Check_Type(v_locale, T_STRING);
+        locale_id = RSTRING_PTR(v_locale);
+    }
+    /* Work on an NFC copy unless the quick check says it already is NFC. */
+    if (unorm_quickCheck(ICU_PTR(str), ICU_LEN(str), UNORM_NFC, &status) != UNORM_YES) {
+        str = icu_ustr_normalize_C(str);
+    }
+    iter = ubrk_open(UBRK_CHARACTER, locale_id, ICU_PTR(str), ICU_LEN(str), &status);
+    if (U_FAILURE(status)) {
+        rb_raise(rb_eArgError, "Error %s", u_errorName(status));
+    }
+    /* Walk every grapheme, recording where the requested span starts and ends. */
+    seg_beg = ubrk_first(iter);
+    seg_end = ubrk_next(iter);
+    while (seg_end != UBRK_DONE) {
+        if (seen == want_start) {
+            first_unit = seg_beg;
+        }
+        seen++;
+        if (want_len > 0 && seen == want_start + want_len) {
+            last_unit = seg_end;
+        }
+        seg_beg = seg_end;
+        seg_end = ubrk_next(iter);
+    }
+    ubrk_close(iter);
+    if (first_unit == -1) {
+        rb_raise(rb_eArgError, "Char index %d out of bounds %d", want_start, seen);
+    }
+    if (last_unit == -1) {
+        last_unit = ICU_LEN(str);	/* reached end of string */
+    }
+    return icu_ustr_new(ICU_PTR(str) + first_unit, last_unit - first_unit);
+}
+/*
+ * Splits +str+ into an array of UStrings, one per character boundary
+ * segment reported by ICU for locale +loc+.  The string is replaced by
+ * its NFC form first unless the quick check reports it is already NFC.
+ */
+VALUE
+icu_ustr_chars(VALUE str, char *loc)
+{
+    UErrorCode      status = 0;
+    UBreakIterator *iter;
+    VALUE           graphemes;
+    int32_t         seg_beg, seg_end;
+    if (unorm_quickCheck(ICU_PTR(str), ICU_LEN(str), UNORM_NFC, &status) != UNORM_YES) {
+        str = icu_ustr_normalize_C(str);
+    }
+    iter = ubrk_open(UBRK_CHARACTER, loc, ICU_PTR(str), ICU_LEN(str), &status);
+    if (U_FAILURE(status)) {
+        rb_raise(rb_eArgError, "Error %s", u_errorName(status));
+    }
+    graphemes = rb_ary_new();
+    seg_beg = ubrk_first(iter);
+    while ((seg_end = ubrk_next(iter)) != UBRK_DONE) {
+        rb_ary_push(graphemes, icu_ustr_new(ICU_PTR(str) + seg_beg, seg_end - seg_beg));
+        seg_beg = seg_end;
+    }
+    ubrk_close(iter);
+    return graphemes;
+}
+/**
+ * call-seq:
+ *     str.chars(locale = "")  => array of character
+ *
+ * Returns array of character graphemes, locale dependent.
+ * String is transformed to NFC before split.
+ * */
+VALUE
+icu_ustr_chars_m(int argc, VALUE *argv, VALUE str)
+{
+    VALUE           locale;
+    char           *locale_id = "";	/* used when no locale argument is given */
+    if (rb_scan_args(argc, argv, "01", &locale) == 1) {
+        Check_Type(locale, T_STRING);
+        locale_id = RSTRING_PTR(locale);
+    }
+    return icu_ustr_chars(str, locale_id);
+}
+/**
+ *  call-seq:
+ *     str.split(pattern, [limit])   => anArray
+ *
+ *  Divides <i>str</i> into substrings based on a delimiter, returning an array
+ *  of these substrings. <i>str</i> is divided where the
+ *  pattern matches.
+ *
+ *  NOTE: split(//) or split("") is not supported.
+ *  To get array of chars use #chars or #codepoints methods
+ *
+ *  If the <i>limit</i> parameter is omitted, trailing null fields are
+ *  suppressed. If <i>limit</i> is a positive number, at most that number of
+ *  fields will be returned (if <i>limit</i> is <code>1</code>, the entire
+ *  string is returned as the only entry in an array). If negative, there is no
+ *  limit to the number of fields returned, and trailing null fields are not
+ *  suppressed.
+ *
+ *  NOTE: there's a difference in ICU regexp split and Ruby Regexp actions:
+ *     "a,b,c,,".split(/,/, -1)  # => ["a", "b", "c", "", ""]
+ *     "a,b,c,,".u.split(ure(","), -1)  # => ["a", "b", "c", ""]
+ *  it seems to be by design, in icu/source/i18n/uregex.cpp uregex_split():
+ *          if (nextOutputStringStart == inputLen) {
+ *              // The delimiter was at the end of the string.  We're done.
+ *              break;
+ *          }
+ */
+VALUE
+icu_ustr_split_m(int argc, VALUE *argv, VALUE str)
+{
+    VALUE           delimiter;
+    VALUE           delim_class;
+    VALUE           pieces;
+    VALUE           limit = Qnil;
+    int             max_fields = 0;
+    if (rb_scan_args(argc, argv, "11", &delimiter, &limit) == 2) {
+        max_fields = NUM2INT(limit);
+        /* Non-positive limits are passed on to icu_reg_split as nil. */
+        if (max_fields <= 0) {
+            limit = Qnil;
+        }
+    }
+    delim_class = CLASS_OF(delimiter);
+    if (delim_class == rb_cURegexp) {
+        pieces = icu_reg_split(delimiter, str, limit);
+    } else if (delim_class == rb_cUString) {
+        pieces = icu_reg_split(icu_reg_comp(delimiter), str, limit);
+    } else {
+        rb_raise(rb_eArgError, "Expected UString or URegexp, got %s",
+                 rb_class2name(delim_class));
+    }
+    /* Limit omitted or zero: drop trailing empty fields. */
+    if (NIL_P(limit) && max_fields == 0) {
+        while (RARRAY_LEN(pieces) > 0) {
+            VALUE           last = RARRAY_PTR(pieces)[RARRAY_LEN(pieces) - 1];
+            if (ICU_LEN(last) != 0) {
+                break;
+            }
+            rb_ary_pop(pieces);
+        }
+    }
+    return pieces;
+}
+/**
+ * call-seq:
+ *     str.inspect => String
+ *
+ * Shows codepoints in form of \uxxxx. For debug purposes.
+ */
+VALUE
+icu_ustr_inspect(VALUE str)
+{
+    VALUE           out = rb_str_new2("");
+    char            escaped[] = "\\u0010FFFF  ";	/* sized for the widest escape */
+    UChar          *units = ICU_PTR(str);
+    int32_t         total = ICU_LEN(str);
+    int32_t         pos = 0;
+    int32_t         cp;
+    int32_t         written;
+    while (pos < total) {
+        /* U16_NEXT reads one code point and advances pos past it. */
+        U16_NEXT(units, pos, total, cp);
+        if (cp < 0x10000) {
+            written = sprintf(escaped, "\\u%04X", cp);
+        } else {
+            /* Supplementary code points are printed with eight hex digits. */
+            written = sprintf(escaped, "\\u%08X", cp);
+        }
+        rb_str_cat(out, escaped, written);
+    }
+    return out;
+}
+/**
+ * call-seq:
+ *     str.codepoints => array of fixnums
+ *
+ * Returns array of codepoints as fixnums.
+ */
+VALUE
+icu_ustr_points(VALUE str)
+{
+    VALUE           points = rb_ary_new();
+    UChar          *units = ICU_PTR(str);
+    int32_t         total = ICU_LEN(str);
+    int32_t         pos;
+    int32_t         cp;
+    /* U16_NEXT advances pos itself, so the loop has no increment clause. */
+    for (pos = 0; pos < total;) {
+        U16_NEXT(units, pos, total, cp);
+        rb_ary_push(points, LONG2NUM(cp));
+    }
+    return points;
+}
+/**
+ * call-seq:
+ *     str.inspect_names => String
+ *
+ * Dumps names of codepoints in this UString (debug).
+ */
+VALUE
+icu_ustr_inspect_names(VALUE str)
+{
+    /*
+     * For each code point appends "<Uxxxxxx>", then the name reported by
+     * u_charName, then a newline.
+     */
+    VALUE           buf = rb_str_new2("");
+    char            temp[301];
+    UErrorCode      error;
+    int32_t         i,
+                    n,
+                    c,
+                    l;
+    UChar          *s = ICU_PTR(str);
+    n = ICU_LEN(str);
+    i = 0;
+    while (i < n) {
+        /*
+         * The explicit semicolon keeps this compiling whether U16_NEXT
+         * expands to a bare block or to a do/while(0) statement.
+         */
+        U16_NEXT(s, i, n, c);	/* care surrogates */
+        /* Append exactly what was formatted instead of assuming 9 bytes. */
+        l = sprintf(temp, "<U%06X>", c);
+        rb_str_cat(buf, temp, l);
+        error = U_ZERO_ERROR;
+        l = u_charName(c, U_UNICODE_CHAR_NAME, temp, 300, &error);
+        /* On failure the returned length may exceed temp; skip the name. */
+        if (U_SUCCESS(error) && l > 0) {
+            rb_str_cat(buf, temp, l);
+        }
+        rb_str_cat(buf, "\n", 1);
+    }
+    return buf;
+}
+/*
+ * Searches +str+ for +re+ from offset 0 and returns match group +nth+,
+ * or nil when the search fails.
+ */
+VALUE
+icu_ustr_subpat(VALUE str, VALUE re, int nth)
+{
+    if (icu_reg_search(re, str, 0, 0) < 0) {
+        return Qnil;
+    }
+    return icu_reg_nth_match(re, nth);
+}
+/*
+ * Returns a new UString holding +len+ code units of +str+ starting at
+ * code unit +beg+ (both are UTF-16 code unit indexes; a negative +beg+
+ * counts from the end).  Returns nil when +len+ is negative or +beg+
+ * lies outside the string.  The range is clamped to the string and then
+ * widened so that it never starts or ends inside a surrogate pair.
+ */
+VALUE
+icu_ustr_substr(VALUE str, long beg, long len)
+{
+    int32_t         str_size = ICU_LEN(str);
+    long            limit;
+    if (len < 0) return Qnil;
+    if (beg > str_size) return Qnil;
+    if (beg < 0) {
+        beg += str_size;
+        if (beg < 0) return Qnil;
+    }
+    /* Written as a subtraction so a huge len cannot overflow beg + len. */
+    if (len > str_size - beg) {
+        len = str_size - beg;
+    }
+    if (len < 0) {
+        len = 0;
+    }
+    if (len == 0) return icu_ustr_new(0, 0);
+    /*
+     * Adjust to code point boundaries.  U16_SET_CP_LIMIT takes an
+     * absolute index, so it is applied to the end offset (beg + len)
+     * and not to the length.
+     */
+    limit = beg + len;
+    U16_SET_CP_START(ICU_PTR(str), 0, beg);
+    U16_SET_CP_LIMIT(ICU_PTR(str), 0, limit, str_size);
+    return icu_ustr_new(ICU_PTR(str) + beg, limit - beg);
+}
+/*
+ * Single-argument element reference.  Dispatches on the index:
+ * Fixnum -> one-unit substring, URegexp -> whole match, UString -> a
+ * copy of the index if it occurs in +str+, Range -> substring; anything
+ * else is converted with NUM2LONG and treated as a numeric index.
+ */
+VALUE
+icu_ustr_aref(VALUE str, VALUE indx)
+{
+    long            idx;
+    int32_t         cp_len = ICU_LEN(str);
+    int             kind = TYPE(indx);
+    if (kind == T_FIXNUM) {
+        idx = FIX2LONG(indx);
+    } else {
+        if (kind == T_DATA) {
+            if (CLASS_OF(indx) == rb_cURegexp) {
+                return icu_ustr_subpat(str, indx, 0);
+            }
+            if (CLASS_OF(indx) == rb_cUString) {
+                if (icu_ustr_index(str, indx, 0) == -1) {
+                    return Qnil;
+                }
+                return icu_ustr_dup(indx);
+            }
+            /* Other wrapped objects continue with the generic handling. */
+        }
+        {
+            /* Check whether indx is a Range. */
+            long            beg,
+                            len;
+            VALUE           in_range = rb_range_beg_len(indx, &beg, &len, cp_len, 0);
+            if (in_range == Qnil) {
+                return Qnil;
+            }
+            if (in_range != Qfalse) {
+                return icu_ustr_substr(str, beg, len);
+            }
+        }
+        /* Not a range: convert to an integer index. */
+        idx = NUM2LONG(indx);
+    }
+    /* Numeric index: negative values count from the end. */
+    if (idx < 0) {
+        idx = cp_len + idx;
+    }
+    if (idx < 0 || cp_len <= idx) {
+        return Qnil;
+    }
+    return icu_ustr_substr(str, idx, 1);
+}
+/**
+ *  call-seq:
+ *     str[fixnum]                 => new_str or nil
+ *     str[fixnum, fixnum]         => new_str or nil
+ *     str[range]                  => new_str or nil
+ *     str[regexp]                 => new_str or nil
+ *     str[regexp, fixnum]         => new_str or nil
+ *     str[other_str]              => new_str or nil
+ *     str.slice(fixnum)           => new_str or nil
+ *     str.slice(fixnum, fixnum)   => new_str or nil
+ *     str.slice(range)            => new_str or nil
+ *     str.slice(regexp)           => new_str or nil
+ *     str.slice(regexp, fixnum)   => new_str or nil
+ *     str.slice(other_str)        => new_str or nil
+ *
+ *  Element Reference---If passed a single <code>Fixnum</code>, returns
+ *  substring with the character at that position. If passed two <code>Fixnum</code>
+ *  objects, returns a substring starting at the offset given by the first, and
+ *  a length given by the second. If given a range, a substring containing
+ *  characters at offsets given by the range is returned. In all three cases, if
+ *  an offset is negative, it is counted from the end of <i>str</i>. Returns
+ *  <code>nil</code> if the initial offset falls outside the string, the length
+ *  is negative, or the beginning of the range is greater than the end.
+ *
+ *  If a <code>URegexp</code> is supplied, the matching portion of <i>str</i> is
+ *  returned. If a numeric parameter follows the regular expression, that
+ *  component of the <code>UMatch</code> is returned instead. If a
+ *  <code>UString</code> is given, that string is returned if it occurs in
+ *  <i>str</i>. In both cases, <code>nil</code> is returned if there is no
+ *  match.
+ *
+ *     a = "hello there".u
+ *     a[1]                   #=> 'e'
+ *     a[1,3]                 #=> "ell"
+ *     a[1..3]                #=> "ell"
+ *     a[-3,2]                #=> "er"
+ *     a[-4..-2]              #=> "her"
+ *     a[12..-1]              #=> nil
+ *     a[-2..-4]              #=> ""
+ *     a[/[aeiou](.)\1/.U]      #=> "ell"
+ *     a[/[aeiou](.)\1/.U, 0]   #=> "ell"
+ *     a[/[aeiou](.)\1/.U, 1]   #=> "l"
+ *     a[/[aeiou](.)\1/.U, 2]   #=> nil
+ *     a["lo".u]                #=> "lo"
+ *     a["bye".u]               #=> nil
+ */
+VALUE
+icu_ustr_aref_m(int argc, VALUE *argv, VALUE str)
+{
+    /* One argument: full index dispatch. */
+    if (argc == 1) {
+        return icu_ustr_aref(str, argv[0]);
+    }
+    if (argc != 2) {
+        rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)",
+                 argc);
+    }
+    /* Two arguments: (regexp, group) or (start, length). */
+    if (CLASS_OF(argv[0]) == rb_cURegexp) {
+        return icu_ustr_subpat(str, argv[0], NUM2INT(argv[1]));
+    } else {
+        long            first = NUM2LONG(argv[0]);
+        long            count = NUM2LONG(argv[1]);
+        return icu_ustr_substr(str, first, count);
+    }
+}
+/**
+ *  call-seq:
+ *     str.sub!(pattern, replacement)          => str or nil
+ *     str.sub!(pattern) {|match| block }      => str or nil
+ *
+ *  Performs the substitutions of <code>UString#sub</code> in place,
+ *  returning <i>str</i>, or <code>nil</code> if no substitutions were
+ *  performed.
+ */
+VALUE
+icu_ustr_sub_bang(int argc, VALUE *argv, VALUE str)
+{
+    /*
+     * Same guard as the other in-place mutators in this file (gsub!,
+     * insert, []=, slice!) before the receiver is modified.
+     */
+    icu_check_frozen(1, str);
+    /* bang = 1: modify in place; once = 1: stop after the first match. */
+    return ustr_gsub(argc, argv, str, 1, 1);
+}
+/**
+ *  call-seq:
+ *     str.sub(pattern, replacement)         => new_str
+ *     str.sub(pattern) {|match| block }     => new_str
+ *
+ *  Returns a copy of <i>str</i> with the <em>first</em> occurrence of
+ *  <i>pattern</i> replaced with either <i>replacement</i> or the value of the
+ *  block. The <i>pattern</i> will typically be a <code>URegexp</code>; if it is
+ *  a <code>UString</code> then no regular expression metacharacters will be
+ *  interpreted (that is <code>/\d/.U</code> will match a digit, but
+ *  <code>'\d'</code> will match a backslash followed by a 'd').
+ *
+ *  The sequences <code>$1</code>,  <code>$2</code>, etc., may be used.
+ *
+ *  In the block form, the current UMatch object is passed in as a parameter.
+ *  The value returned by the block will be substituted for the match on each call.
+ *
+ *     "hello".u.sub(/[aeiou]/.U, '*'.u)               #=> "h*llo"
+ *     "hello".u.sub(/([aeiou])/.U, '<$1>'.u)          #=> "h<e>llo"
+ */
+VALUE
+icu_ustr_sub(int argc, VALUE *argv, VALUE str)
+{
+    /* Substitute on a copy so the receiver is left alone. */
+    VALUE           copy = icu_ustr_dup(str);
+    /* The result of sub! is ignored: the copy is returned either way. */
+    icu_ustr_sub_bang(argc, argv, copy);
+    return copy;
+}
+/**
+ * Replaces +len+ code units of +str+, starting at code unit +beg+, with
+ * the contents of +val+ (which must be a UString).  A negative +beg+
+ * counts from the end.  Raises IndexError for a negative length or an
+ * index outside the string.  The range is clamped to the string and
+ * widened so that it never starts or ends inside a surrogate pair.
+ */
+static void
+icu_ustr_splice(VALUE str, long beg, long len, VALUE val)
+{
+    long            char_len;
+    long            limit;
+    Check_Class(val, rb_cUString);
+    if (val == str) {
+        /* Splicing a string into itself: read from a separate copy. */
+        val = icu_ustr_dup(str);
+    }
+    if (len < 0)
+        rb_raise(rb_eIndexError, "negative length %ld", len);
+    char_len = ICU_LEN(str);
+    if (char_len < beg) {
+      out_of_range:
+        rb_raise(rb_eIndexError, "index %ld out of string", beg);
+    }
+    if (beg < 0) {
+        if (-beg > char_len) {
+            goto out_of_range;
+        }
+        beg += char_len;
+    }
+    /* Written as a subtraction so a huge len cannot overflow beg + len. */
+    if (len > char_len - beg) {
+        len = char_len - beg;
+    }
+    /*
+     * Adjust to code point boundaries.  U16_SET_CP_LIMIT takes an
+     * absolute index, so it is applied to the end offset and not to the
+     * length.  A pure insertion (len == 0) keeps an empty range at the
+     * adjusted start instead of swallowing the following pair.
+     */
+    limit = beg + len;
+    U16_SET_CP_START(ICU_PTR(str), 0, beg);
+    if (len > 0) {
+        U16_SET_CP_LIMIT(ICU_PTR(str), 0, limit, char_len);
+    } else {
+        limit = beg;
+    }
+    ustr_splice_units(USTRING(str), beg, limit - beg, ICU_PTR(val), ICU_LEN(val));
+    OBJ_INFECT(str, val);
+}
+/**
+ *  call-seq:
+ *     str.insert(index, other_str)   => str
+ *
+ *  Inserts <i>other_str</i> before the character at the given
+ *  <i>index</i>, modifying <i>str</i>. Negative indices count from the
+ *  end of the string, and insert <em>after</em> the given character.
+ *  The intent is insert <i>other_str</i> so that it starts at the given
+ *  <i>index</i>.
+ *
+ *     "abcd".u.insert(0, 'X'.u)    #=> "Xabcd"
+ *     "abcd".u.insert(3, 'X'.u)    #=> "abcXd"
+ *     "abcd".u.insert(4, 'X'.u)    #=> "abcdX"
+ *     "abcd".u.insert(-3, 'X'.u)   #=> "abXcd"
+ *     "abcd".u.insert(-1, 'X'.u)   #=> "abcdX"
+ */
+VALUE
+icu_ustr_insert(VALUE str, VALUE idx, VALUE str2)
+{
+    long            pos = NUM2LONG(idx);
+    icu_check_frozen(1, str);
+    if (pos < 0) {
+        /*
+         * Negative indexes insert after the addressed position: -1
+         * appends, anything else shifts one step towards the end.
+         */
+        pos = (pos == -1) ? NUM2LONG(icu_ustr_length(str)) : pos + 1;
+    }
+    /* A zero-length splice is an insertion. */
+    icu_ustr_splice(str, pos, 0, str2);
+    return str;
+}
+/**
+ *  call-seq:
+ *     str.include? other_str   => true or false
+ *
+ *  Returns <code>true</code> if <i>str</i> contains the given string
+ *
+ *     "hello".u.include? "lo".u   #=> true
+ *     "hello".u.include? "ol".u   #=> false
+ */
+VALUE
+icu_ustr_include(VALUE str, VALUE arg)
+{
+    /* icu_ustr_index reports -1 when arg is not found. */
+    long            found_at = icu_ustr_index(str, arg, 0);
+    return (found_at == -1) ? Qfalse : Qtrue;
+}
+/*
+ * Replaces the text matched by group +nth+ of +re+ in +str+ with +val+
+ * (which must be a UString).  Raises IndexError when the pattern or the
+ * requested group does not match.
+ */
+static void
+icu_ustr_subpat_set(VALUE str, VALUE re, int nth, VALUE val)
+{
+    long            start,
+                    end,
+                    len;
+    VALUE           matched;
+    /* Same argument checks as icu_ustr_splice before touching val's buffer. */
+    Check_Class(val, rb_cUString);
+    if (val == str) {
+        val = icu_ustr_dup(str);
+    }
+    if (icu_reg_search(re, str, 0, 0) < 0) {
+        rb_raise(rb_eIndexError, "regexp not matched");
+    }
+    matched = icu_reg_range(re, nth, &start, &end);
+    if (NIL_P(matched)) {
+        rb_raise(rb_eIndexError, "regexp group %d not matched", nth);
+    }
+    len = end - start;
+    /*
+     * Adjust to code point boundaries.  U16_SET_CP_LIMIT takes an
+     * absolute index, so it is applied to the end offset and not to the
+     * length; an empty match stays empty at the adjusted start.
+     */
+    U16_SET_CP_START(ICU_PTR(str), 0, start);
+    if (len > 0) {
+        U16_SET_CP_LIMIT(ICU_PTR(str), 0, end, ICU_LEN(str));
+    } else {
+        end = start;
+    }
+    ustr_splice_units(USTRING(str), start, end - start, ICU_PTR(val), ICU_LEN(val));
+}
+/*
+ * Two-argument element assignment.  Dispatches on the index: Fixnum ->
+ * replace one unit, URegexp -> replace the whole match, UString ->
+ * replace its first occurrence, Range -> replace the range; anything
+ * else is converted with NUM2LONG and treated as a numeric index.
+ * Returns +val+.
+ */
+VALUE
+icu_ustr_aset(VALUE str, VALUE indx, VALUE val)
+{
+    long            idx,
+                    beg;
+    long            char_len = ICU_LEN(str);
+    switch (TYPE(indx)) {
+    case T_FIXNUM:
+        idx = FIX2LONG(indx);
+        /*
+         * The label sits after the Fixnum decoding so that the default
+         * branch can jump here with an index already converted by
+         * NUM2LONG.
+         */
+      num_index:
+        if (char_len <= idx) {
+          out_of_range:
+            rb_raise(rb_eIndexError, "index %ld out of string", idx);
+        }
+        if (idx < 0) {
+            if (-idx > char_len)
+                goto out_of_range;
+            idx += char_len;
+        }
+        icu_ustr_splice(str, idx, 1, val);
+        return val;
+    case T_DATA:
+        if (CLASS_OF(indx) == rb_cURegexp) {
+            icu_ustr_subpat_set(str, indx, 0, val);
+            return val;
+        }
+        if (CLASS_OF(indx) == rb_cUString) {
+            VALUE           src = val;
+            /* Same argument checks as icu_ustr_splice before using val's buffer. */
+            Check_Class(val, rb_cUString);
+            if (src == str) {
+                src = icu_ustr_dup(str);
+            }
+            beg = icu_ustr_index(str, indx, 0);
+            if (beg < 0) {
+                rb_raise(rb_eIndexError, "string not matched");
+            }
+            ustr_splice_units(USTRING(str), beg, ICU_LEN(indx), ICU_PTR(src), ICU_LEN(src));
+            return val;
+        }
+        /* fall through: other wrapped objects get the generic handling */
+    default:
+        /*
+         * check if indx is Range
+         */
+        {
+            long            beg,
+                            len;
+            if (rb_range_beg_len(indx, &beg, &len, char_len, 2)) {
+                icu_ustr_splice(str, beg, len, val);
+                return val;
+            }
+        }
+        idx = NUM2LONG(indx);
+        goto num_index;
+    }
+}
+/**
+ *  call-seq:
+ *     str[fixnum] = new_str
+ *     str[fixnum, fixnum] = new_str
+ *     str[range] = new_str
+ *     str[regexp] = new_str
+ *     str[regexp, fixnum] = new_str
+ *     str[other_str] = new_str
+ *
+ *  Element Assignment---Replaces some or all of the content of <i>str</i>. The
+ *  portion of the string affected is determined using the same criteria as
+ *  <code>UString#[]</code>. If the replacement string is not the same length as
+ *  the text it is replacing, the string will be adjusted accordingly. If the
+ *  regular expression or string is used as the index doesn't match a position
+ *  in the string, <code>IndexError</code> is raised. If the regular expression
+ *  form is used, the optional second <code>Fixnum</code> allows you to specify
+ *  which portion of the match to replace (effectively using the
+ *  <code>UMatch</code> indexing rules). The forms that take a
+ *  <code>Fixnum</code> will raise an <code>IndexError</code> if the value is
+ *  out of range; the <code>Range</code> form will raise a
+ *  <code>RangeError</code>, and the <code>URegexp</code> and <code>UString</code>
+ *  forms will silently ignore the assignment.
+ */
+VALUE
+icu_ustr_aset_m(int argc, VALUE *argv, VALUE str)
+{
+    icu_check_frozen(1, str);
+    /* Two arguments: index and value. */
+    if (argc == 2) {
+        return icu_ustr_aset(str, argv[0], argv[1]);
+    }
+    if (argc != 3) {
+        rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)",
+                 argc);
+    }
+    /* Three arguments: (regexp, group, value) or (start, length, value). */
+    if (CLASS_OF(argv[0]) == rb_cURegexp) {
+        icu_ustr_subpat_set(str, argv[0], NUM2INT(argv[1]), argv[2]);
+    } else {
+        long            first = NUM2LONG(argv[0]);
+        long            count = NUM2LONG(argv[1]);
+        icu_ustr_splice(str, first, count, argv[2]);
+    }
+    return argv[2];
+}
+/**
+ *  call-seq:
+ *     str.slice!(fixnum)           => new_str or nil
+ *     str.slice!(fixnum, fixnum)   => new_str or nil
+ *     str.slice!(range)            => new_str or nil
+ *     str.slice!(regexp)           => new_str or nil
+ *     str.slice!(other_str)        => new_str or nil
+ *
+ *  Deletes the specified portion from <i>str</i>, and returns the portion
+ *  deleted. The forms that take a <code>Fixnum</code> will raise an
+ *  <code>IndexError</code> if the value is out of range; the <code>Range</code>
+ *  form will raise a <code>RangeError</code>, and the <code>URegexp</code> and
+ *  <code>UString</code> forms will silently ignore the assignment.
+ *
+ *     string = "this is a string".u
+ *     string.slice!(2)        #=> "i"
+ *     string.slice!(3..6)     #=> " is "
+ *     string.slice!(/s.*t/.U)   #=> "sa st"
+ *     string.slice!("r".u)      #=> "r"
+ *     string                  #=> "thing"
+ */
+VALUE
+icu_ustr_slice_bang(int argc, VALUE *argv, VALUE str)
+{
+    VALUE           removed;
+    VALUE           call_args[3];	/* the caller's arguments plus one empty UString */
+    int             k = 0;
+    icu_check_frozen(1, str);
+    if (argc < 1 || argc > 2) {
+        rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)",
+                 argc);
+    }
+    while (k < argc) {
+        call_args[k] = argv[k];
+        k++;
+    }
+    /* Deleting is done by assigning an empty string to the same index. */
+    call_args[argc] = icu_ustr_new(0, 0);
+    removed = icu_ustr_aref_m(argc, call_args, str);
+    if (NIL_P(removed)) {
+        return removed;
+    }
+    icu_ustr_aset_m(argc + 1, call_args, str);
+    return removed;
+}
+/* Loop state shared between ustr_gsub and its rb_ensure callbacks. */
+struct ustr_gsub_state {
+    VALUE           str;	/* string being searched */
+    VALUE           pat;	/* pattern; replaced by its clone before the loop */
+    VALUE           repl;	/* replacement template, nil in block form */
+    VALUE           buf;	/* result assembled so far */
+    int             iter;	/* non-zero: replacements come from the block */
+    int             once;	/* non-zero: stop after the first match */
+};
+/* rb_ensure body: assemble the substituted string into st->buf. */
+static VALUE
+ustr_gsub_build(VALUE arg)
+{
+    struct ustr_gsub_state *st = (struct ustr_gsub_state *) arg;
+    VALUE           curr_repl, umatch, block_res;
+    long            beg,
+                    end = 0,
+                    prev_end;
+    st->buf = icu_ustr_new(0, 0);
+    st->pat = icu_reg_clone(st->pat);
+    do {
+        prev_end = end;
+        icu_reg_range(st->pat, 0, &beg, &end);
+        /* Copy the text between the previous match and this one. */
+        icu_ustr_concat(st->buf, icu_reg_get_prematch(st->pat, prev_end));
+        if (st->iter) {
+            /* Remember buffer and length so ustr_mod_check can compare them after the block. */
+            UChar          *ptr = ICU_PTR(st->str);
+            long            o_len = ICU_LEN(st->str);
+            umatch = icu_umatch_new(st->pat);
+            block_res = rb_yield(umatch);
+            if (CLASS_OF(block_res) == rb_cUString)
+                curr_repl = block_res;
+            else if (CLASS_OF(block_res) == rb_cUMatch)
+                curr_repl = icu_umatch_aref(block_res, INT2FIX(0));
+            else
+                curr_repl =
+                    icu_from_rstr(0, NULL, rb_obj_as_string(block_res));
+            ustr_mod_check(st->str, ptr, o_len);
+        } else {
+            curr_repl = icu_reg_get_replacement(st->pat, st->repl, prev_end);
+        }
+        icu_ustr_concat(st->buf, curr_repl);
+    }
+    while (icu_reg_find_next(st->pat) && !st->once);
+    /* Append whatever follows the last processed match. */
+    icu_ustr_concat(st->buf, icu_reg_get_tail(st->pat, end));
+    return st->buf;
+}
+/* rb_ensure cleanup: undo the busy increment taken by ustr_gsub. */
+static VALUE
+ustr_gsub_release(VALUE str)
+{
+    --(USTRING(str)->busy);
+    return Qnil;
+}
+/*
+ * Shared implementation of sub, sub!, gsub and gsub!.
+ *   argc/argv - (pattern) with a block, or (pattern, replacement UString)
+ *   bang      - non-zero: replace the contents of +str+ and return it
+ *               (nil when nothing matched); zero: return a new string
+ *   once      - non-zero: substitute the first match only
+ * Frozen checks are left to the callers.
+ */
+VALUE
+ustr_gsub(int argc, VALUE *argv, VALUE str, int bang, int once)
+{
+    struct ustr_gsub_state st;
+    VALUE           pat;
+    VALUE           repl = Qnil;
+    int             iter = 0;
+    if (argc == 1 && rb_block_given_p()) {
+        iter = 1;
+    } else if (argc == 2) {
+        repl = argv[1];
+        Check_Class(repl, rb_cUString);
+    } else {
+        rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)",
+                 argc);
+    }
+    pat = get_pat(argv[0], 1);
+    if (icu_reg_search(pat, str, 0, 0) < 0) {
+        /* no match */
+        if (bang)
+            return Qnil;
+        return icu_ustr_dup(str);
+    }
+    /* A block takes precedence even when a replacement was also given. */
+    if (rb_block_given_p())
+        iter = 1;
+    st.str = str;
+    st.pat = pat;
+    st.repl = repl;
+    st.buf = Qnil;
+    st.iter = iter;
+    st.once = once;
+    /*
+     * The string is marked busy while matches are processed.  The loop
+     * runs under rb_ensure so the mark is also dropped when the block
+     * raises or breaks out.
+     */
+    ++(USTRING(str)->busy);
+    rb_ensure(ustr_gsub_build, (VALUE) &st, ustr_gsub_release, str);
+    if (bang) {
+        icu_ustr_replace(str, st.buf);
+        return str;
+    }
+    return st.buf;
+}
+/**
+ *  call-seq:
+ *     str.gsub!(pattern, replacement)        => str or nil
+ *     str.gsub!(pattern) {|match| block }    => str or nil
+ *
+ *  In-place form of <code>UString#gsub</code>: the substitutions are applied
+ *  to <i>str</i> itself. Returns <i>str</i>, or <code>nil</code> when nothing
+ *  matched and therefore nothing was substituted.
+ */
+VALUE
+icu_ustr_gsub_bang(argc, argv, str)
+     int             argc;
+     VALUE          *argv;
+     VALUE           str;
+{
+    VALUE           outcome;
+    /* The receiver is about to be rewritten, so run the frozen check first. */
+    icu_check_frozen(1, str);
+    /* Trailing flags: 1 = replace the receiver, 0 = do not stop after the
+     * first match (the shared worker names them bang and once). */
+    outcome = ustr_gsub(argc, argv, str, 1, 0);
+    return outcome;
+}
+/**
+ *  call-seq:
+ *     str.gsub(pattern, replacement)       => new_str
+ *     str.gsub(pattern) {|match| block }   => new_str
+ *
+ *  Produces a copy of <i>str</i> in which <em>every</em> occurrence of
+ *  <i>pattern</i> has been substituted, either by <i>replacement</i> or by
+ *  the value the block returns. <i>pattern</i> is normally a
+ *  <code>URegexp</code>; when a <code>UString</code> is given instead, regular
+ *  expression metacharacters are not interpreted (so <code>/\d/</code>
+ *  matches a digit, while <code>'\d'</code> matches a backslash followed by
+ *  a 'd').
+ *
+ *  With a string replacement, <code>$1</code>, <code>$2</code>, ... may be
+ *  used to interpolate the corresponding groups of the match.
+ *
+ *  With a block, the current UMatch object is yielded and the block's value
+ *  is substituted for that match.
+ *
+ *     "hello".gsub(/[aeiou]/.U, '*')              #=> "h*ll*"
+ *     "hello".gsub(/([aeiou])/.U, '<$1>')         #=> "h<e>ll<o>"
+ */
+VALUE
+icu_ustr_gsub(argc, argv, str)
+     int             argc;
+     VALUE          *argv;
+     VALUE           str;
+{
+    /* Both trailing flags are off: build a new string rather than replacing
+     * the receiver, and keep going after the first match. */
+    const int       in_place = 0;
+    const int       first_only = 0;
+    return ustr_gsub(argc, argv, str, in_place, first_only);
+}
+/*-------------*/
+/**
+ * call-seq:
+ *     str.to_f(locale = nil, [format_pattern]) => aFloat
+ *
+ * Parses the string as a double value, honouring +locale+ and the decimal
+ * format pattern when they are provided. When +locale+ is omitted or +nil+,
+ * NULL is handed to ICU, which selects its default locale.
+ *
+ * Raises TypeError if +locale+ is neither a String nor +nil+, and
+ * ArgumentError if the format cannot be opened or the text cannot be parsed.
+ *
+ *      "456".u.to_f                                 # =>  456.0
+ *      "123,001".u.to_f("ru")                       # =>  123.001
+ *      "123,001".u.to_f("en")                       # =>  123001.0
+ *      "Got 123,001".u.to_f("en", "Got ###,###".u)  # =>  123001
+ */
+VALUE
+icu_ustr_parse_double( int argc, VALUE * argv, VALUE str)
+{
+	UParseError 	error;
+	UErrorCode 	status = U_ZERO_ERROR;
+	UNumberFormat  * format = NULL;
+	VALUE		loc, pattern;
+	const char 	* locale = NULL;
+	double		value;
+	int32_t		pos = 0;
+	int		n;
+	n = rb_scan_args(argc, argv, "02", &loc, &pattern);
+	if (n == 2) {
+		Check_Class(pattern, rb_cUString);
+	} else {
+		pattern = Qnil;
+	}
+	if (n > 0 && loc != Qnil) {
+		Check_Type(loc, T_STRING);
+		/* StringValueCStr guarantees a NUL-terminated buffer without
+		 * embedded NULs, which is what ICU expects for a locale id. */
+		locale = StringValueCStr(loc);
+	}
+	if (pattern != Qnil) {
+		format = unum_open(UNUM_PATTERN_DECIMAL, ICU_PTR(pattern), ICU_LEN(pattern), locale,
+			&error, &status);
+	} else {
+		format = unum_open(UNUM_DECIMAL, NULL, 0, locale, &error, &status);
+	}
+	if (U_FAILURE(status)) {
+		/* Release anything ICU may have handed back before raising. */
+		if (format != NULL) unum_close(format);
+		rb_raise(rb_eArgError, "can't open format %s", u_errorName(status));
+	}
+	value = unum_parseDouble(format, ICU_PTR(str), ICU_LEN(str), &pos, &status);
+	/* The formatter is closed before the status check so it cannot leak
+	 * when the parse error is raised. */
+	unum_close(format);
+	if (U_FAILURE(status)) {
+		rb_raise(rb_eArgError, "can't parse %s at %d", u_errorName(status), pos);
+	}
+	return rb_float_new(value);
+}
+/**
+ * call-seq:
+ *     UString::strcoll(str1, str2 )                   => Fixnum
+ *     UString::strcoll(str1, str2 , locale)           => Fixnum
+ *     UString::strcoll(str1, str2 , locale, strength) => Fixnum
+ *
+ * Performs locale-sensitive string comparison and returns -1, 0 or 1.
+ * Special values for locales can be passed in - if +nil+ is passed for the locale,
+ * the default locale collation rules will be used. If empty string ("") or "root" are
+ * passed, UCA rules will be used.
+ *
+ * Strength must be a fixnum that sets the collation strength:
+ * -1 is default, 0 - primary, 1 - secondary, 2 - tertiary.
+ * E.g., pass 0 to ignore case and accents, 1 - to ignore case only.
+ *
+ * Raises TypeError for arguments of the wrong type and ArgumentError when
+ * the collator cannot be opened.
+ **/
+VALUE
+icu_ustr_coll(int argc, VALUE *argv, VALUE self)
+{
+	UErrorCode status = U_ZERO_ERROR;
+	UCollator * collator = NULL;
+	int result;
+	int level = 0;
+	VALUE ret = Qnil;
+	VALUE str1, str2, loc, strength = Qnil;
+	const char * locale = NULL;
+	int n;
+	n = rb_scan_args(argc, argv, "22", &str1, &str2, &loc, &strength);
+	/* The locale is the third argument in both the 3- and the 4-argument
+	 * form, so it must be honoured whenever it is present. */
+	if (n >= 3 && loc != Qnil) {
+		Check_Type(loc, T_STRING);
+		locale = StringValueCStr(loc);
+	}
+	Check_Class(str1, rb_cUString);
+	Check_Class(str2, rb_cUString);
+	/* Validate the strength before the collator exists: a type error
+	 * raised afterwards would leave the collator open. */
+	if (n == 4) {
+		Check_Type(strength, T_FIXNUM);
+		level = NUM2INT(strength);
+	}
+	collator = ucol_open(locale, &status);
+	if (U_FAILURE(status)) {
+		rb_raise(rb_eArgError, "%s", u_errorName(status));
+	}
+	if (n == 4) {
+		ucol_setStrength(collator, level);
+	}
+	result = ucol_strcoll(collator, ICU_PTR(str1), ICU_LEN(str1), ICU_PTR(str2), ICU_LEN(str2));
+	ucol_close(collator);
+	switch (result) {
+	  case UCOL_EQUAL:   ret = INT2FIX(0);  break;
+	  case UCOL_GREATER: ret = INT2FIX(1);  break;
+	  case UCOL_LESS:    ret = INT2FIX(-1); break;
+	  default:           break;	/* ret stays nil for an unexpected result */
+	}
+	return ret;
+}
+/**
+ * call-seq:
+ *     UString::list_coll => anArray
+ *
+ * Returns an array of String names of the collator locales that ICU reports
+ * as available; these can be passed as the locale to UString::strcoll.
+ * */
+VALUE icu_ustr_list_coll(str)
+	VALUE str;
+{
+	int32_t total = ucol_countAvailable();
+	VALUE names = rb_ary_new();
+	int32_t idx = 0;
+	/* Copy each available collator locale name into a Ruby String. */
+	while (idx < total) {
+	   const char * locale_name = ucol_getAvailable(idx);
+	   rb_ary_push(names, rb_str_new2(locale_name));
+	   ++idx;
+	}
+	return names;
+}
+/**
+ * call-seq:
+ *     UString::list_locales => anArray
+ *
+ * Returns an array of String names of the locales that ICU reports as
+ * available.
+ * */
+VALUE icu_ustr_list_locales(str)
+	VALUE str;
+{
+	int32_t total = uloc_countAvailable();
+	VALUE names = rb_ary_new();
+	int32_t idx = 0;
+	/* Copy each available locale name into a Ruby String. */
+	while (idx < total) {
+	   const char * locale_name = uloc_getAvailable(idx);
+	   rb_ary_push(names, rb_str_new2(locale_name));
+	   ++idx;
+	}
+	return names;
+}
+/**
+ * call-seq:
+ *     UString::list_translits => anArray
+ *
+ * Returns an array of UString ids of the transliterators that ICU reports
+ * as available. An ICU error while opening or walking the id enumeration is
+ * reported through ICU_RAISE instead of yielding a truncated list.
+ * */
+VALUE icu_ustr_list_translits(VALUE str)
+{
+	UErrorCode  status = U_ZERO_ERROR;
+	UEnumeration *  ids ;
+	VALUE ret ;
+	UChar * name;
+	int32_t len;
+	ids = utrans_openIDs (&status);
+	/* ICU_RAISE is the project's status-check macro; presumably it raises
+	 * a Ruby exception when status is a failure - confirm in icu_common.h */
+	ICU_RAISE(status);
+	ret = rb_ary_new();
+	/* uenum_unext returns NULL both at the end of the list and on error;
+	 * the two cases are told apart by status after the loop. */
+	while( (name = (UChar*)uenum_unext(ids, &len, &status)) != NULL ) {
+		rb_ary_push(ret, icu_ustr_new(name, len));
+	}
+	/* Close first so the enumeration is released even if we raise below. */
+	uenum_close(ids);
+	ICU_RAISE(status);
+	return ret;
+}
+/* Returns non-zero when +options+ is a hash whose symbol key +name+ maps to
+ * exactly +true+; a nil +options+ means that no option is set. */
+static int ustr_search_opt_on(VALUE options, const char * name)
+{
+	if( options == Qnil ) return 0;
+	return Qtrue == rb_hash_aref(options, ID2SYM(rb_intern(name)));
+}
+/**
+ * call-seq:
+ *     str.search(pattern, options = {})
+ *
+ * Searches for match in string. Returns array of +Range+
+ * corresponding to position where pattern is matched.
+ *
+ * Valid options are:
+ *      :locale -- locale, +String+, value e.g. "en", "ru_RU"
+ *      :ignore_case --	whether to ignore case, valid values are +true+ or +false+, default to +false+
+ *      :ignore_case_accents -- sets collator options to strength +0+ - primary difference, e.g. ignore case and accents,
+ *                            overrides :ignore_case option, default to +false+,
+ *      :loosely -- same as :ignore_case_accents
+ *      :limit -- Fixnum limit of match positions to return.
+ *      :whole_words --  whether to match whole words only
+ *      :canonical --  use canonical equivalence
+ *
+ * Raises TypeError when +pattern+, +options+ or :locale have the wrong type
+ * or :limit is not positive, and ArgumentError when :limit is not a Fixnum
+ * or ICU reports a failure.
+ *
+ *     a = "A quick brown fox jumped over the lazy fox dancing foxtrote".u
+ *     a.search("fox".u)                                                       # => [14..16, 39..41, 51..53]
+ *     a.search("FoX".u)                                                       # => []
+ *     a.search("FoX".u, :ignore_case => true)                                 # => [14..16, 39..41, 51..53]
+ *     a.search("FoX".u, :ignore_case => true, :whole_words => true)           # => [14..16, 39..41]
+ *     a.search("FoX".u, :ignore_case => true, :whole_words => true, :limit => 1)  # => [14..16]
+ *
+ *     b = "Iñtërnâtiônàlizætiøn îs cọmpłèx".u.upcase     # => IÑTËRNÂTIÔNÀLIZÆTIØN ÎS CỌMPŁÈX
+ *     b.search("nâtiôn".u, :locale => "en")                                   # => []
+ *     b.search("nation".u)                                                    # => []
+ *     b.search("nation".u, :locale => "en", :ignore_case_accents => true)     # => [5..10]
+ *     b.search("nâtiôn".u, :locale => "en", :ignore_case => true)             # => [5..10]
+ *     b.search("zaeti".u,  :locale => "en" )                                  # => []
+ *     b.search("zaeti".u,  :locale => "en", :ignore_case => true)             # => []
+ *     b.search("zaeti".u,  :locale => "en", :ignore_case_accents => true)     # => [14..17]
+ *
+ *     v = [?a, 0x0325, 0x0300].to_u # =>   ḁ̀
+ *     v.search([?a, 0x300].to_u, :canonical => true) # => [0..2]
+ *     v.search([?a, 0x300].to_u) # => []
+ **/
+VALUE icu_ustr_search(int argc, VALUE *argv, VALUE str)
+{
+	UErrorCode status = U_ZERO_ERROR;
+	UStringSearch * search = NULL;
+	UCollator * collator = NULL;
+	UBreakIterator * brkit = NULL;
+	VALUE  pat, locale, limit, options;
+	VALUE ret = rb_ary_new();
+	const char * loc = NULL;
+	int lim = -1, count = 0;
+	int failed = 0;		/* set when an ICU call reported a failure */
+	int32_t start, len;
+	/* ---- argument validation: nothing is allocated in ICU yet, so it is
+	 * safe to raise from here ---- */
+	if ( rb_scan_args(argc, argv, "11", &pat, &options) == 2 ) {
+	   Check_Type(options, T_HASH);
+	} else {
+	   options = Qnil;
+	}
+	Check_Class(pat, rb_cUString);
+	locale = options == Qnil ? Qnil : rb_hash_aref(options, ID2SYM(rb_intern("locale")));
+	if( locale != Qnil ) {
+	   Check_Type(locale, T_STRING);
+	   /* guarantees the NUL-terminated C string ICU expects */
+	   loc = StringValueCStr(locale);
+	}
+	limit = options == Qnil ? Qnil : rb_hash_aref(options, ID2SYM(rb_intern("limit")));
+	if( TYPE(limit) == T_FIXNUM ) {
+	   lim = FIX2INT(limit);
+	   if( lim <= 0 ) {
+	      rb_raise(rb_eTypeError, "Limit must be positive or nil, got: %d", lim);
+	   }
+	} else if( limit != Qnil ) {
+	   rb_raise(rb_eArgError, "Limit must be Fixnum, got %s", rb_class2name(CLASS_OF(limit)));
+	}
+	/* ---- ICU set-up: from here on every exit goes through cleanup ---- */
+	collator = ucol_open(loc, &status);
+	if( U_FAILURE(status) ) { failed = 1; goto cleanup; }
+	ucol_setStrength(collator, UCOL_DEFAULT);
+	if( ustr_search_opt_on(options, "whole_words") )
+	   brkit = ubrk_open(UBRK_WORD, loc, ICU_PTR(str), ICU_LEN(str), &status);
+	if( ustr_search_opt_on(options, "ignore_case") )
+	   ucol_setStrength(collator, UCOL_SECONDARY);
+	/* Applied after :ignore_case so that the looser setting wins. */
+	if( ustr_search_opt_on(options, "ignore_case_accents")
+	    || ustr_search_opt_on(options, "loosely") )
+	   ucol_setStrength(collator, UCOL_PRIMARY);
+	search = usearch_openFromCollator(ICU_PTR(pat), ICU_LEN(pat),
+	                ICU_PTR(str), ICU_LEN(str),
+	                collator, brkit, &status);
+	if( ustr_search_opt_on(options, "canonical") )
+	   usearch_setAttribute(search, USEARCH_CANONICAL_MATCH, USEARCH_ON, &status);
+	if( U_FAILURE(status) ) { failed = 1; goto cleanup; }
+	/* ---- collect the matches ---- */
+	status = U_ZERO_ERROR;
+	if( usearch_first(search, &status) != USEARCH_DONE ) {
+	   do {
+	      if( U_FAILURE(status) ) { failed = 1; break; }
+	      start = usearch_getMatchedStart(search);
+	      len   = usearch_getMatchedLength(search);
+	      /* ranges are inclusive, hence the -1 on the end position */
+	      rb_ary_push(ret, rb_range_new(LONG2NUM(start), LONG2NUM(start+len-1), 0));
+	      status = U_ZERO_ERROR;
+	      count += 1;
+	      if( lim > 0 && count >= lim ) break;
+	   } while( USEARCH_DONE != usearch_next(search, &status) );
+	}
+cleanup:
+	/* Single release point for the ICU handles; as in the original code,
+	 * handles that were never opened are passed to the close calls as NULL. */
+	usearch_close(search);
+	ucol_close(collator);
+	ubrk_close(brkit);
+	if( failed )
+	   rb_raise(rb_eArgError, "%s", u_errorName(status));
+	return ret;
+}
+/**
+ * call-seq:
+ *     str.conv_unit_range(unit_range) => code_point_range
+ *
+ * Translates a range expressed in <b>code units</b> into the matching range
+ * expressed in <b>code points</b>. Returns +nil+ when +unit_range+ cannot be
+ * resolved against the string. For text in which no character needs more
+ * than one UTF-16 code unit, the two ranges are identical.
+ */
+VALUE icu_ustr_convert_unit_range(str, range)
+	VALUE		str, range;
+{
+    long unit_beg, unit_cnt;
+    long point_beg, point_cnt;
+    VALUE resolved = rb_range_beg_len(range, &unit_beg, &unit_cnt, ICU_LEN(str), 0);
+    if( resolved != Qtrue ) {
+	    return Qnil;
+    }
+    /* Code points lying before the start of the span. */
+    point_beg = u_countChar32( ICU_PTR(str), unit_beg );
+    /* A span that runs past the end of the string is shortened by one unit. */
+    if( unit_beg + unit_cnt > ICU_LEN(str) ) {
+	    unit_cnt -= 1;
+    }
+    /* Code points inside the span itself. */
+    point_cnt = u_countChar32( ICU_PTR(str) + unit_beg, unit_cnt );
+    return rb_range_new(LONG2NUM(point_beg), LONG2NUM(point_beg + point_cnt - 1), 0);
+}
+/**
+ * call-seq:
+ *     str.conv_point_range(point_range) => code_unit_range
+ *
+ * Translates a range expressed in <b>code points</b> into the matching range
+ * expressed in <b>code units</b> (the inverse of #conv_unit_range). Returns
+ * +nil+ when +point_range+ cannot be resolved against the string. For text
+ * in which no character needs more than one UTF-16 code unit, the two
+ * ranges are identical.
+ */
+VALUE icu_ustr_convert_point_range(str, range)
+	VALUE		str, range;
+{
+    long first_point, point_cnt, total_points;
+    long unit_beg = 0, unit_end;
+    total_points = u_countChar32( ICU_PTR(str), ICU_LEN(str) );
+    if( rb_range_beg_len(range, &first_point, &point_cnt, total_points, 0) != Qtrue ) {
+	    return Qnil;
+    }
+    /* Step over first_point code points from the start of the string;
+     * U16_FWD_N advances the index by whole code points. */
+    U16_FWD_N(ICU_PTR(str), unit_beg, ICU_LEN(str), first_point);
+    /* Then step over the code points of the span to find where it ends. */
+    unit_end = unit_beg;
+    U16_FWD_N(ICU_PTR(str), unit_end, ICU_LEN(str), point_cnt);
+    return rb_range_new(LONG2NUM(unit_beg), LONG2NUM(unit_end - 1), 0);
+}
+/**
+ * call-seq:
+ *     str.unit_count
+ *
+ * Returns the length of the string measured in code units.
+ *
+ */
+VALUE icu_ustr_unit_count(VALUE str){
+   long units = ICU_LEN(str);
+   return LONG2NUM(units);
+}
+/**
+ * call-seq:
+ *     str.point_count
+ *
+ * Returns the length of the string measured in code points, as counted by
+ * ICU's u_countChar32 over the whole buffer.
+ *
+ */
+VALUE icu_ustr_point_count(VALUE str){
+   int32_t points = u_countChar32(ICU_PTR(str), ICU_LEN(str));
+   return LONG2NUM(points);
+}
+/* Character-access callback handed to u_unescapeAt by icu_ustr_unescape:
+ * treats +context+ as a UChar buffer and returns the unit at +offset+.
+ * No bounds check is done here. */
+UChar icu_uchar_at(int32_t offset, void * context)
+{
+	UChar * units = (UChar*)context;
+	return *(units + offset);
+}
+/**
+ * call-seq:
+ *    str.unescape  => new_str
+ *
+ * Unescape a string of characters.
+ *
+ * The following escape sequences are recognized:
+ *     \uhhhh 4 hex digits; h in [0-9A-Fa-f]
+ *     \Uhhhhhhhh 8 hex digits
+ *     \xhh 1-2 hex digits \x{h...} 1-8 hex digits
+ *     \ooo 1-3 octal digits; o in [0-7]
+ *     \cX control-X; X is masked with 0x1F
+ *
+ *  as well as the standard ANSI C escapes:
+ *   \a => U+0007,  \b => U+0008, \t => U+0009, \n => U+000A, \v => U+000B, \f => U+000C, \r => U+000D, \e => U+001B, \" => U+0022, \' => U+0027, \? => U+003F, \\ => U+005C
+ *
+ * If an escape sequence is invalid (this includes a lone backslash at the
+ * end of the string), it is not interpreted: the backslash and the text
+ * after it are copied to the result unchanged.
+ *
+ *     "\\u044D\\u043A\\u0440\\u0430\\u043D\\u0438\\u0440\\u043E\\u0432\\u0430\\u043D\\u0438\\u0435".u.unescape => "экранирование"
+ *
+ **/
+VALUE icu_ustr_unescape(VALUE str)
+{
+	UChar32 	c32;
+	int32_t		offset, leng, i, segment_start;
+	UChar		* ptr;
+	UChar		buf[2];		/* one code point is at most two UTF-16 units */
+	VALUE		ret;
+	offset = 0;
+	segment_start = 0;	/* start of the literal run not yet copied to ret */
+	leng = ICU_LEN(str);
+	ptr  = ICU_PTR(str);
+	ret  = icu_ustr_new(0, 0);
+	while(offset < leng) {
+	    if( ptr[offset] != '\\' ) {
+		++offset;
+		continue;
+	    }
+	    /* Flush the literal text that precedes the backslash. */
+	    ustr_splice_units(USTRING(ret), ICU_LEN(ret), 0, ptr+segment_start, offset-segment_start);
+	    /* That text is now in ret, so the pending run restarts at the
+	     * backslash. Without this, an invalid escape would cause the
+	     * already flushed text to be appended a second time later on. */
+	    segment_start = offset;
+	    ++offset;
+	    c32 = u_unescapeAt(icu_uchar_at, &offset, leng, ptr);
+	    /* Invalid escape: u_unescapeAt puts offset back just after the
+	     * backslash; the backslash stays in the pending literal run and
+	     * is copied verbatim with the text that follows it. */
+	    if( 0xFFFFFFFF == c32) continue;
+	    /* Valid escape: append the decoded code point. */
+	    i = 0;
+	    U16_APPEND_UNSAFE(buf, i, c32);
+	    ustr_splice_units(USTRING(ret), ICU_LEN(ret), 0, buf, i);
+	    segment_start = offset;
+	}
+	/* Copy whatever literal text is left after the last escape. */
+	if( segment_start < offset)
+	    ustr_splice_units(USTRING(ret), ICU_LEN(ret), 0, ptr+segment_start, offset-segment_start);
+	return ret;
+}
+/* transliteration */
+extern VALUE icu_transliterate(UChar * str, int32_t str_len, UChar * id, int32_t id_len, UChar * rules, int32_t rule_len);
+/**
+ * call-seq:
+ *     str.translit(id, [rules])
+ *
+ * Runs a {transliteration}[http://icu.sourceforge.net/userguide/Transformations.html]
+ * over this string, selected by the transform +id+ and, optionally, by a set
+ * of +rules+. Both +id+ and +rules+ must be UString values.
+ *
+ *     "yukihiro matsumoto".u.translit("Latin-Hiragana".u) # => ゆきひろ まつもと
+ *     "hello".u.translit("null".u, ":: upper();".u)        # => HELLO
+ **/
+VALUE icu_ustr_translit(argc, argv, str)
+	int argc;
+	VALUE * argv ;
+	VALUE str;
+{
+	VALUE id, rules ;
+	UChar * rule_text = NULL;
+	int32_t rule_size = 0;
+	int given = rb_scan_args(argc, argv, "11", &id, &rules);
+	if( given != 2 ) {
+		rules = Qnil;
+	} else {
+		Check_Class(rules, rb_cUString);
+	}
+	Check_Class(str, rb_cUString);
+	Check_Class(id, rb_cUString);
+	/* Without rules the helper receives a NULL pointer and a zero length. */
+	if( rules != Qnil ) {
+		rule_text = ICU_PTR(rules);
+		rule_size = ICU_LEN(rules);
+	}
+	return icu_transliterate(ICU_PTR(str), ICU_LEN(str), ICU_PTR(id), ICU_LEN(id),
+				rule_text, rule_size);
+}
+void
+initialize_ustring(void)
+{   /* Set-up routine for the UString class: initializes ICU, opens the two
+     * shared root-locale collators, then defines the class and registers
+     * every method. Raises RuntimeError if ICU or a collator cannot be
+     * initialized. */
+    UErrorCode status = U_ZERO_ERROR;
+    u_init(&status);
+    if( U_FAILURE(status) ){
+       rb_raise(rb_eRuntimeError, "Can't initialize : %s", u_errorName(status));
+    }
+    s_UCA_collator =  ucol_open("", &status); /* "" selects the root (UCA) rules */
+    if( U_FAILURE(status) ){
+       rb_raise(rb_eRuntimeError, "Can't initialize : %s", u_errorName(status));
+    }
+    s_case_UCA_collator =  ucol_open("", &status); /* second root collator, configured below */
+    if( U_FAILURE(status) ){
+       rb_raise(rb_eRuntimeError, "Can't initialize : %s", u_errorName(status));
+    }
+    ucol_setStrength(s_case_UCA_collator, UCOL_SECONDARY); /* secondary strength; presumably for case-insensitive comparison - confirm against its users */
+/*
+Document-class: UString
+UString is a string class that stores Unicode characters directly and provides
+similar functionality as the Ruby String class.
+A UString string consists of 16-bit Unicode code units. A Unicode character
+may be stored with either one code unit which is the most common case or with a matched
+pair of special code units ("surrogates").
+For single-character handling, a Unicode character code point is a value in the
+range 0..0x10ffff.
+Indexes and offsets into and lengths of strings always count code units, not code points.
+This is the same as with multi-byte char* strings in traditional string handling.
+Operations on partial strings typically do not test for code point boundaries.
+In order to use the collation, text boundary analysis, formatting and other ICU APIs,
+Unicode strings must be used. In order to get Unicode strings from your native codepage,
+you can use the conversion API.
+The UString class is also the access point for several ICU services, instead of
+mirroring the ICU class hierarchy.
+====  Methods by category:
+- concat and modify:  + ,  * ,  << ,  #concat ,  #replace
+- element reference, insert, replace:  [] ,  #slice , []= ,  #slice! ,  #insert , #char_span
+- comparisons:  <=> ,  == ,  #casecmp ,  #strcoll
+- size and positions:  #length ,  #point_count ,  #clear ,  #empty? ,  #conv_unit_range ,  #conv_point_range
+- index/search methods:  #index ,  #rindex ,  #include? ,  #search
+- regexps, matching and replacing: =~ ,  #match ,  #scan ,  #split ,  #sub ,  #sub! ,  #gsub ,  #gsub!
+- conversion String/UString:  #to_s, Kernel#u, String#to_u
+- iterators:  #each_line_break ,  #each_word ,  #each_char ,  #each_sentence
+- split to chars/codepoints:  #chars ,  #codepoints , Array#to_u
+- character case:   #upcase ,  #upcase! ,  #downcase ,  #downcase! ,  #foldcase
+- stripping spaces:  #strip ,  #lstrip ,  #rstrip ,  #strip! ,  #lstrip! ,  #rstrip!
+- formatting and parsing:  #format ,  #parse_date ,  #to_f
+- UNICODE normalization:  #norm_C ,  #norm_D ,  #norm_KC ,  #norm_KD ,  #norm_FCD
+- utilities:  #unescape ,  #hash ,  #inspect ,  #inspect_names ,  #translit
+- ICU available info: #list_coll ,  #list_locales ,  #list_translits
+*/
+    rb_cUString = rb_define_class("UString", rb_cObject);
+    rb_include_module(rb_cUString, rb_mComparable);
+    /* initializations */
+    rb_define_alloc_func(rb_cUString, icu_ustr_alloc);
+    rb_define_method(rb_cUString, "initialize", icu_ustr_init, -1);
+    rb_define_method(rb_cUString, "initialize_copy", icu_ustr_replace, 1);
+    rb_define_method(rb_cUString, "replace", icu_ustr_replace, 1);
+    /* comparisons */
+    rb_define_method(rb_cUString, "<=>", icu_ustr_cmp_m, 1);
+    rb_define_method(rb_cUString, "==",  icu_ustr_equal, 1);
+    rb_define_method(rb_cUString, "eql?",  icu_ustr_equal, 1);
+    rb_define_method(rb_cUString, "casecmp", icu_ustr_casecmp, 1);
+    rb_define_singleton_method(rb_cUString, "strcoll", icu_ustr_coll, -1);
+    /* ICU available info (class-level methods) */
+    rb_define_singleton_method(rb_cUString, "list_coll", icu_ustr_list_coll, 0);
+    rb_define_singleton_method(rb_cUString, "list_locales", icu_ustr_list_locales, 0);
+    rb_define_singleton_method(rb_cUString, "list_translits", icu_ustr_list_translits, 0);
+    /* hash code */
+    rb_define_method(rb_cUString, "hash", icu_ustr_hash_m, 0);
+    /* inspect */
+    rb_define_method(rb_cUString, "inspect", icu_ustr_inspect, 0);
+    rb_define_method(rb_cUString, "inspect_names", icu_ustr_inspect_names,  0);
+    /* size; aliases must follow the definition of the method they alias */
+    rb_define_method(rb_cUString, "length", icu_ustr_length, 0);
+    rb_define_alias (rb_cUString, "size", "length");
+    rb_define_method(rb_cUString, "unit_count", icu_ustr_unit_count, 0);
+    rb_define_method(rb_cUString, "point_count", icu_ustr_point_count, 0);
+    rb_define_method(rb_cUString, "clear", icu_ustr_clear, 0);
+    rb_define_method(rb_cUString, "empty?", icu_ustr_empty, 0);
+    /* UNICODE normalization */
+    rb_define_method(rb_cUString, "norm_C", icu_ustr_normalize_C, 0);
+    rb_define_method(rb_cUString, "norm_D", icu_ustr_normalize_D, 0);
+    rb_define_method(rb_cUString, "norm_KC", icu_ustr_normalize_KC, 0);
+    rb_define_method(rb_cUString, "norm_KD", icu_ustr_normalize_KD, 0);
+    rb_define_method(rb_cUString, "norm_FCD", icu_ustr_normalize_FCD, 0);
+    /* iterators; "each" is an alias of each_line_break */
+    rb_define_method(rb_cUString, "each_line_break", icu_ustr_each_line, -1);
+    rb_define_method(rb_cUString, "each_word", icu_ustr_each_word, -1);
+    rb_define_method(rb_cUString, "each_char", icu_ustr_each_char, -1);
+    rb_define_method(rb_cUString, "each_sentence", icu_ustr_each_sentence, -1);
+    rb_define_alias(rb_cUString,  "each", "each_line_break");
+    /* split to chars/codepoints */
+    rb_define_method(rb_cUString, "chars", icu_ustr_chars_m, -1);
+    rb_define_method(rb_cUString, "char_span", icu_ustr_char_span, -1);
+    rb_define_method(rb_cUString, "codepoints", icu_ustr_points, 0);
+    /* concat operations */
+    rb_define_method(rb_cUString, "+", icu_ustr_plus, 1);
+    rb_define_method(rb_cUString, "*", icu_ustr_times, 1);
+    rb_define_method(rb_cUString, "concat", icu_ustr_concat, 1);
+    rb_define_alias( rb_cUString, "<<", "concat");
+    /* character case  */
+    rb_define_method(rb_cUString, "upcase", icu_ustr_upcase, -1);
+    rb_define_method(rb_cUString, "upcase!", icu_ustr_upcase_bang, -1);
+    rb_define_method(rb_cUString, "downcase", icu_ustr_downcase, -1);
+    rb_define_method(rb_cUString, "downcase!", icu_ustr_downcase_bang, -1);
+    rb_define_method(rb_cUString, "foldcase", icu_ustr_foldcase, 0);
+    /* stripping spaces */
+    rb_define_method(rb_cUString, "strip", icu_ustr_strip, 0);
+    rb_define_method(rb_cUString, "lstrip", icu_ustr_lstrip, 0);
+    rb_define_method(rb_cUString, "rstrip", icu_ustr_rstrip, 0);
+    rb_define_method(rb_cUString, "strip!", icu_ustr_strip_bang, 0);
+    rb_define_method(rb_cUString, "lstrip!", icu_ustr_lstrip_bang, 0);
+    rb_define_method(rb_cUString, "rstrip!", icu_ustr_rstrip_bang, 0);
+    /* index/search methods */
+    rb_define_method(rb_cUString, "index", icu_ustr_index_m, -1);
+    rb_define_method(rb_cUString, "rindex", icu_ustr_rindex_m, -1);
+    rb_define_method(rb_cUString, "include?", icu_ustr_include, 1);
+    rb_define_method(rb_cUString, "search", icu_ustr_search, -1);
+    /* element reference */
+    rb_define_method(rb_cUString, "[]", icu_ustr_aref_m, -1);
+    rb_define_alias(rb_cUString, "slice", "[]");
+    /* codeunit/codepoint conversion */
+    rb_define_method(rb_cUString, "conv_unit_range", icu_ustr_convert_unit_range, 1);
+    rb_define_method(rb_cUString, "conv_point_range", icu_ustr_convert_point_range, 1);
+    /* insert/replace */
+    rb_define_method(rb_cUString, "[]=", icu_ustr_aset_m, -1);
+    rb_define_method(rb_cUString, "slice!", icu_ustr_slice_bang, -1);
+    rb_define_method(rb_cUString, "insert", icu_ustr_insert, 2);
+    /* conversion to String from UString; "to_str" is an alias of to_s */
+    rb_define_method(rb_cUString, "to_u", icu_ustr_to_ustr, -1);
+    rb_define_method(rb_cUString, "to_s", icu_ustr_to_rstr, -1);
+    rb_define_alias(rb_cUString, "to_str", "to_s");
+    /* formatting messages */
+    rb_define_method(rb_cUString, "format", icu_ustr_format, -2);
+    rb_define_alias( rb_cUString, "fmt", "format");
+    /* parsing */
+    rb_define_method(rb_cUString, "to_f", icu_ustr_parse_double, -1);
+    /* transliteration */
+    rb_define_method(rb_cUString, "translit", icu_ustr_translit, -1);
+    /* unescaping */
+    rb_define_method(rb_cUString, "unescape",  icu_ustr_unescape, 0);
+    /* regexp matching and replacing */
+    rb_define_method(rb_cUString, "=~", icu_ustr_match, 1);
+    rb_define_method(rb_cUString, "match", icu_ustr_match_m, 1);
+    rb_define_method(rb_cUString, "scan", icu_ustr_scan, 1);
+    rb_define_method(rb_cUString, "split", icu_ustr_split_m, -1);
+    rb_define_method(rb_cUString, "sub", icu_ustr_sub, -1);
+    rb_define_method(rb_cUString, "sub!", icu_ustr_sub_bang, -1);
+    rb_define_method(rb_cUString, "gsub", icu_ustr_gsub, -1);
+    rb_define_method(rb_cUString, "gsub!", icu_ustr_gsub_bang, -1);
+}