unicode 0.4.1-x86-mingw32 → 0.4.2-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -1,5 +1,5 @@
1
1
  Unicode Library for Ruby
2
- Version 0.4.1
2
+ Version 0.4.2
3
3
 
4
4
  Yoshida Masato
5
5
 
@@ -15,7 +15,7 @@
15
15
  - Install
16
16
 
17
17
  This can work with ruby-1.8 or later. I recommend that you
18
- use ruby-1.9.2 or later.
18
+ use ruby-1.9.3 or later.
19
19
 
20
20
  Build and install in the usual way.
21
21
  For example, when Ruby supports dynamic linking on your OS,
@@ -59,6 +59,11 @@
59
59
  mapping in UnicodeData.txt and the Hangul decomposition
60
60
  algorithm.
61
61
 
62
+ Unicode::decompose_safe(str)
63
+ Decompose Unicode string with a non-standard mapping.
64
+ It does not decompose the characters in
65
+ CompositionExclusions.txt.
66
+
62
67
  Unicode::compose(str)
63
68
  Compose Unicode string. Before composing, the trailing
64
69
  characters are sorted in canonical order.
@@ -73,12 +78,19 @@
73
78
  Normalize Unicode string in form D or form KD.
74
79
  These are aliases of decompose/decompose_compat.
75
80
 
81
+ Unicode::normalize_D_safe(str) (Unicode::nfd_safe(str))
82
+ This is an alias of decompose_safe.
83
+
76
84
  Unicode::normalize_C(str) (Unicode::nfc(str))
77
85
  Unicode::normalize_KC(str) (Unicode::nfkc(str))
78
86
  Normalize Unicode string in form C or form KC.
79
87
  normalize_C = decompose + compose
80
88
  normalize_KC = decompose_compat + compose
81
89
 
90
+ Unicode::normalize_C_safe(str) (Unicode::nfc_safe(str))
91
+ Normalize Unicode string with decompose_safe.
92
+ normalize_C_safe = decompose_safe + compose
93
+
82
94
  Unicode::upcase(str)
83
95
  Unicode::downcase(str)
84
96
  Unicode::capitalize(str)
@@ -111,6 +123,7 @@
111
123
 
112
124
  - History
113
125
 
126
+ Feb 29, 2012 version 0.4.2 add decompose_safe
114
127
  Feb 3, 2012 version 0.4.1 update unidata.map for Unicode 6.1
115
128
  Oct 14, 2010 version 0.4.0 fix the composition algorithm, and support Unicode 6.0
116
129
  Feb 26, 2010 version 0.3.0 fix a capitalize bug and support SpecialCasing
@@ -7,6 +7,8 @@
7
7
  *
8
8
  */
9
9
 
10
+ #define UNICODE_VERSION "0.4.2"
11
+
10
12
  #include "ruby.h"
11
13
  #ifdef HAVE_RUBY_IO_H
12
14
  # include "ruby/io.h"
@@ -86,6 +88,19 @@ get_canon(int ucs)
86
88
  return NULL;
87
89
  }
88
90
 
91
+ static const char*
92
+ get_canon_ex(int ucs)
93
+ {
94
+ VALUE ch = rb_hash_aref(unicode_data, INT2FIX(ucs));
95
+
96
+ if (!NIL_P(ch)) {
97
+ int i = FIX2INT(ch);
98
+ if (!unidata[i].exclusion)
99
+ return unidata[i].canon;
100
+ }
101
+ return NULL;
102
+ }
103
+
89
104
  static const char*
90
105
  get_compat(int ucs)
91
106
  {
@@ -216,7 +231,41 @@ decompose_internal(WString* ustr, WString* result)
216
231
  }
217
232
 
218
233
  /*
219
- * push compatibility decomposed str into result
234
+ * push decomposed str into result, leaving characters flagged as exclusions undecomposed
235
+ */
236
+ static WString*
237
+ decompose_safe_internal(WString* ustr, WString* result)
238
+ {
239
+ int i;
240
+ int len = ustr->len;
241
+
242
+ for (i = 0; i < len; i++) {
243
+ int ucs = ustr->str[i];
244
+ if (ucs >= SBASE && ucs < SBASE + SCOUNT) {
245
+ int l, v, t;
246
+ decompose_hangul(ucs, &l, &v, &t);
247
+ WStr_addWChar(result, l);
248
+ if (v) WStr_addWChar(result, v);
249
+ if (t) WStr_addWChar(result, t);
250
+ }
251
+ else {
252
+ const char* dc = get_canon_ex(ucs);
253
+ if (!dc) {
254
+ WStr_addWChar(result, ucs);
255
+ }
256
+ else {
257
+ WString wdc;
258
+ WStr_allocWithUTF8(&wdc, dc);
259
+ decompose_safe_internal(&wdc, result);
260
+ WStr_free(&wdc);
261
+ }
262
+ }
263
+ }
264
+ return result;
265
+ }
266
+
267
+ /*
268
+ * push compatibility decomposed str into result
220
269
  */
221
270
  static WString*
222
271
  decompose_compat_internal(WString* ustr, WString* result)
@@ -582,6 +631,32 @@ unicode_decompose(VALUE obj, VALUE str)
582
631
  return vret;
583
632
  }
584
633
 
634
+ static VALUE
635
+ unicode_decompose_safe(VALUE obj, VALUE str)
636
+ {
637
+ WString ustr;
638
+ WString result;
639
+ UString ret;
640
+ VALUE vret;
641
+
642
+ Check_Type(str, T_STRING);
643
+ #ifdef HAVE_RUBY_ENCODING_H
644
+ CONVERT_TO_UTF8(str);
645
+ #endif
646
+ WStr_allocWithUTF8(&ustr, RSTRING_PTR(str));
647
+ WStr_alloc(&result);
648
+ decompose_safe_internal(&ustr, &result);
649
+ WStr_free(&ustr);
650
+ sort_canonical(&result);
651
+ UniStr_alloc(&ret);
652
+ WStr_convertIntoUString(&result, &ret);
653
+ WStr_free(&result);
654
+ vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
655
+ UniStr_free(&ret);
656
+
657
+ return vret;
658
+ }
659
+
585
660
  static VALUE
586
661
  unicode_decompose_compat(VALUE obj, VALUE str)
587
662
  {
@@ -664,6 +739,36 @@ unicode_normalize_C(VALUE obj, VALUE str)
664
739
  return vret;
665
740
  }
666
741
 
742
+ static VALUE
743
+ unicode_normalize_safe(VALUE obj, VALUE str)
744
+ {
745
+ WString ustr1;
746
+ WString ustr2;
747
+ WString result;
748
+ UString ret;
749
+ VALUE vret;
750
+
751
+ Check_Type(str, T_STRING);
752
+ #ifdef HAVE_RUBY_ENCODING_H
753
+ CONVERT_TO_UTF8(str);
754
+ #endif
755
+ WStr_allocWithUTF8(&ustr1, RSTRING_PTR(str));
756
+ WStr_alloc(&ustr2);
757
+ decompose_safe_internal(&ustr1, &ustr2);
758
+ WStr_free(&ustr1);
759
+ sort_canonical(&ustr2);
760
+ WStr_alloc(&result);
761
+ compose_internal(&ustr2, &result);
762
+ WStr_free(&ustr2);
763
+ UniStr_alloc(&ret);
764
+ WStr_convertIntoUString(&result, &ret);
765
+ WStr_free(&result);
766
+ vret = TO_(str, ENC_(rb_str_new((char*)ret.str, ret.len)));
767
+ UniStr_free(&ret);
768
+
769
+ return vret;
770
+ }
771
+
667
772
  static VALUE
668
773
  unicode_normalize_KC(VALUE obj, VALUE str)
669
774
  {
@@ -811,6 +916,8 @@ Init_unicode_native()
811
916
 
812
917
  rb_define_module_function(mUnicode, "decompose",
813
918
  unicode_decompose, 1);
919
+ rb_define_module_function(mUnicode, "decompose_safe",
920
+ unicode_decompose_safe, 1);
814
921
  rb_define_module_function(mUnicode, "decompose_compat",
815
922
  unicode_decompose_compat, 1);
816
923
  rb_define_module_function(mUnicode, "compose",
@@ -818,20 +925,28 @@ Init_unicode_native()
818
925
 
819
926
  rb_define_module_function(mUnicode, "normalize_D",
820
927
  unicode_decompose, 1);
928
+ rb_define_module_function(mUnicode, "normalize_D_safe",
929
+ unicode_decompose_safe, 1);
821
930
  rb_define_module_function(mUnicode, "normalize_KD",
822
931
  unicode_decompose_compat, 1);
823
932
  rb_define_module_function(mUnicode, "normalize_C",
824
933
  unicode_normalize_C, 1);
934
+ rb_define_module_function(mUnicode, "normalize_C_safe",
935
+ unicode_normalize_safe, 1);
825
936
  rb_define_module_function(mUnicode, "normalize_KC",
826
937
  unicode_normalize_KC, 1);
827
938
 
828
939
  /* aliases */
829
940
  rb_define_module_function(mUnicode, "nfd",
830
941
  unicode_decompose, 1);
942
+ rb_define_module_function(mUnicode, "nfd_safe",
943
+ unicode_decompose_safe, 1);
831
944
  rb_define_module_function(mUnicode, "nfkd",
832
945
  unicode_decompose_compat, 1);
833
946
  rb_define_module_function(mUnicode, "nfc",
834
947
  unicode_normalize_C, 1);
948
+ rb_define_module_function(mUnicode, "nfc_safe",
949
+ unicode_normalize_safe, 1);
835
950
  rb_define_module_function(mUnicode, "nfkc",
836
951
  unicode_normalize_KC, 1);
837
952
 
@@ -841,4 +956,7 @@ Init_unicode_native()
841
956
  unicode_downcase, 1);
842
957
  rb_define_module_function(mUnicode, "capitalize",
843
958
  unicode_capitalize, 1);
959
+
960
+ rb_define_const(mUnicode, "VERSION",
961
+ rb_str_new2(UNICODE_VERSION));
844
962
  }
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{unicode}
5
- s.version = "0.4.1"
5
+ s.version = "0.4.2"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = [%q{Yoshida Masato}]
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode
3
3
  version: !ruby/object:Gem::Version
4
- hash: 13
4
+ hash: 11
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 4
9
- - 1
10
- version: 0.4.1
9
+ - 2
10
+ version: 0.4.2
11
11
  platform: x86-mingw32
12
12
  authors:
13
13
  - Yoshida Masato
@@ -73,7 +73,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
73
73
  requirements: []
74
74
 
75
75
  rubyforge_project:
76
- rubygems_version: 1.8.15
76
+ rubygems_version: 1.8.17
77
77
  signing_key:
78
78
  specification_version: 3
79
79
  summary: Unicode normalization library.