ninjudd-icunicode 0.0.1 → 0.0.2

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
data/VERSION.yml CHANGED
@@ -1,4 +1,4 @@
1
1
  ---
2
2
  :minor: 0
3
- :patch: 1
3
+ :patch: 2
4
4
  :major: 0
data/ext/icunicode.c CHANGED
@@ -6,6 +6,9 @@
6
6
 
7
7
  #define BUF_SIZE 1000
8
8
 
9
+ VALUE cTransliterator;
10
+ VALUE trans_hash;
11
+
9
12
  static void to_utf16(VALUE string, UChar *ustr, int32_t *ulen) {
10
13
  UErrorCode status = U_ZERO_ERROR;
11
14
 
@@ -50,36 +53,59 @@ static VALUE unicode_sort_key(VALUE string) {
50
53
  return rb_str_new(str, len - 1);
51
54
  }
52
55
 
56
+ static void trans_free(void *trans) {
57
+ utrans_close(trans);
58
+ }
59
+
60
+ static UTransliterator* get_trans(VALUE transform) {
61
+ UChar str[BUF_SIZE];
62
+ int32_t len = 0;
63
+ UTransliterator *trans;
64
+ UErrorCode status = U_ZERO_ERROR;
65
+ VALUE obj;
66
+
67
+ obj = rb_hash_aref(trans_hash, transform);
68
+ if (NIL_P(obj)) {
69
+ to_utf16(transform, str, &len);
70
+ trans = utrans_openU(str, len, UTRANS_FORWARD, NULL, 0, NULL, &status);
71
+ if (trans) {
72
+ obj = Data_Wrap_Struct(rb_cObject, 0, trans_free, trans);
73
+ rb_hash_aset(trans_hash, transform, obj);
74
+ } else {
75
+ rb_raise(rb_eArgError, "invalid transform: %s", RSTRING_PTR(transform));
76
+ }
77
+ }
78
+
79
+ Data_Get_Struct(obj, UTransliterator, trans);
80
+ return trans;
81
+ }
82
+
53
83
  /*
54
84
  * call-seq:
55
- * string.transliterate(transform) -> string
85
+ * string.transliterate(transform_string) -> string
56
86
  *
57
87
  * Transliterates string using transform.
58
88
  *
59
89
  */
60
90
  static VALUE unicode_transliterate(VALUE string, VALUE transform) {
61
91
  UChar str[BUF_SIZE];
62
- UChar trn[BUF_SIZE];
63
92
  int32_t slen = 0;
64
- int32_t tlen = 0;
65
- UErrorCode status = U_ZERO_ERROR;
93
+ UErrorCode status = U_ZERO_ERROR;
66
94
  UTransliterator *trans;
95
+ VALUE tobj;
67
96
 
68
- to_utf16(string, str, &slen);
69
- to_utf16(transform, trn, &tlen);
97
+ to_utf16(string, str, &slen);
70
98
 
71
- trans = utrans_openU(trn, tlen, UTRANS_FORWARD, NULL, 0, NULL, &status);
72
- if (trans) {
73
- utrans_transUChars(trans, str, &slen, BUF_SIZE, 0, &slen, &status);
74
- utrans_close(trans);
75
- } else {
76
- rb_raise(rb_eArgError, "invalid transform: %s", RSTRING_PTR(transform));
77
- }
99
+ trans = get_trans(transform);
100
+ utrans_transUChars(trans, str, &slen, BUF_SIZE, 0, &slen, &status);
78
101
 
79
102
  to_utf8(str, slen);
80
103
  }
81
-
104
+
82
105
  void Init_icunicode() {
83
106
  rb_define_method(rb_cString, "unicode_sort_key", unicode_sort_key, 0);
84
107
  rb_define_method(rb_cString, "transliterate", unicode_transliterate, 1);
108
+
109
+ trans_hash = rb_hash_new();
110
+ rb_global_variable(&trans_hash);
85
111
  }
@@ -1,7 +1,16 @@
1
1
  require File.dirname(__FILE__) + '/test_helper'
2
2
 
3
3
  class UnicodeCollationTest < Test::Unit::TestCase
4
- should "probably rename this file and start testing for real" do
5
- flunk "hey buddy, you should probably rename this file and start testing for real"
4
+ should "sort using unicode collation" do
5
+ assert_equal ["cafe", "café", "cafes"], ["cafe", "cafes", "café"].sort_by {|s| s.unicode_sort_key}
6
+ assert_equal ["role", "Role", "rôle"], ["rôle", "role", "Role"].sort_by {|s| s.unicode_sort_key}
7
+ assert_equal ["cote", "coté", "côte", "côté"], ["côté", "coté", "cote", "côte"].sort_by {|s| s.unicode_sort_key}
6
8
  end
7
- end
9
+
10
+ should "transliterate" do
11
+ assert_equal "ムクドナルデ'ス", "mcdonald's".transliterate('Katakana')
12
+ assert_equal "さむらい", "samurai".transliterate('Hiragana')
13
+ assert_equal "θε γρεατ γρεεκ", "the great greek".transliterate('Greek')
14
+ assert_equal "фром руссиа уитх лове", "from russia with love".transliterate('Cyrillic')
15
+ end
16
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ninjudd-icunicode
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Balthrop
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-15 00:00:00 -07:00
12
+ date: 2009-08-17 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies: []
15
15