ninjudd-icunicode 0.0.1 → 0.0.2
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/VERSION.yml +1 -1
- data/ext/icunicode.c +40 -14
- data/test/icunicode_test.rb +12 -3
- metadata +2 -2
data/VERSION.yml
CHANGED
data/ext/icunicode.c
CHANGED
|
@@ -6,6 +6,9 @@
|
|
|
6
6
|
|
|
7
7
|
#define BUF_SIZE 1000
|
|
8
8
|
|
|
9
|
+
VALUE cTransliterator;
|
|
10
|
+
VALUE trans_hash;
|
|
11
|
+
|
|
9
12
|
static void to_utf16(VALUE string, UChar *ustr, int32_t *ulen) {
|
|
10
13
|
UErrorCode status = U_ZERO_ERROR;
|
|
11
14
|
|
|
@@ -50,36 +53,59 @@ static VALUE unicode_sort_key(VALUE string) {
|
|
|
50
53
|
return rb_str_new(str, len - 1);
|
|
51
54
|
}
|
|
52
55
|
|
|
56
|
+
static void trans_free(void *trans) {
|
|
57
|
+
utrans_close(trans);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
static UTransliterator* get_trans(VALUE transform) {
|
|
61
|
+
UChar str[BUF_SIZE];
|
|
62
|
+
int32_t len = 0;
|
|
63
|
+
UTransliterator *trans;
|
|
64
|
+
UErrorCode status = U_ZERO_ERROR;
|
|
65
|
+
VALUE obj;
|
|
66
|
+
|
|
67
|
+
obj = rb_hash_aref(trans_hash, transform);
|
|
68
|
+
if (NIL_P(obj)) {
|
|
69
|
+
to_utf16(transform, str, &len);
|
|
70
|
+
trans = utrans_openU(str, len, UTRANS_FORWARD, NULL, 0, NULL, &status);
|
|
71
|
+
if (trans) {
|
|
72
|
+
obj = Data_Wrap_Struct(rb_cObject, 0, trans_free, trans);
|
|
73
|
+
rb_hash_aset(trans_hash, transform, obj);
|
|
74
|
+
} else {
|
|
75
|
+
rb_raise(rb_eArgError, "invalid transform: %s", RSTRING_PTR(transform));
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
Data_Get_Struct(obj, UTransliterator, trans);
|
|
80
|
+
return trans;
|
|
81
|
+
}
|
|
82
|
+
|
|
53
83
|
/*
|
|
54
84
|
* call-seq:
|
|
55
|
-
* string.transliterate(
|
|
85
|
+
* string.transliterate(transform_string) -> string
|
|
56
86
|
*
|
|
57
87
|
* Transliterates string using transform.
|
|
58
88
|
*
|
|
59
89
|
*/
|
|
60
90
|
static VALUE unicode_transliterate(VALUE string, VALUE transform) {
|
|
61
91
|
UChar str[BUF_SIZE];
|
|
62
|
-
UChar trn[BUF_SIZE];
|
|
63
92
|
int32_t slen = 0;
|
|
64
|
-
|
|
65
|
-
UErrorCode status = U_ZERO_ERROR;
|
|
93
|
+
UErrorCode status = U_ZERO_ERROR;
|
|
66
94
|
UTransliterator *trans;
|
|
95
|
+
VALUE tobj;
|
|
67
96
|
|
|
68
|
-
to_utf16(string,
|
|
69
|
-
to_utf16(transform, trn, &tlen);
|
|
97
|
+
to_utf16(string, str, &slen);
|
|
70
98
|
|
|
71
|
-
trans =
|
|
72
|
-
|
|
73
|
-
utrans_transUChars(trans, str, &slen, BUF_SIZE, 0, &slen, &status);
|
|
74
|
-
utrans_close(trans);
|
|
75
|
-
} else {
|
|
76
|
-
rb_raise(rb_eArgError, "invalid transform: %s", RSTRING_PTR(transform));
|
|
77
|
-
}
|
|
99
|
+
trans = get_trans(transform);
|
|
100
|
+
utrans_transUChars(trans, str, &slen, BUF_SIZE, 0, &slen, &status);
|
|
78
101
|
|
|
79
102
|
to_utf8(str, slen);
|
|
80
103
|
}
|
|
81
|
-
|
|
104
|
+
|
|
82
105
|
void Init_icunicode() {
|
|
83
106
|
rb_define_method(rb_cString, "unicode_sort_key", unicode_sort_key, 0);
|
|
84
107
|
rb_define_method(rb_cString, "transliterate", unicode_transliterate, 1);
|
|
108
|
+
|
|
109
|
+
trans_hash = rb_hash_new();
|
|
110
|
+
rb_global_variable(&trans_hash);
|
|
85
111
|
}
|
data/test/icunicode_test.rb
CHANGED
|
@@ -1,7 +1,16 @@
|
|
|
1
1
|
require File.dirname(__FILE__) + '/test_helper'
|
|
2
2
|
|
|
3
3
|
class UnicodeCollationTest < Test::Unit::TestCase
|
|
4
|
-
should "
|
|
5
|
-
|
|
4
|
+
should "sort using unicode collation" do
|
|
5
|
+
assert_equal ["cafe", "café", "cafes"], ["cafe", "cafes", "café"].sort_by {|s| s.unicode_sort_key}
|
|
6
|
+
assert_equal ["role", "Role", "rôle"], ["rôle", "role", "Role"].sort_by {|s| s.unicode_sort_key}
|
|
7
|
+
assert_equal ["cote", "coté", "côte", "côté"], ["côté", "coté", "cote", "côte"].sort_by {|s| s.unicode_sort_key}
|
|
6
8
|
end
|
|
7
|
-
|
|
9
|
+
|
|
10
|
+
should "transliterate" do
|
|
11
|
+
assert_equal "ムクドナルデ'ス", "mcdonald's".transliterate('Katakana')
|
|
12
|
+
assert_equal "さむらい", "samurai".transliterate('Hiragana')
|
|
13
|
+
assert_equal "θε γρεατ γρεεκ", "the great greek".transliterate('Greek')
|
|
14
|
+
assert_equal "фром руссиа уитх лове", "from russia with love".transliterate('Cyrillic')
|
|
15
|
+
end
|
|
16
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ninjudd-icunicode
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.1
|
|
4
|
+
version: 0.0.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Justin Balthrop
|
|
@@ -9,7 +9,7 @@ autorequire:
|
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
11
|
|
|
12
|
-
date: 2009-08-
|
|
12
|
+
date: 2009-08-17 00:00:00 -07:00
|
|
13
13
|
default_executable:
|
|
14
14
|
dependencies: []
|
|
15
15
|
|