ninjudd-icunicode 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc ADDED
@@ -0,0 +1,43 @@
1
+ = ICUnicode
2
+
3
+ Unicode sorting is complicated (http://unicode.org/reports/tr10), and Ruby doesn't do it
4
+ correctly. But there is a widely-used implementation of the Unicode collation algorithm in
5
+ the ICU (International Components for Unicode) libraries. There is also no way to do
6
+ transliteration in Ruby (http://userguide.icu-project.org/transforms/general). This gem is
7
+ a simple C wrapper around ucol_getSortKey from the ICU Collation API and utrans_transUChars
8
+ from the ICU Transliteration API. These are added as simple methods on String.
9
+
10
+ == Usage:
11
+
12
+ ["cafe", "cafes", "caf\303\251"].sort
13
+ => ["cafe", "cafes", "caf\303\251"]
14
+
15
+ require 'icunicode'
16
+
17
+ ["cafe", "cafes", "caf\303\251"].sort_by {|s| s.unicode_sort_key}
18
+ => ["cafe", "caf\303\251", "cafes"]
19
+
20
+ "blueberry".transliterate("Katakana").transliterate("Latin")
21
+ => "burueberrui"
22
+
23
+ "blueberry".transliterate("Greek").transliterate("Latin")
24
+ => "blyeberry"
25
+
26
+ == Install:
27
+
28
+ You must install ICU first. You can download the source from http://site.icu-project.org/download,
29
+ or on Mac, you can install with MacPorts:
30
+
31
+ sudo port install icu
32
+
33
+ Then install the gem:
34
+
35
+ sudo gem install ninjudd-icunicode -s http://gems.github.com
36
+
37
+ == To do:
38
+
39
+ Add support for locales other than en-US. Increase buffer size or make it grow dynamically.
40
+
41
+ == License:
42
+
43
+ Copyright (c) 2009 Justin Balthrop, Geni.com; Published under The MIT License, see LICENSE
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :minor: 0
3
+ :patch: 1
4
+ :major: 0
data/ext/extconf.rb ADDED
@@ -0,0 +1,4 @@
1
require 'mkmf'

# ICU libraries the extension is linked against:
#   icuuc   - common library (provides u_strFromUTF8 / u_strToUTF8)
#   icui18n - collation and transliteration
#   icuio   - ustdio, whose header ext/icunicode.c includes
# Stop immediately with a readable message when one is missing, instead of
# generating a Makefile that only fails later at compile or load time.
%w[icuuc icui18n icuio].each do |lib|
  unless have_library(lib)
    abort "ICU library '#{lib}' not found; install ICU before building icunicode"
  end
end

create_makefile('icunicode')
data/ext/icunicode.c ADDED
@@ -0,0 +1,85 @@
1
+ #include "ruby.h"
2
+ #include "unicode/ucol.h"
3
+ #include "unicode/utrans.h"
4
+ #include "unicode/ustring.h"
5
+ #include "unicode/ustdio.h"
6
+
7
+ #define BUF_SIZE 1000
8
+
9
+ static void to_utf16(VALUE string, UChar *ustr, int32_t *ulen) {
10
+ UErrorCode status = U_ZERO_ERROR;
11
+
12
+ string = StringValue(string);
13
+ u_strFromUTF8(ustr, BUF_SIZE, ulen, RSTRING_PTR(string), RSTRING_LEN(string), &status);
14
+ if (status == U_INVALID_CHAR_FOUND) ulen = 0;
15
+ }
16
+
17
+ static VALUE to_utf8(UChar *ustr, int32_t ulen) {
18
+ char str[BUF_SIZE];
19
+ int32_t len = 0;
20
+ UErrorCode status = U_ZERO_ERROR;
21
+
22
+ u_strToUTF8(str, BUF_SIZE, &len, ustr, ulen, &status);
23
+ if (status == U_INVALID_CHAR_FOUND) len = 0;
24
+ return rb_str_new(str, len);
25
+ }
26
+
27
+ /*
28
+ * call-seq:
29
+ * string.unicode_sort_key -> string
30
+ *
31
+ * Returns a string that will sort according to the Unicode collation algorithm.
32
+ *
33
+ */
34
+ static VALUE unicode_sort_key(VALUE string) {
35
+ char str[BUF_SIZE];
36
+ UChar ustr[BUF_SIZE];
37
+ int32_t len = 0;
38
+ int32_t ulen = 0;
39
+ UErrorCode status = U_ZERO_ERROR;
40
+ UCollator *col;
41
+
42
+ to_utf16(string, ustr, &ulen);
43
+
44
+ col = ucol_open("en_US", &status);
45
+ if (U_SUCCESS(status)) {
46
+ len = ucol_getSortKey(col, ustr, ulen, (uint8_t*)str, BUF_SIZE);
47
+ ucol_close(col);
48
+ }
49
+
50
+ return rb_str_new(str, len - 1);
51
+ }
52
+
53
+ /*
54
+ * call-seq:
55
+ * string.transliterate(transform) -> string
56
+ *
57
+ * Transliterates string using transform.
58
+ *
59
+ */
60
+ static VALUE unicode_transliterate(VALUE string, VALUE transform) {
61
+ UChar str[BUF_SIZE];
62
+ UChar trn[BUF_SIZE];
63
+ int32_t slen = 0;
64
+ int32_t tlen = 0;
65
+ UErrorCode status = U_ZERO_ERROR;
66
+ UTransliterator *trans;
67
+
68
+ to_utf16(string, str, &slen);
69
+ to_utf16(transform, trn, &tlen);
70
+
71
+ trans = utrans_openU(trn, tlen, UTRANS_FORWARD, NULL, 0, NULL, &status);
72
+ if (trans) {
73
+ utrans_transUChars(trans, str, &slen, BUF_SIZE, 0, &slen, &status);
74
+ utrans_close(trans);
75
+ } else {
76
+ rb_raise(rb_eArgError, "invalid transform: %s", RSTRING_PTR(transform));
77
+ }
78
+
79
+ to_utf8(str, slen);
80
+ }
81
+
82
+ void Init_icunicode() {
83
+ rb_define_method(rb_cString, "unicode_sort_key", unicode_sort_key, 0);
84
+ rb_define_method(rb_cString, "transliterate", unicode_transliterate, 1);
85
+ }
@@ -0,0 +1,7 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
# Exercises the String methods added by the icunicode extension.
class UnicodeCollationTest < Test::Unit::TestCase
  should "sort accented words next to their unaccented forms" do
    words = ["cafe", "cafes", "caf\303\251"]
    assert_equal ["cafe", "caf\303\251", "cafes"], words.sort_by {|s| s.unicode_sort_key}
  end

  should "raise ArgumentError for an unknown transform" do
    assert_raise(ArgumentError) { "blueberry".transliterate("NoSuchTransform") }
  end
end
@@ -0,0 +1,10 @@
1
# Shared setup for the tests: loads the test libraries, then the icunicode
# extension itself.
%w[rubygems test/unit shoulda mocha].each { |lib| require lib }

# Put this file's directory at the front of the load path before requiring
# the extension.
$:.unshift File.dirname(__FILE__)
require 'icunicode'

# Reopens Test::Unit::TestCase without adding anything to it.
class Test::Unit::TestCase; end
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ninjudd-icunicode
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Justin Balthrop
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-08-15 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: ICU Unicode Transliteration and Collation in Ruby.
17
+ email: code@justinbalthrop.com
18
+ executables: []
19
+
20
+ extensions:
21
+ - ext/extconf.rb
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - README.rdoc
26
+ - VERSION.yml
27
+ - ext/icunicode.c
28
+ - ext/extconf.rb
29
+ - test/test_helper.rb
30
+ - test/icunicode_test.rb
31
+ has_rdoc: true
32
+ homepage: http://github.com/ninjudd/unicode_collation
33
+ licenses:
34
+ post_install_message:
35
+ rdoc_options:
36
+ - --inline-source
37
+ - --charset=UTF-8
38
+ require_paths:
39
+ - ext
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: "0"
45
+ version:
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: "0"
51
+ version:
52
+ requirements: []
53
+
54
+ rubyforge_project:
55
+ rubygems_version: 1.3.5
56
+ signing_key:
57
+ specification_version: 2
58
+ summary: Unicode Transliteration and Collation in Ruby.
59
+ test_files: []
60
+