ninjudd-unicode_collation 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc ADDED
@@ -0,0 +1,33 @@
1
+ = Unicode Collation
2
+
3
+ Unicode sorting is complicated (http://unicode.org/reports/tr10/), and Ruby doesn't do it
4
+ correctly. But there is a widely-used implementation of the Unicode collation algorithm in
5
+ the ICU (International Components for Unicode) libraries. This gem is a simple C wrapper to
6
+ add the ucol_getSortKey function from the ICU Collation API to Ruby Strings.
7
+
8
+ == Usage:
9
+
10
+ ['cafe', 'cafes', 'café'].sort
11
+ => ['cafe', 'cafes', 'café']
12
+
13
+ require 'unicode_collation'
14
+
15
+ ['cafe', 'cafes', 'café'].sort_by {|s| s.unicode_sort_key}
16
+ => ['cafe', 'café', 'cafes']
17
+
18
+ == Install:
19
+
20
+ You must install ICU first. You can download the source from http://site.icu-project.org/download,
21
+ or on Mac, you can install with MacPorts:
22
+
23
+ sudo port install icu
24
+
25
+ sudo gem install ninjudd-unicode_collation -s http://gems.github.com
26
+
27
+ == To do:
28
+
29
+ Add support for locales other than en-US.
30
+
31
+ == License:
32
+
33
+ Copyright (c) 2009 Justin Balthrop, Geni.com; Published under The MIT License, see LICENSE
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :minor: 0
3
+ :patch: 1
4
+ :major: 0
data/ext/extconf.rb ADDED
@@ -0,0 +1,4 @@
1
require 'mkmf'

# The extension calls u_strFromUTF8 (ICU common library, icuuc) and the ucol_*
# collation API (icui18n). Stop here with a readable message when either one is
# missing instead of generating a Makefile that can only fail at compile/link time.
%w[icuuc icui18n].each do |lib|
  next if have_library(lib)

  abort "unicode_collation: ICU library '#{lib}' not found. " \
        'Install ICU first (see http://site.icu-project.org/download).'
end

# Still probed as before, but not required: nothing in the extension source
# visible here calls a function from icuio.
have_library('icuio')

create_makefile('unicode_collation')
@@ -0,0 +1,42 @@
1
+ #include "ruby.h"
2
+ #include "unicode/ucol.h"
3
+ #include "unicode/ustdio.h"
4
+
5
+ #define BUF_SIZE 1000
6
+
7
+ /*
8
+ * call-seq:
9
+ * string.unicode_sort_key -> string
10
+ *
11
+ * Returns a string that will sort according to the Unicode collation algorithm.
12
+ *
13
+ */
14
+ static VALUE unicode_sort_key(VALUE string) {
15
+ char str[BUF_SIZE];
16
+ UChar ustr[BUF_SIZE];
17
+ int32_t len = 0;
18
+ int32_t ulen = 0;
19
+ UErrorCode status = U_ZERO_ERROR;
20
+ UCollator *col;
21
+
22
+ string = StringValue(string);
23
+ u_strFromUTF8(ustr, BUF_SIZE, &ulen, RSTRING_PTR(string), RSTRING_LEN(string), &status);
24
+ if (status == U_INVALID_CHAR_FOUND) {
25
+ return Qnil;
26
+ }
27
+
28
+ col = ucol_open("en_US", &status);
29
+ if (U_SUCCESS(status)) {
30
+ len = ucol_getSortKey(col, ustr, ulen, (uint8_t*)str, BUF_SIZE);
31
+ ucol_close(col);
32
+ }
33
+ if (len == 0) {
34
+ return Qnil;
35
+ }
36
+
37
+ return rb_str_new(str, len - 1);
38
+ }
39
+
40
+ void Init_unicode_collation() {
41
+ rb_define_method(rb_cString, "unicode_sort_key", unicode_sort_key, 0);
42
+ }
@@ -0,0 +1,10 @@
1
require 'rubygems'
require 'test/unit'
require 'shoulda'
require 'mocha'

# Make the test directory and the gem's ext/ directory loadable. The gem's
# require path is ext/, so this lets the tests pick up a locally built
# extension rather than depending on an installed copy of the gem.
test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(test_dir)
$LOAD_PATH.unshift(File.expand_path(File.join(test_dir, '..', 'ext')))
require 'unicode_collation'

# Reopened so shared test helpers have a place to live; none are defined yet.
class Test::Unit::TestCase
end
@@ -0,0 +1,7 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
# Exercises String#unicode_sort_key against the behaviour documented in the README.
class UnicodeCollationTest < Test::Unit::TestCase
  should "sort an accented word next to its unaccented form" do
    # "caf\xC3\xA9" is the UTF-8 encoding of 'café', spelled with escapes so the
    # test does not depend on the source file's encoding.
    cafe, cafes, cafe_acute = 'cafe', 'cafes', "caf\xC3\xA9"
    sorted = [cafe, cafes, cafe_acute].sort_by { |s| s.unicode_sort_key }
    assert_equal [cafe, cafe_acute, cafes], sorted
  end

  should "return a String key for valid UTF-8 input" do
    assert_kind_of String, 'cafe'.unicode_sort_key
  end

  should "return nil for input that is not valid UTF-8" do
    assert_nil "\xFF".unicode_sort_key
  end
end
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ninjudd-unicode_collation
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Justin Balthrop
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-08-12 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Add unicode collation key from ICU library to Ruby Strings.
17
+ email: code@justinbalthrop.com
18
+ executables: []
19
+
20
+ extensions:
21
+ - ext/extconf.rb
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - README.rdoc
26
+ - VERSION.yml
27
+ - ext/unicode_collation.c
28
+ - ext/extconf.rb
29
+ - test/test_helper.rb
30
+ - test/unicode_collation_test.rb
31
+ has_rdoc: true
32
+ homepage: http://github.com/ninjudd/unicode_collation
33
+ licenses:
34
+ post_install_message:
35
+ rdoc_options:
36
+ - --inline-source
37
+ - --charset=UTF-8
38
+ require_paths:
39
+ - ext
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: "0"
45
+ version:
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: "0"
51
+ version:
52
+ requirements: []
53
+
54
+ rubyforge_project:
55
+ rubygems_version: 1.3.5
56
+ signing_key:
57
+ specification_version: 2
58
+ summary: Add unicode sort key to String.
59
+ test_files: []
60
+