ninjudd-unicode_collation 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +33 -0
- data/VERSION.yml +4 -0
- data/ext/extconf.rb +4 -0
- data/ext/unicode_collation.c +42 -0
- data/test/test_helper.rb +10 -0
- data/test/unicode_collation_test.rb +7 -0
- metadata +60 -0
data/README.rdoc
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
= Unicode Collation
|
2
|
+
|
3
|
+
Unicode sorting is complicated (http://unicode.org/reports/tr10/), and Ruby doesn't do it
|
4
|
+
correctly. But there is a widely-used implementation of the Unicode collation algorithm in
|
5
|
+
the ICU (International Components for Unicode) libraries. This gem is a simple C wrapper to
|
6
|
+
add the ucol_getSortKey function from the ICU Collation API to Ruby Strings.
|
7
|
+
|
8
|
+
== Usage:
|
9
|
+
|
10
|
+
['cafe', 'cafes', 'café'].sort
|
11
|
+
=> ['cafe', 'cafes', 'café']
|
12
|
+
|
13
|
+
require 'unicode_collation'
|
14
|
+
|
15
|
+
['cafe', 'cafes', 'café'].sort_by {|s| s.unicode_sort_key}
|
16
|
+
=> ['cafe', 'café', 'cafes']
|
17
|
+
|
18
|
+
== Install:
|
19
|
+
|
20
|
+
You must install ICU first. You can download the source from http://site.icu-project.org/download,
|
21
|
+
or on Mac, you can install with MacPorts:
|
22
|
+
|
23
|
+
sudo port install icu
|
24
|
+
|
25
|
+
sudo gem install ninjudd-unicode_collation -s http://gems.github.com
|
26
|
+
|
27
|
+
== To do:
|
28
|
+
|
29
|
+
Add support for locales other than en-US.
|
30
|
+
|
31
|
+
== License:
|
32
|
+
|
33
|
+
Copyright (c) 2009 Justin Balthrop, Geni.com; Published under The MIT License, see LICENSE
|
data/VERSION.yml
ADDED
data/ext/extconf.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "unicode/ucol.h"
|
3
|
+
#include "unicode/ustdio.h"
|
4
|
+
|
5
|
+
/* Capacity of the fixed-size stack buffers in unicode_sort_key: counted in
 * UChar elements for the converted input and in bytes for the sort key. */
#define BUF_SIZE 1000
|
6
|
+
|
7
|
+
/*
|
8
|
+
* call-seq:
|
9
|
+
* string.unicode_sort_key -> string
|
10
|
+
*
|
11
|
+
* Returns a string that will sort according to the Unicode collation algorithm.
|
12
|
+
*
|
13
|
+
*/
|
14
|
+
static VALUE unicode_sort_key(VALUE string) {
|
15
|
+
char str[BUF_SIZE];
|
16
|
+
UChar ustr[BUF_SIZE];
|
17
|
+
int32_t len = 0;
|
18
|
+
int32_t ulen = 0;
|
19
|
+
UErrorCode status = U_ZERO_ERROR;
|
20
|
+
UCollator *col;
|
21
|
+
|
22
|
+
string = StringValue(string);
|
23
|
+
u_strFromUTF8(ustr, BUF_SIZE, &ulen, RSTRING_PTR(string), RSTRING_LEN(string), &status);
|
24
|
+
if (status == U_INVALID_CHAR_FOUND) {
|
25
|
+
return Qnil;
|
26
|
+
}
|
27
|
+
|
28
|
+
col = ucol_open("en_US", &status);
|
29
|
+
if (U_SUCCESS(status)) {
|
30
|
+
len = ucol_getSortKey(col, ustr, ulen, (uint8_t*)str, BUF_SIZE);
|
31
|
+
ucol_close(col);
|
32
|
+
}
|
33
|
+
if (len == 0) {
|
34
|
+
return Qnil;
|
35
|
+
}
|
36
|
+
|
37
|
+
return rb_str_new(str, len - 1);
|
38
|
+
}
|
39
|
+
|
40
|
+
void Init_unicode_collation() {
|
41
|
+
rb_define_method(rb_cString, "unicode_sort_key", unicode_sort_key, 0);
|
42
|
+
}
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ninjudd-unicode_collation
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Justin Balthrop
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-08-12 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Add unicode collation key from ICU library to Ruby Strings.
|
17
|
+
email: code@justinbalthrop.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions:
|
21
|
+
- ext/extconf.rb
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- README.rdoc
|
26
|
+
- VERSION.yml
|
27
|
+
- ext/unicode_collation.c
|
28
|
+
- ext/extconf.rb
|
29
|
+
- test/test_helper.rb
|
30
|
+
- test/unicode_collation_test.rb
|
31
|
+
has_rdoc: true
|
32
|
+
homepage: http://github.com/ninjudd/unicode_collation
|
33
|
+
licenses:
|
34
|
+
post_install_message:
|
35
|
+
rdoc_options:
|
36
|
+
- --inline-source
|
37
|
+
- --charset=UTF-8
|
38
|
+
require_paths:
|
39
|
+
- ext
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: "0"
|
45
|
+
version:
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: "0"
|
51
|
+
version:
|
52
|
+
requirements: []
|
53
|
+
|
54
|
+
rubyforge_project:
|
55
|
+
rubygems_version: 1.3.5
|
56
|
+
signing_key:
|
57
|
+
specification_version: 2
|
58
|
+
summary: Add unicode sort key to String.
|
59
|
+
test_files: []
|
60
|
+
|