ninjudd-unicode_collation 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +33 -0
- data/VERSION.yml +4 -0
- data/ext/extconf.rb +4 -0
- data/ext/unicode_collation.c +42 -0
- data/test/test_helper.rb +10 -0
- data/test/unicode_collation_test.rb +7 -0
- metadata +60 -0
data/README.rdoc
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
= Unicode Collation
|
2
|
+
|
3
|
+
Unicode sorting is complicated (http://unicode.org/reports/tr10/), and Ruby doesn't do it
|
4
|
+
correctly. But there is a widely-used implementation of the Unicode collation algorithm in
|
5
|
+
the ICU (International Components for Unicode) libraries. This gem is a simple C wrapper to
|
6
|
+
add the ucol_getSortKey function from the ICU Collation API to Ruby Strings.
|
7
|
+
|
8
|
+
== Usage:
|
9
|
+
|
10
|
+
['cafe', 'cafes', 'café'].sort
|
11
|
+
=> ['cafe', 'cafes', 'café']
|
12
|
+
|
13
|
+
require 'unicode_collation'
|
14
|
+
|
15
|
+
['cafe', 'cafes', 'café'].sort_by {|s| s.unicode_sort_key}
|
16
|
+
=> ['cafe', 'café', 'cafes']
|
17
|
+
|
18
|
+
== Install:
|
19
|
+
|
20
|
+
You must install ICU first. You can download the source from http://site.icu-project.org/download,
|
21
|
+
or on Mac, you can install with MacPorts:
|
22
|
+
|
23
|
+
sudo port install icu
|
24
|
+
|
25
|
+
sudo gem install ninjudd-unicode_collation -s http://gems.github.com
|
26
|
+
|
27
|
+
== To do:
|
28
|
+
|
29
|
+
Add support for locales other than en-US.
|
30
|
+
|
31
|
+
== License:
|
32
|
+
|
33
|
+
Copyright (c) 2009 Justin Balthrop, Geni.com; Published under The MIT License, see LICENSE
|
data/VERSION.yml
ADDED
data/ext/extconf.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
#include "ruby.h"
#include "unicode/ucol.h"
#include "unicode/ustdio.h"

#include <stdlib.h>
|
4
|
+
|
5
|
+
#define BUF_SIZE 1000
|
6
|
+
|
7
|
+
/*
|
8
|
+
* call-seq:
|
9
|
+
* string.unicode_sort_key -> string
|
10
|
+
*
|
11
|
+
* Returns a string that will sort according to the Unicode collation algorithm.
|
12
|
+
*
|
13
|
+
*/
|
14
|
+
static VALUE unicode_sort_key(VALUE string) {
|
15
|
+
char str[BUF_SIZE];
|
16
|
+
UChar ustr[BUF_SIZE];
|
17
|
+
int32_t len = 0;
|
18
|
+
int32_t ulen = 0;
|
19
|
+
UErrorCode status = U_ZERO_ERROR;
|
20
|
+
UCollator *col;
|
21
|
+
|
22
|
+
string = StringValue(string);
|
23
|
+
u_strFromUTF8(ustr, BUF_SIZE, &ulen, RSTRING_PTR(string), RSTRING_LEN(string), &status);
|
24
|
+
if (status == U_INVALID_CHAR_FOUND) {
|
25
|
+
return Qnil;
|
26
|
+
}
|
27
|
+
|
28
|
+
col = ucol_open("en_US", &status);
|
29
|
+
if (U_SUCCESS(status)) {
|
30
|
+
len = ucol_getSortKey(col, ustr, ulen, (uint8_t*)str, BUF_SIZE);
|
31
|
+
ucol_close(col);
|
32
|
+
}
|
33
|
+
if (len == 0) {
|
34
|
+
return Qnil;
|
35
|
+
}
|
36
|
+
|
37
|
+
return rb_str_new(str, len - 1);
|
38
|
+
}
|
39
|
+
|
40
|
+
void Init_unicode_collation() {
|
41
|
+
rb_define_method(rb_cString, "unicode_sort_key", unicode_sort_key, 0);
|
42
|
+
}
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ninjudd-unicode_collation
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Justin Balthrop
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-08-12 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Add unicode collation key from ICU library to Ruby Strings.
|
17
|
+
email: code@justinbalthrop.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions:
|
21
|
+
- ext/extconf.rb
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- README.rdoc
|
26
|
+
- VERSION.yml
|
27
|
+
- ext/unicode_collation.c
|
28
|
+
- ext/extconf.rb
|
29
|
+
- test/test_helper.rb
|
30
|
+
- test/unicode_collation_test.rb
|
31
|
+
has_rdoc: true
|
32
|
+
homepage: http://github.com/ninjudd/unicode_collation
|
33
|
+
licenses:
|
34
|
+
post_install_message:
|
35
|
+
rdoc_options:
|
36
|
+
- --inline-source
|
37
|
+
- --charset=UTF-8
|
38
|
+
require_paths:
|
39
|
+
- ext
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: "0"
|
45
|
+
version:
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: "0"
|
51
|
+
version:
|
52
|
+
requirements: []
|
53
|
+
|
54
|
+
rubyforge_project:
|
55
|
+
rubygems_version: 1.3.5
|
56
|
+
signing_key:
|
57
|
+
specification_version: 2
|
58
|
+
summary: Add unicode sort key to String.
|
59
|
+
test_files: []
|
60
|
+
|