ffi-icu 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/ffi-icu.rb +7 -4
- data/lib/ffi-icu/break_iterator.rb +3 -1
- data/lib/ffi-icu/chardet.rb +3 -10
- data/lib/ffi-icu/collation.rb +16 -19
- data/lib/ffi-icu/core_ext/string.rb +4 -0
- data/lib/ffi-icu/lib.rb +27 -17
- data/lib/ffi-icu/normalization.rb +1 -1
- data/lib/ffi-icu/transliteration.rb +4 -10
- data/lib/ffi-icu/version.rb +1 -1
- data/spec/break_iterator_spec.rb +13 -0
- data/spec/chardet_spec.rb +0 -1
- data/spec/collation_spec.rb +10 -2
- data/spec/transliteration_spec.rb +0 -2
- metadata +1 -1
data/lib/ffi-icu.rb
CHANGED
@@ -10,6 +10,8 @@ module ICU
|
|
10
10
|
:osx
|
11
11
|
when /linux/
|
12
12
|
:linux
|
13
|
+
when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
|
14
|
+
:windows
|
13
15
|
else
|
14
16
|
os
|
15
17
|
end
|
@@ -20,6 +22,11 @@ module ICU
|
|
20
22
|
end
|
21
23
|
end
|
22
24
|
|
25
|
+
unless ICU.ruby19?
|
26
|
+
require 'jcode'
|
27
|
+
$KCODE = 'u'
|
28
|
+
end
|
29
|
+
|
23
30
|
require "ffi-icu/core_ext/string"
|
24
31
|
require "ffi-icu/lib"
|
25
32
|
require "ffi-icu/uchar"
|
@@ -29,7 +36,3 @@ require "ffi-icu/transliteration"
|
|
29
36
|
require "ffi-icu/normalization"
|
30
37
|
require "ffi-icu/break_iterator"
|
31
38
|
|
32
|
-
unless ICU.ruby19?
|
33
|
-
require 'jcode'
|
34
|
-
$KCODE = 'u'
|
35
|
-
end
|
data/lib/ffi-icu/break_iterator.rb
CHANGED
@@ -18,11 +18,13 @@ module ICU
|
|
18
18
|
|
19
19
|
def text=(str)
|
20
20
|
Lib.check_error { |err|
|
21
|
-
Lib.ubrk_setText @iterator, UCharPointer.from_string(str), str.
|
21
|
+
Lib.ubrk_setText @iterator, UCharPointer.from_string(str), str.jlength, err
|
22
22
|
}
|
23
23
|
end
|
24
24
|
|
25
25
|
def each(&blk)
|
26
|
+
return to_enum(:each) unless block_given?
|
27
|
+
|
26
28
|
int = first
|
27
29
|
|
28
30
|
while int != UBRK_DONE
|
data/lib/ffi-icu/chardet.rb
CHANGED
@@ -2,18 +2,15 @@ module ICU
|
|
2
2
|
module CharDet
|
3
3
|
|
4
4
|
def self.detect(string)
|
5
|
-
|
6
|
-
res = detector.detect string
|
7
|
-
detector.close
|
8
|
-
|
9
|
-
res
|
5
|
+
Detector.new.detect string
|
10
6
|
end
|
11
7
|
|
12
8
|
class Detector
|
13
9
|
Match = Struct.new(:name, :confidence, :language)
|
14
10
|
|
15
11
|
def initialize
|
16
|
-
|
12
|
+
ptr = Lib.check_error { |err| Lib.ucsdet_open err }
|
13
|
+
@detector = FFI::AutoPointer.new(ptr, Lib.method(:ucsdet_close))
|
17
14
|
end
|
18
15
|
|
19
16
|
def input_filter_enabled?
|
@@ -30,10 +27,6 @@ module ICU
|
|
30
27
|
end
|
31
28
|
end
|
32
29
|
|
33
|
-
def close
|
34
|
-
Lib.ucsdet_close @detector
|
35
|
-
end
|
36
|
-
|
37
30
|
def detect(str)
|
38
31
|
set_text(str)
|
39
32
|
|
data/lib/ffi-icu/collation.rb
CHANGED
@@ -2,11 +2,7 @@ module ICU
|
|
2
2
|
module Collation
|
3
3
|
|
4
4
|
def self.collate(locale, arr)
|
5
|
-
|
6
|
-
res = collator.collate(arr)
|
7
|
-
collator.close
|
8
|
-
|
9
|
-
res
|
5
|
+
Collator.new(locale).collate(arr)
|
10
6
|
end
|
11
7
|
|
12
8
|
def self.keywords
|
@@ -34,7 +30,8 @@ module ICU
|
|
34
30
|
ULOC_VALID_LOCALE = 1
|
35
31
|
|
36
32
|
def initialize(locale)
|
37
|
-
|
33
|
+
ptr = Lib.check_error { |error| Lib.ucol_open(locale, error) }
|
34
|
+
@c = FFI::AutoPointer.new(ptr, Lib.method(:ucol_close))
|
38
35
|
end
|
39
36
|
|
40
37
|
def locale
|
@@ -44,19 +41,19 @@ module ICU
|
|
44
41
|
def compare(a, b)
|
45
42
|
Lib.ucol_strcoll(
|
46
43
|
@c,
|
47
|
-
UCharPointer.from_string(a), a.
|
48
|
-
UCharPointer.from_string(b), b.
|
44
|
+
UCharPointer.from_string(a), a.jlength,
|
45
|
+
UCharPointer.from_string(b), b.jlength
|
49
46
|
)
|
50
47
|
end
|
51
48
|
|
52
49
|
def greater?(a, b)
|
53
|
-
Lib.ucol_greater(@c, UCharPointer.from_string(a), a.
|
54
|
-
UCharPointer.from_string(b), b.
|
50
|
+
Lib.ucol_greater(@c, UCharPointer.from_string(a), a.jlength,
|
51
|
+
UCharPointer.from_string(b), b.jlength)
|
55
52
|
end
|
56
53
|
|
57
54
|
def greater_or_equal?(a, b)
|
58
|
-
Lib.ucol_greaterOrEqual(@c, UCharPointer.from_string(a), a.
|
59
|
-
UCharPointer.from_string(b), b.
|
55
|
+
Lib.ucol_greaterOrEqual(@c, UCharPointer.from_string(a), a.jlength,
|
56
|
+
UCharPointer.from_string(b), b.jlength)
|
60
57
|
end
|
61
58
|
|
62
59
|
def equal?(*args)
|
@@ -68,16 +65,16 @@ module ICU
|
|
68
65
|
|
69
66
|
a, b = args
|
70
67
|
|
71
|
-
Lib.ucol_equal(@c, UCharPointer.from_string(a), a.
|
72
|
-
UCharPointer.from_string(b), b.
|
68
|
+
Lib.ucol_equal(@c, UCharPointer.from_string(a), a.jlength,
|
69
|
+
UCharPointer.from_string(b), b.jlength)
|
73
70
|
end
|
74
71
|
|
75
|
-
def collate(
|
76
|
-
|
77
|
-
|
72
|
+
def collate(sortable)
|
73
|
+
unless sortable.respond_to?(:sort)
|
74
|
+
raise ArgumentError, "argument must respond to :sort with arity of 2"
|
75
|
+
end
|
78
76
|
|
79
|
-
|
80
|
-
Lib.ucol_close(@c)
|
77
|
+
sortable.sort { |a, b| compare a, b }
|
81
78
|
end
|
82
79
|
end # Collator
|
83
80
|
|
data/lib/ffi-icu/lib.rb
CHANGED
@@ -9,10 +9,11 @@ module ICU
|
|
9
9
|
extend FFI::Library
|
10
10
|
|
11
11
|
VERSIONS = {
|
12
|
-
"
|
13
|
-
"
|
12
|
+
"48" => "_48",
|
13
|
+
"46" => "_46",
|
14
14
|
"45" => "_45",
|
15
|
-
"
|
15
|
+
"44" => "_44",
|
16
|
+
"42" => "_4_2",
|
16
17
|
}
|
17
18
|
|
18
19
|
# FIXME: this is incredibly ugly, figure out some better way
|
@@ -33,27 +34,34 @@ module ICU
|
|
33
34
|
end
|
34
35
|
end
|
35
36
|
|
37
|
+
libs = nil
|
38
|
+
versions = VERSIONS.keys
|
39
|
+
|
36
40
|
# ok, try to find it
|
37
41
|
case ICU.platform
|
38
42
|
when :osx
|
39
43
|
ffi_lib "icucore"
|
40
44
|
when :linux
|
41
|
-
versions = VERSIONS.keys
|
42
45
|
libs = ffi_lib versions.map { |v| "libicui18n.so.#{v}" },
|
43
46
|
versions.map { |v| "libicutu.so.#{v}" }
|
44
47
|
|
45
|
-
|
46
|
-
|
47
|
-
suffix = func_version
|
48
|
-
end
|
49
|
-
end
|
48
|
+
when :windows
|
49
|
+
libs = ffi_lib versions.map { |v| "icuin#{v}.dll" }
|
50
50
|
else
|
51
51
|
raise LoadError
|
52
52
|
end
|
53
53
|
|
54
|
+
if libs
|
55
|
+
lib_name = libs.first.name
|
56
|
+
version = VERSIONS.find { |object, func| lib_name =~ /#{object}(\.dll)?$/ }
|
57
|
+
|
58
|
+
version or raise "unable to find suffix in #{lib_name}"
|
59
|
+
suffix = version.last
|
60
|
+
end
|
61
|
+
|
54
62
|
suffix
|
55
63
|
rescue LoadError => ex
|
56
|
-
raise LoadError, "no idea how to load ICU on #{ICU.platform}, patches appreciated! (#{ex.message})"
|
64
|
+
raise LoadError, "no idea how to load ICU on #{ICU.platform.inspect}, patches appreciated! (#{ex.message})"
|
57
65
|
end
|
58
66
|
|
59
67
|
def self.check_error
|
@@ -98,8 +106,8 @@ module ICU
|
|
98
106
|
|
99
107
|
attach_function :u_errorName, "u_errorName#{suffix}", [:int], :string
|
100
108
|
attach_function :uenum_count, "uenum_count#{suffix}", [:pointer, :pointer], :int
|
101
|
-
attach_function :uenum_close, "uenum_close#{suffix}",
|
102
|
-
attach_function :uenum_next, "uenum_next#{suffix}",
|
109
|
+
attach_function :uenum_close, "uenum_close#{suffix}", [:pointer], :void
|
110
|
+
attach_function :uenum_next, "uenum_next#{suffix}", [:pointer, :pointer, :pointer], :string
|
103
111
|
attach_function :u_charsToUChars, "u_charsToUChars#{suffix}", [:string, :pointer, :int32_t], :void
|
104
112
|
attach_function :u_UCharsToChars, "u_UCharsToChars#{suffix}", [:pointer, :string, :int32_t], :void
|
105
113
|
|
@@ -126,11 +134,11 @@ module ICU
|
|
126
134
|
# http://icu-project.org/apiref/icu4c/ucol_8h.html
|
127
135
|
#
|
128
136
|
|
129
|
-
attach_function :ucol_open, "ucol_open#{suffix}",
|
130
|
-
attach_function :ucol_close, "ucol_close#{suffix}",
|
131
|
-
attach_function :ucol_strcoll, "ucol_strcoll#{suffix}",
|
132
|
-
attach_function :ucol_getKeywords, "ucol_getKeywords#{suffix}",
|
133
|
-
attach_function :ucol_getKeywordValues, "ucol_getKeywordValues#{suffix}",
|
137
|
+
attach_function :ucol_open, "ucol_open#{suffix}", [:string, :pointer], :pointer
|
138
|
+
attach_function :ucol_close, "ucol_close#{suffix}", [:pointer], :void
|
139
|
+
attach_function :ucol_strcoll, "ucol_strcoll#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t], :int
|
140
|
+
attach_function :ucol_getKeywords, "ucol_getKeywords#{suffix}", [:pointer], :pointer
|
141
|
+
attach_function :ucol_getKeywordValues, "ucol_getKeywordValues#{suffix}", [:string, :pointer], :pointer
|
134
142
|
attach_function :ucol_getAvailable, "ucol_getAvailable#{suffix}", [:int32_t], :string
|
135
143
|
attach_function :ucol_countAvailable, "ucol_countAvailable#{suffix}", [], :int32_t
|
136
144
|
attach_function :ucol_getLocale, "ucol_getLocale#{suffix}", [:pointer, :int, :pointer], :string
|
@@ -189,6 +197,8 @@ module ICU
|
|
189
197
|
#
|
190
198
|
# Text Boundary Analysis
|
191
199
|
#
|
200
|
+
# http://icu-project.org/apiref/icu4c/ubrk_8h.html
|
201
|
+
#
|
192
202
|
|
193
203
|
enum :iterator_type, [ :character, :word, :line, :sentence, :title]
|
194
204
|
enum :word_break, [ :none, 0,
|
data/lib/ffi-icu/normalization.rb
CHANGED
@@ -2,7 +2,7 @@ module ICU
|
|
2
2
|
module Normalization
|
3
3
|
|
4
4
|
def self.normalize(input, mode = :default)
|
5
|
-
input_length = input.
|
5
|
+
input_length = input.jlength
|
6
6
|
needed_length = out_length = options = 0
|
7
7
|
in_ptr = UCharPointer.from_string(input)
|
8
8
|
out_ptr = UCharPointer.new(out_length)
|
data/lib/ffi-icu/transliteration.rb
CHANGED
@@ -4,10 +4,7 @@ module ICU
|
|
4
4
|
class << self
|
5
5
|
def transliterate(translit_id, str, rules = nil)
|
6
6
|
t = Transliterator.new translit_id, rules
|
7
|
-
|
8
|
-
t.close
|
9
|
-
|
10
|
-
res
|
7
|
+
t.transliterate str
|
11
8
|
end
|
12
9
|
alias_method :translit, :transliterate
|
13
10
|
|
@@ -29,7 +26,7 @@ module ICU
|
|
29
26
|
rules_length = 0
|
30
27
|
|
31
28
|
if rules
|
32
|
-
rules_length = rules.
|
29
|
+
rules_length = rules.jlength + 1
|
33
30
|
rules = UCharPointer.from_string(rules)
|
34
31
|
end
|
35
32
|
|
@@ -37,7 +34,8 @@ module ICU
|
|
37
34
|
begin
|
38
35
|
Lib.check_error do |status|
|
39
36
|
# couldn't get utrans_openU to work properly, so using deprecated utrans_open for now
|
40
|
-
|
37
|
+
ptr = Lib.utrans_openU(UCharPointer.from_string(id), id.jlength, direction, rules, rules_length, @parse_error, status)
|
38
|
+
@tr = FFI::AutoPointer.new(ptr, Lib.method(:utrans_close))
|
41
39
|
end
|
42
40
|
rescue ICU::Error => ex
|
43
41
|
raise ex, "#{ex.message} (#{parse_error})"
|
@@ -80,10 +78,6 @@ module ICU
|
|
80
78
|
buf.string text_length.get_int32(0)
|
81
79
|
end
|
82
80
|
|
83
|
-
def close
|
84
|
-
Lib.utrans_close @tr
|
85
|
-
end
|
86
|
-
|
87
81
|
end # Transliterator
|
88
82
|
end # Translit
|
89
83
|
end # ICU
|
data/lib/ffi-icu/version.rb
CHANGED
data/spec/break_iterator_spec.rb
CHANGED
@@ -18,6 +18,12 @@ module ICU
|
|
18
18
|
iterator.to_a.should == [0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
|
19
19
|
end
|
20
20
|
|
21
|
+
it "finds all word boundaries in a Thai string" do
|
22
|
+
iterator = BreakIterator.new :word, "th_TH"
|
23
|
+
iterator.text = "การทดลอง"
|
24
|
+
iterator.to_a.should == [0, 3, 8]
|
25
|
+
end
|
26
|
+
|
21
27
|
it "finds all sentence boundaries in an English string" do
|
22
28
|
iterator = BreakIterator.new :sentence, "en_US"
|
23
29
|
iterator.text = "This is a sentence. This is another sentence, with a comma in it."
|
@@ -45,5 +51,12 @@ module ICU
|
|
45
51
|
iterator.should_not be_boundary(10)
|
46
52
|
end
|
47
53
|
|
54
|
+
it "returns an Enumerator if no block was given" do
|
55
|
+
iterator = BreakIterator.new :word, "nb"
|
56
|
+
expected = ICU.ruby19? ? Enumerator : Enumerable::Enumerator
|
57
|
+
|
58
|
+
iterator.each.should be_kind_of(expected)
|
59
|
+
end
|
60
|
+
|
48
61
|
end # BreakIterator
|
49
62
|
end # ICU
|
data/spec/chardet_spec.rb
CHANGED
data/spec/collation_spec.rb
CHANGED
@@ -4,15 +4,24 @@ require 'spec_helper'
|
|
4
4
|
|
5
5
|
module ICU
|
6
6
|
module Collation
|
7
|
+
describe "Collation" do
|
8
|
+
it "should collate an array of strings" do
|
9
|
+
Collation.collate("nb", %w[æ å ø]).should == %w[æ ø å]
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
7
13
|
describe Collator do
|
8
14
|
|
9
15
|
before { @c = Collator.new("nb") }
|
10
|
-
after { @c.close }
|
11
16
|
|
12
17
|
it "should collate an array of strings" do
|
13
18
|
@c.collate(%w[å ø æ]).should == %w[æ ø å]
|
14
19
|
end
|
15
20
|
|
21
|
+
it "raises an error if argument does not respond to :sort" do
|
22
|
+
lambda { @c.collate(1) }.should raise_error(ArgumentError)
|
23
|
+
end
|
24
|
+
|
16
25
|
it "should return available locales" do
|
17
26
|
locales = ICU::Collation.available_locales
|
18
27
|
locales.should be_kind_of(Array)
|
@@ -22,7 +31,6 @@ module ICU
|
|
22
31
|
|
23
32
|
it "should return the locale of the collator" do
|
24
33
|
l = @c.locale
|
25
|
-
l.should be_kind_of(String)
|
26
34
|
l.should == "nb"
|
27
35
|
end
|
28
36
|
|