ffi-icu 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/ffi-icu.rb +7 -4
- data/lib/ffi-icu/break_iterator.rb +3 -1
- data/lib/ffi-icu/chardet.rb +3 -10
- data/lib/ffi-icu/collation.rb +16 -19
- data/lib/ffi-icu/core_ext/string.rb +4 -0
- data/lib/ffi-icu/lib.rb +27 -17
- data/lib/ffi-icu/normalization.rb +1 -1
- data/lib/ffi-icu/transliteration.rb +4 -10
- data/lib/ffi-icu/version.rb +1 -1
- data/spec/break_iterator_spec.rb +13 -0
- data/spec/chardet_spec.rb +0 -1
- data/spec/collation_spec.rb +10 -2
- data/spec/transliteration_spec.rb +0 -2
- metadata +1 -1
data/lib/ffi-icu.rb
CHANGED
@@ -10,6 +10,8 @@ module ICU
|
|
10
10
|
:osx
|
11
11
|
when /linux/
|
12
12
|
:linux
|
13
|
+
when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
|
14
|
+
:windows
|
13
15
|
else
|
14
16
|
os
|
15
17
|
end
|
@@ -20,6 +22,11 @@ module ICU
|
|
20
22
|
end
|
21
23
|
end
|
22
24
|
|
25
|
+
unless ICU.ruby19?
|
26
|
+
require 'jcode'
|
27
|
+
$KCODE = 'u'
|
28
|
+
end
|
29
|
+
|
23
30
|
require "ffi-icu/core_ext/string"
|
24
31
|
require "ffi-icu/lib"
|
25
32
|
require "ffi-icu/uchar"
|
@@ -29,7 +36,3 @@ require "ffi-icu/transliteration"
|
|
29
36
|
require "ffi-icu/normalization"
|
30
37
|
require "ffi-icu/break_iterator"
|
31
38
|
|
32
|
-
unless ICU.ruby19?
|
33
|
-
require 'jcode'
|
34
|
-
$KCODE = 'u'
|
35
|
-
end
|
data/lib/ffi-icu/break_iterator.rb
CHANGED
@@ -18,11 +18,13 @@ module ICU
|
|
18
18
|
|
19
19
|
def text=(str)
|
20
20
|
Lib.check_error { |err|
|
21
|
-
Lib.ubrk_setText @iterator, UCharPointer.from_string(str), str.
|
21
|
+
Lib.ubrk_setText @iterator, UCharPointer.from_string(str), str.jlength, err
|
22
22
|
}
|
23
23
|
end
|
24
24
|
|
25
25
|
def each(&blk)
|
26
|
+
return to_enum(:each) unless block_given?
|
27
|
+
|
26
28
|
int = first
|
27
29
|
|
28
30
|
while int != UBRK_DONE
|
data/lib/ffi-icu/chardet.rb
CHANGED
@@ -2,18 +2,15 @@ module ICU
|
|
2
2
|
module CharDet
|
3
3
|
|
4
4
|
def self.detect(string)
|
5
|
-
|
6
|
-
res = detector.detect string
|
7
|
-
detector.close
|
8
|
-
|
9
|
-
res
|
5
|
+
Detector.new.detect string
|
10
6
|
end
|
11
7
|
|
12
8
|
class Detector
|
13
9
|
Match = Struct.new(:name, :confidence, :language)
|
14
10
|
|
15
11
|
def initialize
|
16
|
-
|
12
|
+
ptr = Lib.check_error { |err| Lib.ucsdet_open err }
|
13
|
+
@detector = FFI::AutoPointer.new(ptr, Lib.method(:ucsdet_close))
|
17
14
|
end
|
18
15
|
|
19
16
|
def input_filter_enabled?
|
@@ -30,10 +27,6 @@ module ICU
|
|
30
27
|
end
|
31
28
|
end
|
32
29
|
|
33
|
-
def close
|
34
|
-
Lib.ucsdet_close @detector
|
35
|
-
end
|
36
|
-
|
37
30
|
def detect(str)
|
38
31
|
set_text(str)
|
39
32
|
|
data/lib/ffi-icu/collation.rb
CHANGED
@@ -2,11 +2,7 @@ module ICU
|
|
2
2
|
module Collation
|
3
3
|
|
4
4
|
def self.collate(locale, arr)
|
5
|
-
|
6
|
-
res = collator.collate(arr)
|
7
|
-
collator.close
|
8
|
-
|
9
|
-
res
|
5
|
+
Collator.new(locale).collate(arr)
|
10
6
|
end
|
11
7
|
|
12
8
|
def self.keywords
|
@@ -34,7 +30,8 @@ module ICU
|
|
34
30
|
ULOC_VALID_LOCALE = 1
|
35
31
|
|
36
32
|
def initialize(locale)
|
37
|
-
|
33
|
+
ptr = Lib.check_error { |error| Lib.ucol_open(locale, error) }
|
34
|
+
@c = FFI::AutoPointer.new(ptr, Lib.method(:ucol_close))
|
38
35
|
end
|
39
36
|
|
40
37
|
def locale
|
@@ -44,19 +41,19 @@ module ICU
|
|
44
41
|
def compare(a, b)
|
45
42
|
Lib.ucol_strcoll(
|
46
43
|
@c,
|
47
|
-
UCharPointer.from_string(a), a.
|
48
|
-
UCharPointer.from_string(b), b.
|
44
|
+
UCharPointer.from_string(a), a.jlength,
|
45
|
+
UCharPointer.from_string(b), b.jlength
|
49
46
|
)
|
50
47
|
end
|
51
48
|
|
52
49
|
def greater?(a, b)
|
53
|
-
Lib.ucol_greater(@c, UCharPointer.from_string(a), a.
|
54
|
-
UCharPointer.from_string(b), b.
|
50
|
+
Lib.ucol_greater(@c, UCharPointer.from_string(a), a.jlength,
|
51
|
+
UCharPointer.from_string(b), b.jlength)
|
55
52
|
end
|
56
53
|
|
57
54
|
def greater_or_equal?(a, b)
|
58
|
-
Lib.ucol_greaterOrEqual(@c, UCharPointer.from_string(a), a.
|
59
|
-
UCharPointer.from_string(b), b.
|
55
|
+
Lib.ucol_greaterOrEqual(@c, UCharPointer.from_string(a), a.jlength,
|
56
|
+
UCharPointer.from_string(b), b.jlength)
|
60
57
|
end
|
61
58
|
|
62
59
|
def equal?(*args)
|
@@ -68,16 +65,16 @@ module ICU
|
|
68
65
|
|
69
66
|
a, b = args
|
70
67
|
|
71
|
-
Lib.ucol_equal(@c, UCharPointer.from_string(a), a.
|
72
|
-
UCharPointer.from_string(b), b.
|
68
|
+
Lib.ucol_equal(@c, UCharPointer.from_string(a), a.jlength,
|
69
|
+
UCharPointer.from_string(b), b.jlength)
|
73
70
|
end
|
74
71
|
|
75
|
-
def collate(
|
76
|
-
|
77
|
-
|
72
|
+
def collate(sortable)
|
73
|
+
unless sortable.respond_to?(:sort)
|
74
|
+
raise ArgumentError, "argument must respond to :sort with arity of 2"
|
75
|
+
end
|
78
76
|
|
79
|
-
|
80
|
-
Lib.ucol_close(@c)
|
77
|
+
sortable.sort { |a, b| compare a, b }
|
81
78
|
end
|
82
79
|
end # Collator
|
83
80
|
|
data/lib/ffi-icu/lib.rb
CHANGED
@@ -9,10 +9,11 @@ module ICU
|
|
9
9
|
extend FFI::Library
|
10
10
|
|
11
11
|
VERSIONS = {
|
12
|
-
"
|
13
|
-
"
|
12
|
+
"48" => "_48",
|
13
|
+
"46" => "_46",
|
14
14
|
"45" => "_45",
|
15
|
-
"
|
15
|
+
"44" => "_44",
|
16
|
+
"42" => "_4_2",
|
16
17
|
}
|
17
18
|
|
18
19
|
# FIXME: this is incredibly ugly, figure out some better way
|
@@ -33,27 +34,34 @@ module ICU
|
|
33
34
|
end
|
34
35
|
end
|
35
36
|
|
37
|
+
libs = nil
|
38
|
+
versions = VERSIONS.keys
|
39
|
+
|
36
40
|
# ok, try to find it
|
37
41
|
case ICU.platform
|
38
42
|
when :osx
|
39
43
|
ffi_lib "icucore"
|
40
44
|
when :linux
|
41
|
-
versions = VERSIONS.keys
|
42
45
|
libs = ffi_lib versions.map { |v| "libicui18n.so.#{v}" },
|
43
46
|
versions.map { |v| "libicutu.so.#{v}" }
|
44
47
|
|
45
|
-
|
46
|
-
|
47
|
-
suffix = func_version
|
48
|
-
end
|
49
|
-
end
|
48
|
+
when :windows
|
49
|
+
libs = ffi_lib versions.map { |v| "icuin#{v}.dll" }
|
50
50
|
else
|
51
51
|
raise LoadError
|
52
52
|
end
|
53
53
|
|
54
|
+
if libs
|
55
|
+
lib_name = libs.first.name
|
56
|
+
version = VERSIONS.find { |object, func| lib_name =~ /#{object}(\.dll)?$/ }
|
57
|
+
|
58
|
+
version or raise "unable to find suffix in #{lib_name}"
|
59
|
+
suffix = version.last
|
60
|
+
end
|
61
|
+
|
54
62
|
suffix
|
55
63
|
rescue LoadError => ex
|
56
|
-
raise LoadError, "no idea how to load ICU on #{ICU.platform}, patches appreciated! (#{ex.message})"
|
64
|
+
raise LoadError, "no idea how to load ICU on #{ICU.platform.inspect}, patches appreciated! (#{ex.message})"
|
57
65
|
end
|
58
66
|
|
59
67
|
def self.check_error
|
@@ -98,8 +106,8 @@ module ICU
|
|
98
106
|
|
99
107
|
attach_function :u_errorName, "u_errorName#{suffix}", [:int], :string
|
100
108
|
attach_function :uenum_count, "uenum_count#{suffix}", [:pointer, :pointer], :int
|
101
|
-
attach_function :uenum_close, "uenum_close#{suffix}",
|
102
|
-
attach_function :uenum_next, "uenum_next#{suffix}",
|
109
|
+
attach_function :uenum_close, "uenum_close#{suffix}", [:pointer], :void
|
110
|
+
attach_function :uenum_next, "uenum_next#{suffix}", [:pointer, :pointer, :pointer], :string
|
103
111
|
attach_function :u_charsToUChars, "u_charsToUChars#{suffix}", [:string, :pointer, :int32_t], :void
|
104
112
|
attach_function :u_UCharsToChars, "u_UCharsToChars#{suffix}", [:pointer, :string, :int32_t], :void
|
105
113
|
|
@@ -126,11 +134,11 @@ module ICU
|
|
126
134
|
# http://icu-project.org/apiref/icu4c/ucol_8h.html
|
127
135
|
#
|
128
136
|
|
129
|
-
attach_function :ucol_open, "ucol_open#{suffix}",
|
130
|
-
attach_function :ucol_close, "ucol_close#{suffix}",
|
131
|
-
attach_function :ucol_strcoll, "ucol_strcoll#{suffix}",
|
132
|
-
attach_function :ucol_getKeywords, "ucol_getKeywords#{suffix}",
|
133
|
-
attach_function :ucol_getKeywordValues, "ucol_getKeywordValues#{suffix}",
|
137
|
+
attach_function :ucol_open, "ucol_open#{suffix}", [:string, :pointer], :pointer
|
138
|
+
attach_function :ucol_close, "ucol_close#{suffix}", [:pointer], :void
|
139
|
+
attach_function :ucol_strcoll, "ucol_strcoll#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t], :int
|
140
|
+
attach_function :ucol_getKeywords, "ucol_getKeywords#{suffix}", [:pointer], :pointer
|
141
|
+
attach_function :ucol_getKeywordValues, "ucol_getKeywordValues#{suffix}", [:string, :pointer], :pointer
|
134
142
|
attach_function :ucol_getAvailable, "ucol_getAvailable#{suffix}", [:int32_t], :string
|
135
143
|
attach_function :ucol_countAvailable, "ucol_countAvailable#{suffix}", [], :int32_t
|
136
144
|
attach_function :ucol_getLocale, "ucol_getLocale#{suffix}", [:pointer, :int, :pointer], :string
|
@@ -189,6 +197,8 @@ module ICU
|
|
189
197
|
#
|
190
198
|
# Text Boundary Analysis
|
191
199
|
#
|
200
|
+
# http://icu-project.org/apiref/icu4c/ubrk_8h.html
|
201
|
+
#
|
192
202
|
|
193
203
|
enum :iterator_type, [ :character, :word, :line, :sentence, :title]
|
194
204
|
enum :word_break, [ :none, 0,
|
data/lib/ffi-icu/normalization.rb
CHANGED
@@ -2,7 +2,7 @@ module ICU
|
|
2
2
|
module Normalization
|
3
3
|
|
4
4
|
def self.normalize(input, mode = :default)
|
5
|
-
input_length = input.
|
5
|
+
input_length = input.jlength
|
6
6
|
needed_length = out_length = options = 0
|
7
7
|
in_ptr = UCharPointer.from_string(input)
|
8
8
|
out_ptr = UCharPointer.new(out_length)
|
data/lib/ffi-icu/transliteration.rb
CHANGED
@@ -4,10 +4,7 @@ module ICU
|
|
4
4
|
class << self
|
5
5
|
def transliterate(translit_id, str, rules = nil)
|
6
6
|
t = Transliterator.new translit_id, rules
|
7
|
-
|
8
|
-
t.close
|
9
|
-
|
10
|
-
res
|
7
|
+
t.transliterate str
|
11
8
|
end
|
12
9
|
alias_method :translit, :transliterate
|
13
10
|
|
@@ -29,7 +26,7 @@ module ICU
|
|
29
26
|
rules_length = 0
|
30
27
|
|
31
28
|
if rules
|
32
|
-
rules_length = rules.
|
29
|
+
rules_length = rules.jlength + 1
|
33
30
|
rules = UCharPointer.from_string(rules)
|
34
31
|
end
|
35
32
|
|
@@ -37,7 +34,8 @@ module ICU
|
|
37
34
|
begin
|
38
35
|
Lib.check_error do |status|
|
39
36
|
# couldn't get utrans_openU to work properly, so using deprecated utrans_open for now
|
40
|
-
|
37
|
+
ptr = Lib.utrans_openU(UCharPointer.from_string(id), id.jlength, direction, rules, rules_length, @parse_error, status)
|
38
|
+
@tr = FFI::AutoPointer.new(ptr, Lib.method(:utrans_close))
|
41
39
|
end
|
42
40
|
rescue ICU::Error => ex
|
43
41
|
raise ex, "#{ex.message} (#{parse_error})"
|
@@ -80,10 +78,6 @@ module ICU
|
|
80
78
|
buf.string text_length.get_int32(0)
|
81
79
|
end
|
82
80
|
|
83
|
-
def close
|
84
|
-
Lib.utrans_close @tr
|
85
|
-
end
|
86
|
-
|
87
81
|
end # Transliterator
|
88
82
|
end # Translit
|
89
83
|
end # ICU
|
data/lib/ffi-icu/version.rb
CHANGED
data/spec/break_iterator_spec.rb
CHANGED
@@ -18,6 +18,12 @@ module ICU
|
|
18
18
|
iterator.to_a.should == [0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
|
19
19
|
end
|
20
20
|
|
21
|
+
it "finds all word boundaries in a Thai string" do
|
22
|
+
iterator = BreakIterator.new :word, "th_TH"
|
23
|
+
iterator.text = "การทดลอง"
|
24
|
+
iterator.to_a.should == [0, 3, 8]
|
25
|
+
end
|
26
|
+
|
21
27
|
it "finds all sentence boundaries in an English string" do
|
22
28
|
iterator = BreakIterator.new :sentence, "en_US"
|
23
29
|
iterator.text = "This is a sentence. This is another sentence, with a comma in it."
|
@@ -45,5 +51,12 @@ module ICU
|
|
45
51
|
iterator.should_not be_boundary(10)
|
46
52
|
end
|
47
53
|
|
54
|
+
it "returns an Enumerator if no block was given" do
|
55
|
+
iterator = BreakIterator.new :word, "nb"
|
56
|
+
expected = ICU.ruby19? ? Enumerator : Enumerable::Enumerator
|
57
|
+
|
58
|
+
iterator.each.should be_kind_of(expected)
|
59
|
+
end
|
60
|
+
|
48
61
|
end # BreakIterator
|
49
62
|
end # ICU
|
data/spec/chardet_spec.rb
CHANGED
data/spec/collation_spec.rb
CHANGED
@@ -4,15 +4,24 @@ require 'spec_helper'
|
|
4
4
|
|
5
5
|
module ICU
|
6
6
|
module Collation
|
7
|
+
describe "Collation" do
|
8
|
+
it "should collate an array of strings" do
|
9
|
+
Collation.collate("nb", %w[æ å ø]).should == %w[æ ø å]
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
7
13
|
describe Collator do
|
8
14
|
|
9
15
|
before { @c = Collator.new("nb") }
|
10
|
-
after { @c.close }
|
11
16
|
|
12
17
|
it "should collate an array of strings" do
|
13
18
|
@c.collate(%w[å ø æ]).should == %w[æ ø å]
|
14
19
|
end
|
15
20
|
|
21
|
+
it "raises an error if argument does not respond to :sort" do
|
22
|
+
lambda { @c.collate(1) }.should raise_error(ArgumentError)
|
23
|
+
end
|
24
|
+
|
16
25
|
it "should return available locales" do
|
17
26
|
locales = ICU::Collation.available_locales
|
18
27
|
locales.should be_kind_of(Array)
|
@@ -22,7 +31,6 @@ module ICU
|
|
22
31
|
|
23
32
|
it "should return the locale of the collator" do
|
24
33
|
l = @c.locale
|
25
|
-
l.should be_kind_of(String)
|
26
34
|
l.should == "nb"
|
27
35
|
end
|
28
36
|
|