ffi-icu 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ffi-icu.rb CHANGED
@@ -10,6 +10,8 @@ module ICU
10
10
  :osx
11
11
  when /linux/
12
12
  :linux
13
+ when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
14
+ :windows
13
15
  else
14
16
  os
15
17
  end
@@ -20,6 +22,11 @@ module ICU
20
22
  end
21
23
  end
22
24
 
25
+ unless ICU.ruby19?
26
+ require 'jcode'
27
+ $KCODE = 'u'
28
+ end
29
+
23
30
  require "ffi-icu/core_ext/string"
24
31
  require "ffi-icu/lib"
25
32
  require "ffi-icu/uchar"
@@ -29,7 +36,3 @@ require "ffi-icu/transliteration"
29
36
  require "ffi-icu/normalization"
30
37
  require "ffi-icu/break_iterator"
31
38
 
32
- unless ICU.ruby19?
33
- require 'jcode'
34
- $KCODE = 'u'
35
- end
data/lib/ffi-icu/break_iterator.rb CHANGED
@@ -18,11 +18,13 @@ module ICU
18
18
 
19
19
  def text=(str)
20
20
  Lib.check_error { |err|
21
- Lib.ubrk_setText @iterator, UCharPointer.from_string(str), str.length, err
21
+ Lib.ubrk_setText @iterator, UCharPointer.from_string(str), str.jlength, err
22
22
  }
23
23
  end
24
24
 
25
25
  def each(&blk)
26
+ return to_enum(:each) unless block_given?
27
+
26
28
  int = first
27
29
 
28
30
  while int != UBRK_DONE
@@ -2,18 +2,15 @@ module ICU
2
2
  module CharDet
3
3
 
4
4
  def self.detect(string)
5
- detector = Detector.new
6
- res = detector.detect string
7
- detector.close
8
-
9
- res
5
+ Detector.new.detect string
10
6
  end
11
7
 
12
8
  class Detector
13
9
  Match = Struct.new(:name, :confidence, :language)
14
10
 
15
11
  def initialize
16
- @detector = Lib.check_error { |ptr| Lib.ucsdet_open(ptr) }
12
+ ptr = Lib.check_error { |err| Lib.ucsdet_open err }
13
+ @detector = FFI::AutoPointer.new(ptr, Lib.method(:ucsdet_close))
17
14
  end
18
15
 
19
16
  def input_filter_enabled?
@@ -30,10 +27,6 @@ module ICU
30
27
  end
31
28
  end
32
29
 
33
- def close
34
- Lib.ucsdet_close @detector
35
- end
36
-
37
30
  def detect(str)
38
31
  set_text(str)
39
32
 
@@ -2,11 +2,7 @@ module ICU
2
2
  module Collation
3
3
 
4
4
  def self.collate(locale, arr)
5
- collator = Collator.new(locale)
6
- res = collator.collate(arr)
7
- collator.close
8
-
9
- res
5
+ Collator.new(locale).collate(arr)
10
6
  end
11
7
 
12
8
  def self.keywords
@@ -34,7 +30,8 @@ module ICU
34
30
  ULOC_VALID_LOCALE = 1
35
31
 
36
32
  def initialize(locale)
37
- @c = Lib.check_error { |error| Lib.ucol_open(locale, error) }
33
+ ptr = Lib.check_error { |error| Lib.ucol_open(locale, error) }
34
+ @c = FFI::AutoPointer.new(ptr, Lib.method(:ucol_close))
38
35
  end
39
36
 
40
37
  def locale
@@ -44,19 +41,19 @@ module ICU
44
41
  def compare(a, b)
45
42
  Lib.ucol_strcoll(
46
43
  @c,
47
- UCharPointer.from_string(a), a.length,
48
- UCharPointer.from_string(b), b.length
44
+ UCharPointer.from_string(a), a.jlength,
45
+ UCharPointer.from_string(b), b.jlength
49
46
  )
50
47
  end
51
48
 
52
49
  def greater?(a, b)
53
- Lib.ucol_greater(@c, UCharPointer.from_string(a), a.length,
54
- UCharPointer.from_string(b), b.length)
50
+ Lib.ucol_greater(@c, UCharPointer.from_string(a), a.jlength,
51
+ UCharPointer.from_string(b), b.jlength)
55
52
  end
56
53
 
57
54
  def greater_or_equal?(a, b)
58
- Lib.ucol_greaterOrEqual(@c, UCharPointer.from_string(a), a.length,
59
- UCharPointer.from_string(b), b.length)
55
+ Lib.ucol_greaterOrEqual(@c, UCharPointer.from_string(a), a.jlength,
56
+ UCharPointer.from_string(b), b.jlength)
60
57
  end
61
58
 
62
59
  def equal?(*args)
@@ -68,16 +65,16 @@ module ICU
68
65
 
69
66
  a, b = args
70
67
 
71
- Lib.ucol_equal(@c, UCharPointer.from_string(a), a.length,
72
- UCharPointer.from_string(b), b.length)
68
+ Lib.ucol_equal(@c, UCharPointer.from_string(a), a.jlength,
69
+ UCharPointer.from_string(b), b.jlength)
73
70
  end
74
71
 
75
- def collate(array)
76
- array.sort { |a,b| compare a, b }
77
- end
72
+ def collate(sortable)
73
+ unless sortable.respond_to?(:sort)
74
+ raise ArgumentError, "argument must respond to :sort with arity of 2"
75
+ end
78
76
 
79
- def close
80
- Lib.ucol_close(@c)
77
+ sortable.sort { |a, b| compare a, b }
81
78
  end
82
79
  end # Collator
83
80
 
data/lib/ffi-icu/core_ext/string.rb CHANGED
@@ -2,4 +2,8 @@ class String
2
2
  unless method_defined?(:bytesize)
3
3
  alias_method :bytesize, :length
4
4
  end
5
+
6
+ unless method_defined?(:jlength)
7
+ alias_method :jlength, :length
8
+ end
5
9
  end
data/lib/ffi-icu/lib.rb CHANGED
@@ -9,10 +9,11 @@ module ICU
9
9
  extend FFI::Library
10
10
 
11
11
  VERSIONS = {
12
- "42" => "_4_2",
13
- "44" => "_44",
12
+ "48" => "_48",
13
+ "46" => "_46",
14
14
  "45" => "_45",
15
- "46" => "_46"
15
+ "44" => "_44",
16
+ "42" => "_4_2",
16
17
  }
17
18
 
18
19
  # FIXME: this is incredibly ugly, figure out some better way
@@ -33,27 +34,34 @@ module ICU
33
34
  end
34
35
  end
35
36
 
37
+ libs = nil
38
+ versions = VERSIONS.keys
39
+
36
40
  # ok, try to find it
37
41
  case ICU.platform
38
42
  when :osx
39
43
  ffi_lib "icucore"
40
44
  when :linux
41
- versions = VERSIONS.keys
42
45
  libs = ffi_lib versions.map { |v| "libicui18n.so.#{v}" },
43
46
  versions.map { |v| "libicutu.so.#{v}" }
44
47
 
45
- VERSIONS.find do |so_version, func_version|
46
- if libs.first.name =~ /#{so_version}$/
47
- suffix = func_version
48
- end
49
- end
48
+ when :windows
49
+ libs = ffi_lib versions.map { |v| "icuin#{v}.dll" }
50
50
  else
51
51
  raise LoadError
52
52
  end
53
53
 
54
+ if libs
55
+ lib_name = libs.first.name
56
+ version = VERSIONS.find { |object, func| lib_name =~ /#{object}(\.dll)?$/ }
57
+
58
+ version or raise "unable to find suffix in #{lib_name}"
59
+ suffix = version.last
60
+ end
61
+
54
62
  suffix
55
63
  rescue LoadError => ex
56
- raise LoadError, "no idea how to load ICU on #{ICU.platform}, patches appreciated! (#{ex.message})"
64
+ raise LoadError, "no idea how to load ICU on #{ICU.platform.inspect}, patches appreciated! (#{ex.message})"
57
65
  end
58
66
 
59
67
  def self.check_error
@@ -98,8 +106,8 @@ module ICU
98
106
 
99
107
  attach_function :u_errorName, "u_errorName#{suffix}", [:int], :string
100
108
  attach_function :uenum_count, "uenum_count#{suffix}", [:pointer, :pointer], :int
101
- attach_function :uenum_close, "uenum_close#{suffix}", [:pointer], :void
102
- attach_function :uenum_next, "uenum_next#{suffix}", [:pointer, :pointer, :pointer], :string
109
+ attach_function :uenum_close, "uenum_close#{suffix}", [:pointer], :void
110
+ attach_function :uenum_next, "uenum_next#{suffix}", [:pointer, :pointer, :pointer], :string
103
111
  attach_function :u_charsToUChars, "u_charsToUChars#{suffix}", [:string, :pointer, :int32_t], :void
104
112
  attach_function :u_UCharsToChars, "u_UCharsToChars#{suffix}", [:pointer, :string, :int32_t], :void
105
113
 
@@ -126,11 +134,11 @@ module ICU
126
134
  # http://icu-project.org/apiref/icu4c/ucol_8h.html
127
135
  #
128
136
 
129
- attach_function :ucol_open, "ucol_open#{suffix}", [:string, :pointer], :pointer
130
- attach_function :ucol_close, "ucol_close#{suffix}", [:pointer], :void
131
- attach_function :ucol_strcoll, "ucol_strcoll#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t], :int
132
- attach_function :ucol_getKeywords, "ucol_getKeywords#{suffix}", [:pointer], :pointer
133
- attach_function :ucol_getKeywordValues, "ucol_getKeywordValues#{suffix}", [:string, :pointer], :pointer
137
+ attach_function :ucol_open, "ucol_open#{suffix}", [:string, :pointer], :pointer
138
+ attach_function :ucol_close, "ucol_close#{suffix}", [:pointer], :void
139
+ attach_function :ucol_strcoll, "ucol_strcoll#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t], :int
140
+ attach_function :ucol_getKeywords, "ucol_getKeywords#{suffix}", [:pointer], :pointer
141
+ attach_function :ucol_getKeywordValues, "ucol_getKeywordValues#{suffix}", [:string, :pointer], :pointer
134
142
  attach_function :ucol_getAvailable, "ucol_getAvailable#{suffix}", [:int32_t], :string
135
143
  attach_function :ucol_countAvailable, "ucol_countAvailable#{suffix}", [], :int32_t
136
144
  attach_function :ucol_getLocale, "ucol_getLocale#{suffix}", [:pointer, :int, :pointer], :string
@@ -189,6 +197,8 @@ module ICU
189
197
  #
190
198
  # Text Boundary Analysis
191
199
  #
200
+ # http://icu-project.org/apiref/icu4c/ubrk_8h.html
201
+ #
192
202
 
193
203
  enum :iterator_type, [ :character, :word, :line, :sentence, :title]
194
204
  enum :word_break, [ :none, 0,
data/lib/ffi-icu/normalization.rb CHANGED
@@ -2,7 +2,7 @@ module ICU
2
2
  module Normalization
3
3
 
4
4
  def self.normalize(input, mode = :default)
5
- input_length = input.unpack("U*").size
5
+ input_length = input.jlength
6
6
  needed_length = out_length = options = 0
7
7
  in_ptr = UCharPointer.from_string(input)
8
8
  out_ptr = UCharPointer.new(out_length)
data/lib/ffi-icu/transliteration.rb CHANGED
@@ -4,10 +4,7 @@ module ICU
4
4
  class << self
5
5
  def transliterate(translit_id, str, rules = nil)
6
6
  t = Transliterator.new translit_id, rules
7
- res = t.transliterate str
8
- t.close
9
-
10
- res
7
+ t.transliterate str
11
8
  end
12
9
  alias_method :translit, :transliterate
13
10
 
@@ -29,7 +26,7 @@ module ICU
29
26
  rules_length = 0
30
27
 
31
28
  if rules
32
- rules_length = rules.length + 1
29
+ rules_length = rules.jlength + 1
33
30
  rules = UCharPointer.from_string(rules)
34
31
  end
35
32
 
@@ -37,7 +34,8 @@ module ICU
37
34
  begin
38
35
  Lib.check_error do |status|
39
36
  # couldn't get utrans_openU to work properly, so using deprecated utrans_open for now
40
- @tr = Lib.utrans_openU(UCharPointer.from_string(id), id.length, direction, rules, rules_length, @parse_error, status)
37
+ ptr = Lib.utrans_openU(UCharPointer.from_string(id), id.jlength, direction, rules, rules_length, @parse_error, status)
38
+ @tr = FFI::AutoPointer.new(ptr, Lib.method(:utrans_close))
41
39
  end
42
40
  rescue ICU::Error => ex
43
41
  raise ex, "#{ex.message} (#{parse_error})"
@@ -80,10 +78,6 @@ module ICU
80
78
  buf.string text_length.get_int32(0)
81
79
  end
82
80
 
83
- def close
84
- Lib.utrans_close @tr
85
- end
86
-
87
81
  end # Transliterator
88
82
  end # Translit
89
83
  end # ICU
@@ -1,3 +1,3 @@
1
1
  module ICU
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
@@ -18,6 +18,12 @@ module ICU
18
18
  iterator.to_a.should == [0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
19
19
  end
20
20
 
21
+ it "finds all word boundaries in a Thai string" do
22
+ iterator = BreakIterator.new :word, "th_TH"
23
+ iterator.text = "การทดลอง"
24
+ iterator.to_a.should == [0, 3, 8]
25
+ end
26
+
21
27
  it "finds all sentence boundaries in an English string" do
22
28
  iterator = BreakIterator.new :sentence, "en_US"
23
29
  iterator.text = "This is a sentence. This is another sentence, with a comma in it."
@@ -45,5 +51,12 @@ module ICU
45
51
  iterator.should_not be_boundary(10)
46
52
  end
47
53
 
54
+ it "returns an Enumerator if no block was given" do
55
+ iterator = BreakIterator.new :word, "nb"
56
+ expected = ICU.ruby19? ? Enumerator : Enumerable::Enumerator
57
+
58
+ iterator.each.should be_kind_of(expected)
59
+ end
60
+
48
61
  end # BreakIterator
49
62
  end # ICU
data/spec/chardet_spec.rb CHANGED
@@ -5,7 +5,6 @@ require 'spec_helper'
5
5
  describe ICU::CharDet::Detector do
6
6
 
7
7
  before { @d = ICU::CharDet::Detector.new }
8
- after { @d.close }
9
8
 
10
9
  it "should recognize UTF-8" do
11
10
  m = @d.detect("æåø")
@@ -4,15 +4,24 @@ require 'spec_helper'
4
4
 
5
5
  module ICU
6
6
  module Collation
7
+ describe "Collation" do
8
+ it "should collate an array of strings" do
9
+ Collation.collate("nb", %w[æ å ø]).should == %w[æ ø å]
10
+ end
11
+ end
12
+
7
13
  describe Collator do
8
14
 
9
15
  before { @c = Collator.new("nb") }
10
- after { @c.close }
11
16
 
12
17
  it "should collate an array of strings" do
13
18
  @c.collate(%w[å ø æ]).should == %w[æ ø å]
14
19
  end
15
20
 
21
+ it "raises an error if argument does not respond to :sort" do
22
+ lambda { @c.collate(1) }.should raise_error(ArgumentError)
23
+ end
24
+
16
25
  it "should return available locales" do
17
26
  locales = ICU::Collation.available_locales
18
27
  locales.should be_kind_of(Array)
@@ -22,7 +31,6 @@ module ICU
22
31
 
23
32
  it "should return the locale of the collator" do
24
33
  l = @c.locale
25
- l.should be_kind_of(String)
26
34
  l.should == "nb"
27
35
  end
28
36
 
@@ -9,8 +9,6 @@ module ICU
9
9
  @t = Transliteration::Transliterator.new(*args)
10
10
  end
11
11
 
12
- after { @t.close if @t }
13
-
14
12
  [
15
13
  { :id => "Any-Hex", :input => "abcde", :output => "\\u0061\\u0062\\u0063\\u0064\\u0065" },
16
14
  { :id => "Lower", :input => "ABC", :output => "abc" },
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: ffi-icu
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.5
5
+ version: 0.0.6
6
6
  platform: ruby
7
7
  authors:
8
8
  - Jari Bakken