ffi-icu 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ffi-icu.rb CHANGED
@@ -10,6 +10,8 @@ module ICU
10
10
  :osx
11
11
  when /linux/
12
12
  :linux
13
+ when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
14
+ :windows
13
15
  else
14
16
  os
15
17
  end
@@ -20,6 +22,11 @@ module ICU
20
22
  end
21
23
  end
22
24
 
25
+ unless ICU.ruby19?
26
+ require 'jcode'
27
+ $KCODE = 'u'
28
+ end
29
+
23
30
  require "ffi-icu/core_ext/string"
24
31
  require "ffi-icu/lib"
25
32
  require "ffi-icu/uchar"
@@ -29,7 +36,3 @@ require "ffi-icu/transliteration"
29
36
  require "ffi-icu/normalization"
30
37
  require "ffi-icu/break_iterator"
31
38
 
32
- unless ICU.ruby19?
33
- require 'jcode'
34
- $KCODE = 'u'
35
- end
@@ -18,11 +18,13 @@ module ICU
18
18
 
19
19
  def text=(str)
20
20
  Lib.check_error { |err|
21
- Lib.ubrk_setText @iterator, UCharPointer.from_string(str), str.length, err
21
+ Lib.ubrk_setText @iterator, UCharPointer.from_string(str), str.jlength, err
22
22
  }
23
23
  end
24
24
 
25
25
  def each(&blk)
26
+ return to_enum(:each) unless block_given?
27
+
26
28
  int = first
27
29
 
28
30
  while int != UBRK_DONE
@@ -2,18 +2,15 @@ module ICU
2
2
  module CharDet
3
3
 
4
4
  def self.detect(string)
5
- detector = Detector.new
6
- res = detector.detect string
7
- detector.close
8
-
9
- res
5
+ Detector.new.detect string
10
6
  end
11
7
 
12
8
  class Detector
13
9
  Match = Struct.new(:name, :confidence, :language)
14
10
 
15
11
  def initialize
16
- @detector = Lib.check_error { |ptr| Lib.ucsdet_open(ptr) }
12
+ ptr = Lib.check_error { |err| Lib.ucsdet_open err }
13
+ @detector = FFI::AutoPointer.new(ptr, Lib.method(:ucsdet_close))
17
14
  end
18
15
 
19
16
  def input_filter_enabled?
@@ -30,10 +27,6 @@ module ICU
30
27
  end
31
28
  end
32
29
 
33
- def close
34
- Lib.ucsdet_close @detector
35
- end
36
-
37
30
  def detect(str)
38
31
  set_text(str)
39
32
 
@@ -2,11 +2,7 @@ module ICU
2
2
  module Collation
3
3
 
4
4
  def self.collate(locale, arr)
5
- collator = Collator.new(locale)
6
- res = collator.collate(arr)
7
- collator.close
8
-
9
- res
5
+ Collator.new(locale).collate(arr)
10
6
  end
11
7
 
12
8
  def self.keywords
@@ -34,7 +30,8 @@ module ICU
34
30
  ULOC_VALID_LOCALE = 1
35
31
 
36
32
  def initialize(locale)
37
- @c = Lib.check_error { |error| Lib.ucol_open(locale, error) }
33
+ ptr = Lib.check_error { |error| Lib.ucol_open(locale, error) }
34
+ @c = FFI::AutoPointer.new(ptr, Lib.method(:ucol_close))
38
35
  end
39
36
 
40
37
  def locale
@@ -44,19 +41,19 @@ module ICU
44
41
  def compare(a, b)
45
42
  Lib.ucol_strcoll(
46
43
  @c,
47
- UCharPointer.from_string(a), a.length,
48
- UCharPointer.from_string(b), b.length
44
+ UCharPointer.from_string(a), a.jlength,
45
+ UCharPointer.from_string(b), b.jlength
49
46
  )
50
47
  end
51
48
 
52
49
  def greater?(a, b)
53
- Lib.ucol_greater(@c, UCharPointer.from_string(a), a.length,
54
- UCharPointer.from_string(b), b.length)
50
+ Lib.ucol_greater(@c, UCharPointer.from_string(a), a.jlength,
51
+ UCharPointer.from_string(b), b.jlength)
55
52
  end
56
53
 
57
54
  def greater_or_equal?(a, b)
58
- Lib.ucol_greaterOrEqual(@c, UCharPointer.from_string(a), a.length,
59
- UCharPointer.from_string(b), b.length)
55
+ Lib.ucol_greaterOrEqual(@c, UCharPointer.from_string(a), a.jlength,
56
+ UCharPointer.from_string(b), b.jlength)
60
57
  end
61
58
 
62
59
  def equal?(*args)
@@ -68,16 +65,16 @@ module ICU
68
65
 
69
66
  a, b = args
70
67
 
71
- Lib.ucol_equal(@c, UCharPointer.from_string(a), a.length,
72
- UCharPointer.from_string(b), b.length)
68
+ Lib.ucol_equal(@c, UCharPointer.from_string(a), a.jlength,
69
+ UCharPointer.from_string(b), b.jlength)
73
70
  end
74
71
 
75
- def collate(array)
76
- array.sort { |a,b| compare a, b }
77
- end
72
+ def collate(sortable)
73
+ unless sortable.respond_to?(:sort)
74
+ raise ArgumentError, "argument must respond to :sort with arity of 2"
75
+ end
78
76
 
79
- def close
80
- Lib.ucol_close(@c)
77
+ sortable.sort { |a, b| compare a, b }
81
78
  end
82
79
  end # Collator
83
80
 
@@ -2,4 +2,8 @@ class String
2
2
  unless method_defined?(:bytesize)
3
3
  alias_method :bytesize, :length
4
4
  end
5
+
6
+ unless method_defined?(:jlength)
7
+ alias_method :jlength, :length
8
+ end
5
9
  end
data/lib/ffi-icu/lib.rb CHANGED
@@ -9,10 +9,11 @@ module ICU
9
9
  extend FFI::Library
10
10
 
11
11
  VERSIONS = {
12
- "42" => "_4_2",
13
- "44" => "_44",
12
+ "48" => "_48",
13
+ "46" => "_46",
14
14
  "45" => "_45",
15
- "46" => "_46"
15
+ "44" => "_44",
16
+ "42" => "_4_2",
16
17
  }
17
18
 
18
19
  # FIXME: this is incredibly ugly, figure out some better way
@@ -33,27 +34,34 @@ module ICU
33
34
  end
34
35
  end
35
36
 
37
+ libs = nil
38
+ versions = VERSIONS.keys
39
+
36
40
  # ok, try to find it
37
41
  case ICU.platform
38
42
  when :osx
39
43
  ffi_lib "icucore"
40
44
  when :linux
41
- versions = VERSIONS.keys
42
45
  libs = ffi_lib versions.map { |v| "libicui18n.so.#{v}" },
43
46
  versions.map { |v| "libicutu.so.#{v}" }
44
47
 
45
- VERSIONS.find do |so_version, func_version|
46
- if libs.first.name =~ /#{so_version}$/
47
- suffix = func_version
48
- end
49
- end
48
+ when :windows
49
+ libs = ffi_lib versions.map { |v| "icuin#{v}.dll" }
50
50
  else
51
51
  raise LoadError
52
52
  end
53
53
 
54
+ if libs
55
+ lib_name = libs.first.name
56
+ version = VERSIONS.find { |object, func| lib_name =~ /#{object}(\.dll)?$/ }
57
+
58
+ version or raise "unable to find suffix in #{lib_name}"
59
+ suffix = version.last
60
+ end
61
+
54
62
  suffix
55
63
  rescue LoadError => ex
56
- raise LoadError, "no idea how to load ICU on #{ICU.platform}, patches appreciated! (#{ex.message})"
64
+ raise LoadError, "no idea how to load ICU on #{ICU.platform.inspect}, patches appreciated! (#{ex.message})"
57
65
  end
58
66
 
59
67
  def self.check_error
@@ -98,8 +106,8 @@ module ICU
98
106
 
99
107
  attach_function :u_errorName, "u_errorName#{suffix}", [:int], :string
100
108
  attach_function :uenum_count, "uenum_count#{suffix}", [:pointer, :pointer], :int
101
- attach_function :uenum_close, "uenum_close#{suffix}", [:pointer], :void
102
- attach_function :uenum_next, "uenum_next#{suffix}", [:pointer, :pointer, :pointer], :string
109
+ attach_function :uenum_close, "uenum_close#{suffix}", [:pointer], :void
110
+ attach_function :uenum_next, "uenum_next#{suffix}", [:pointer, :pointer, :pointer], :string
103
111
  attach_function :u_charsToUChars, "u_charsToUChars#{suffix}", [:string, :pointer, :int32_t], :void
104
112
  attach_function :u_UCharsToChars, "u_UCharsToChars#{suffix}", [:pointer, :string, :int32_t], :void
105
113
 
@@ -126,11 +134,11 @@ module ICU
126
134
  # http://icu-project.org/apiref/icu4c/ucol_8h.html
127
135
  #
128
136
 
129
- attach_function :ucol_open, "ucol_open#{suffix}", [:string, :pointer], :pointer
130
- attach_function :ucol_close, "ucol_close#{suffix}", [:pointer], :void
131
- attach_function :ucol_strcoll, "ucol_strcoll#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t], :int
132
- attach_function :ucol_getKeywords, "ucol_getKeywords#{suffix}", [:pointer], :pointer
133
- attach_function :ucol_getKeywordValues, "ucol_getKeywordValues#{suffix}", [:string, :pointer], :pointer
137
+ attach_function :ucol_open, "ucol_open#{suffix}", [:string, :pointer], :pointer
138
+ attach_function :ucol_close, "ucol_close#{suffix}", [:pointer], :void
139
+ attach_function :ucol_strcoll, "ucol_strcoll#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t], :int
140
+ attach_function :ucol_getKeywords, "ucol_getKeywords#{suffix}", [:pointer], :pointer
141
+ attach_function :ucol_getKeywordValues, "ucol_getKeywordValues#{suffix}", [:string, :pointer], :pointer
134
142
  attach_function :ucol_getAvailable, "ucol_getAvailable#{suffix}", [:int32_t], :string
135
143
  attach_function :ucol_countAvailable, "ucol_countAvailable#{suffix}", [], :int32_t
136
144
  attach_function :ucol_getLocale, "ucol_getLocale#{suffix}", [:pointer, :int, :pointer], :string
@@ -189,6 +197,8 @@ module ICU
189
197
  #
190
198
  # Text Boundary Analysis
191
199
  #
200
+ # http://icu-project.org/apiref/icu4c/ubrk_8h.html
201
+ #
192
202
 
193
203
  enum :iterator_type, [ :character, :word, :line, :sentence, :title]
194
204
  enum :word_break, [ :none, 0,
@@ -2,7 +2,7 @@ module ICU
2
2
  module Normalization
3
3
 
4
4
  def self.normalize(input, mode = :default)
5
- input_length = input.unpack("U*").size
5
+ input_length = input.jlength
6
6
  needed_length = out_length = options = 0
7
7
  in_ptr = UCharPointer.from_string(input)
8
8
  out_ptr = UCharPointer.new(out_length)
@@ -4,10 +4,7 @@ module ICU
4
4
  class << self
5
5
  def transliterate(translit_id, str, rules = nil)
6
6
  t = Transliterator.new translit_id, rules
7
- res = t.transliterate str
8
- t.close
9
-
10
- res
7
+ t.transliterate str
11
8
  end
12
9
  alias_method :translit, :transliterate
13
10
 
@@ -29,7 +26,7 @@ module ICU
29
26
  rules_length = 0
30
27
 
31
28
  if rules
32
- rules_length = rules.length + 1
29
+ rules_length = rules.jlength + 1
33
30
  rules = UCharPointer.from_string(rules)
34
31
  end
35
32
 
@@ -37,7 +34,8 @@ module ICU
37
34
  begin
38
35
  Lib.check_error do |status|
39
36
  # couldn't get utrans_openU to work properly, so using deprecated utrans_open for now
40
- @tr = Lib.utrans_openU(UCharPointer.from_string(id), id.length, direction, rules, rules_length, @parse_error, status)
37
+ ptr = Lib.utrans_openU(UCharPointer.from_string(id), id.jlength, direction, rules, rules_length, @parse_error, status)
38
+ @tr = FFI::AutoPointer.new(ptr, Lib.method(:utrans_close))
41
39
  end
42
40
  rescue ICU::Error => ex
43
41
  raise ex, "#{ex.message} (#{parse_error})"
@@ -80,10 +78,6 @@ module ICU
80
78
  buf.string text_length.get_int32(0)
81
79
  end
82
80
 
83
- def close
84
- Lib.utrans_close @tr
85
- end
86
-
87
81
  end # Transliterator
88
82
  end # Translit
89
83
  end # ICU
@@ -1,3 +1,3 @@
1
1
  module ICU
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
@@ -18,6 +18,12 @@ module ICU
18
18
  iterator.to_a.should == [0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
19
19
  end
20
20
 
21
+ it "finds all word boundaries in a Thai string" do
22
+ iterator = BreakIterator.new :word, "th_TH"
23
+ iterator.text = "การทดลอง"
24
+ iterator.to_a.should == [0, 3, 8]
25
+ end
26
+
21
27
  it "finds all sentence boundaries in an English string" do
22
28
  iterator = BreakIterator.new :sentence, "en_US"
23
29
  iterator.text = "This is a sentence. This is another sentence, with a comma in it."
@@ -45,5 +51,12 @@ module ICU
45
51
  iterator.should_not be_boundary(10)
46
52
  end
47
53
 
54
+ it "returns an Enumerator if no block was given" do
55
+ iterator = BreakIterator.new :word, "nb"
56
+ expected = ICU.ruby19? ? Enumerator : Enumerable::Enumerator
57
+
58
+ iterator.each.should be_kind_of(expected)
59
+ end
60
+
48
61
  end # BreakIterator
49
62
  end # ICU
data/spec/chardet_spec.rb CHANGED
@@ -5,7 +5,6 @@ require 'spec_helper'
5
5
  describe ICU::CharDet::Detector do
6
6
 
7
7
  before { @d = ICU::CharDet::Detector.new }
8
- after { @d.close }
9
8
 
10
9
  it "should recognize UTF-8" do
11
10
  m = @d.detect("æåø")
@@ -4,15 +4,24 @@ require 'spec_helper'
4
4
 
5
5
  module ICU
6
6
  module Collation
7
+ describe "Collation" do
8
+ it "should collate an array of strings" do
9
+ Collation.collate("nb", %w[æ å ø]).should == %w[æ ø å]
10
+ end
11
+ end
12
+
7
13
  describe Collator do
8
14
 
9
15
  before { @c = Collator.new("nb") }
10
- after { @c.close }
11
16
 
12
17
  it "should collate an array of strings" do
13
18
  @c.collate(%w[å ø æ]).should == %w[æ ø å]
14
19
  end
15
20
 
21
+ it "raises an error if argument does not respond to :sort" do
22
+ lambda { @c.collate(1) }.should raise_error(ArgumentError)
23
+ end
24
+
16
25
  it "should return available locales" do
17
26
  locales = ICU::Collation.available_locales
18
27
  locales.should be_kind_of(Array)
@@ -22,7 +31,6 @@ module ICU
22
31
 
23
32
  it "should return the locale of the collator" do
24
33
  l = @c.locale
25
- l.should be_kind_of(String)
26
34
  l.should == "nb"
27
35
  end
28
36
 
@@ -9,8 +9,6 @@ module ICU
9
9
  @t = Transliteration::Transliterator.new(*args)
10
10
  end
11
11
 
12
- after { @t.close if @t }
13
-
14
12
  [
15
13
  { :id => "Any-Hex", :input => "abcde", :output => "\\u0061\\u0062\\u0063\\u0064\\u0065" },
16
14
  { :id => "Lower", :input => "ABC", :output => "abc" },
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: ffi-icu
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.5
5
+ version: 0.0.6
6
6
  platform: ruby
7
7
  authors:
8
8
  - Jari Bakken