ffi-icu 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -20,3 +20,4 @@ pkg
20
20
 
21
21
  ## PROJECT::SPECIFIC
22
22
  *.rbc
23
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Fetch gems over HTTPS. The `:rubygems` symbol shorthand is deprecated by
# Bundler because it resolves to a plain-HTTP source.
source "https://rubygems.org"

# Take the runtime and development dependencies from the gemspec in this
# directory so they are declared in one place only.
gemspec
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2009 Jari Bakken
1
+ Copyright (c) 2010-2011 Jari Bakken
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.rdoc CHANGED
@@ -82,4 +82,4 @@ Rubies:
82
82
 
83
83
  == Copyright
84
84
 
85
- Copyright (c) 2010 Jari Bakken. See LICENSE for details.
85
+ Copyright (c) 2010-2011 Jari Bakken. See LICENSE for details.
data/Rakefile CHANGED
@@ -1,39 +1,20 @@
1
1
  require 'rubygems'
2
2
  require 'rake'
3
3
 
4
- begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "ffi-icu"
8
- gem.summary = %Q{Simple FFI wrappers for things I need from ICU.}
9
- gem.description = %Q{Provides charset detection, locale sensitive collation and more.}
10
- gem.email = "jari.bakken@gmail.com"
11
- gem.homepage = "http://github.com/jarib/ffi-icu"
12
- gem.authors = ["Jari Bakken"]
13
-
14
- gem.add_dependency "ffi", ">= 0.6.3"
15
- gem.add_development_dependency "rspec", ">= 1.3.0"
16
- # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
17
- end
4
+ require 'bundler'
5
+ Bundler::GemHelper.install_tasks
18
6
 
19
- Jeweler::GemcutterTasks.new
20
- rescue LoadError
21
- puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
22
- end
23
-
24
- require 'spec/rake/spectask'
25
- Spec::Rake::SpecTask.new(:spec) do |spec|
26
- spec.libs << 'lib' << 'spec'
27
- spec.spec_files = FileList['spec/**/*_spec.rb']
7
+ require 'rspec/core/rake_task'
8
+ RSpec::Core::RakeTask.new(:spec) do |spec|
9
+ spec.pattern = 'spec/**/*_spec.rb'
28
10
  end
29
11
 
30
- Spec::Rake::SpecTask.new(:rcov) do |spec|
31
- spec.libs << 'lib' << 'spec'
12
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
32
13
  spec.pattern = 'spec/**/*_spec.rb'
33
14
  spec.rcov = true
34
15
  end
35
16
 
36
- task :spec => :check_dependencies
17
+ task :spec
37
18
 
38
19
  task :default => :spec
39
20
 
data/ffi-icu.gemspec ADDED
@@ -0,0 +1,33 @@
1
# -*- encoding: utf-8 -*-
#
# Gem specification for ffi-icu.
#
# This file is maintained by hand (the Rakefile uses Bundler::GemHelper to
# build and release from it). The version number lives in
# lib/ffi-icu/version.rb and the file lists are taken from `git ls-files`,
# so the gem must be built from a git checkout.

require File.expand_path("../lib/ffi-icu/version", __FILE__)

Gem::Specification.new do |s|
  s.name        = "ffi-icu"
  s.version     = ICU::VERSION
  s.authors     = ["Jari Bakken"]
  s.email       = "jari.bakken@gmail.com"
  s.homepage    = "http://github.com/jarib/ffi-icu"
  s.summary     = "Simple FFI wrappers for things I need from ICU."
  s.description = "Provides charset detection, locale sensitive collation and more. Depends on libicu."

  # No explicit s.date: RubyGems stamps the build date itself, which avoids
  # shipping a stale hard-coded date with every new release.

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=

  s.extra_rdoc_files = ["LICENSE", "README.rdoc"]
  s.rdoc_options     = ["--charset=UTF-8"]

  # File lists come straight from git.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_runtime_dependency "ffi", "~> 1.0.9"
  s.add_development_dependency "rspec", "~> 2.5.0"
end
33
+
data/lib/ffi-icu.rb CHANGED
@@ -20,12 +20,14 @@ module ICU
20
20
  end
21
21
  end
22
22
 
23
+ require "ffi-icu/core_ext/string"
23
24
  require "ffi-icu/lib"
24
25
  require "ffi-icu/uchar"
25
26
  require "ffi-icu/chardet"
26
27
  require "ffi-icu/collation"
27
28
  require "ffi-icu/transliteration"
28
29
  require "ffi-icu/normalization"
30
+ require "ffi-icu/break_iterator"
29
31
 
30
32
  unless ICU.ruby19?
31
33
  require 'jcode'
@@ -0,0 +1,67 @@
1
module ICU
  # Wrapper around ICU's UBreakIterator (the ubrk_* C functions exposed
  # through Lib). An instance is opened for a boundary type and a locale,
  # given a text with #text=, and then enumerates boundary offsets.
  #
  #   iterator = ICU::BreakIterator.new(:word, "en_US")
  #   iterator.text = "Lorem ipsum"
  #   iterator.to_a # => boundary offsets as Integers
  #
  # NOTE(review): #preceding, #following and #boundary? call
  # Lib.ubrk_preceding, Lib.ubrk_following and Lib.ubrk_isBoundary; confirm
  # that lib.rb attaches those functions.
  class BreakIterator
    include Enumerable

    # Value the navigation calls return once there is no further boundary;
    # #each stops when it sees it.
    UBRK_DONE = -1

    # Returns the locale names reported by ubrk_countAvailable /
    # ubrk_getAvailable as an Array of Strings.
    def self.available_locales
      (0...Lib.ubrk_countAvailable).map { |idx| Lib.ubrk_getAvailable(idx) }
    end

    # Opens a break iterator.
    #
    # type   - value of the :iterator_type enum accepted by Lib.ubrk_open.
    # locale - locale name String, e.g. "en_US".
    #
    # Errors reported by ICU are raised by Lib.check_error.
    def initialize(type, locale)
      ptr = Lib.check_error { |err| Lib.ubrk_open(type, locale, nil, 0, err) }

      # The AutoPointer calls ubrk_close when this object is collected.
      @iterator = FFI::AutoPointer.new(ptr, Lib.method(:ubrk_close))
    end

    # Sets the text to analyse.
    #
    # The UChar buffer is stored in an instance variable on purpose: per the
    # ICU documentation ubrk_setText keeps a pointer to the supplied buffer
    # instead of copying it, so the buffer must stay reachable for as long as
    # the iterator uses it. Without the reference the garbage collector could
    # free the memory while ICU still reads from it.
    #
    # NOTE(review): str.length is passed as the UChar count, as before; this
    # presumably only matches the buffer size when String#length counts
    # characters rather than bytes - confirm for Ruby 1.8.
    def text=(str)
      @text = UCharPointer.from_string(str)

      Lib.check_error { |err|
        Lib.ubrk_setText @iterator, @text, str.length, err
      }
    end

    # Yields every boundary offset from the first one onwards. Returns an
    # enumerator when called without a block.
    def each
      return to_enum(:each) unless block_given?

      int = first

      while int != UBRK_DONE
        yield int
        int = self.next
      end
    end

    # Advances to the boundary after the current one; returns its offset or
    # UBRK_DONE.
    def next
      Lib.ubrk_next @iterator
    end

    # Moves back to the boundary before the current one; returns its offset
    # or UBRK_DONE.
    def previous
      Lib.ubrk_previous @iterator
    end

    # Moves to the first boundary and returns its offset.
    def first
      Lib.ubrk_first @iterator
    end

    # Moves to the last boundary and returns its offset.
    def last
      Lib.ubrk_last @iterator
    end

    # Moves to the boundary preceding +offset+ and returns it. The C function
    # requires an offset; it defaults to the current position so that calls
    # without an argument keep working.
    def preceding(offset = current)
      Lib.ubrk_preceding @iterator, Integer(offset)
    end

    # Moves to the boundary following +offset+ and returns it. +offset+
    # defaults to the current position.
    def following(offset = current)
      Lib.ubrk_following @iterator, Integer(offset)
    end

    # Returns the offset of the boundary most recently returned.
    def current
      Lib.ubrk_current @iterator
    end

    # True when +index+ is a boundary. ubrk_isBoundary returns a numeric
    # flag, hence the comparison with zero.
    def boundary?(index)
      Lib.ubrk_isBoundary(@iterator, Integer(index)) != 0
    end

  end # BreakIterator
end # ICU
@@ -0,0 +1,5 @@
1
# Compatibility shim: when String does not already provide #bytesize,
# expose #length under that name. Strings that have #bytesize are untouched.
class String
  alias_method(:bytesize, :length) unless method_defined?(:bytesize)
end
data/lib/ffi-icu/lib.rb CHANGED
@@ -20,7 +20,7 @@ module ICU
20
20
  # let the user tell us where the lib is
21
21
  if ENV['FFI_ICU_LIB']
22
22
  libs = ENV['FFI_ICU_LIB'].split(",")
23
- ffi_lib *libs
23
+ ffi_lib(*libs)
24
24
 
25
25
  if ENV['FFI_ICU_VERSION_SUFFIX']
26
26
  return ENV['FFI_ICU_VERSION_SUFFIX']
@@ -145,7 +145,9 @@ module ICU
145
145
  :pre_context, :pointer,
146
146
  :post_context, :pointer
147
147
 
148
-
148
# Inspect-style summary of the parse error: class name, a hex identifier
# derived from #hash, and the :line and :offset struct fields. The closing
# ">" balances the opening "#<".
def to_s
  "#<%s:%x line: %d offset: %d>" % [self.class, hash*2, self[:line], self[:offset]]
end
149
151
  end
150
152
 
151
153
  class UTransPosition < FFI::Struct
@@ -179,5 +181,34 @@ module ICU
179
181
  ]
180
182
 
181
183
  attach_function :unorm_normalize, "unorm_normalize#{suffix}", [:pointer, :int32_t, :normalization_mode, :int32_t, :pointer, :int32_t, :pointer], :int32_t
184
+
185
+ #
186
+ # Text Boundary Analysis
187
+ #
188
+
189
# Boundary kinds accepted by ubrk_open, in the order of ICU's
# UBreakIteratorType (character = 0 ... title = 4).
enum :iterator_type, [:character, :word, :line, :sentence, :title]

# Rule-status tag ranges for word breaks (ICU's UWordBreak). Each category
# covers [tag, tag_limit), so a *_limit value equals the start of the next
# category; the ideographic range ends at 500.
enum :word_break, [ :none,           0,
                    :none_limit,   100,
                    :number,       100,
                    :number_limit, 200,
                    :letter,       200,
                    :letter_limit, 300,
                    :kana,         300,
                    :kana_limit,   400,
                    :ideo,         400,
                    :ideo_limit,   500
                  ]

attach_function :ubrk_countAvailable, "ubrk_countAvailable#{suffix}", [], :int32_t
attach_function :ubrk_getAvailable, "ubrk_getAvailable#{suffix}", [:int32_t], :string

attach_function :ubrk_open, "ubrk_open#{suffix}", [:iterator_type, :string, :pointer, :int32_t, :pointer], :pointer
attach_function :ubrk_close, "ubrk_close#{suffix}", [:pointer], :void
attach_function :ubrk_setText, "ubrk_setText#{suffix}", [:pointer, :pointer, :int32_t, :pointer], :void
attach_function :ubrk_current, "ubrk_current#{suffix}", [:pointer], :int32_t
attach_function :ubrk_next, "ubrk_next#{suffix}", [:pointer], :int32_t
attach_function :ubrk_previous, "ubrk_previous#{suffix}", [:pointer], :int32_t
attach_function :ubrk_first, "ubrk_first#{suffix}", [:pointer], :int32_t
attach_function :ubrk_last, "ubrk_last#{suffix}", [:pointer], :int32_t

# Offset-based lookups called by ICU::BreakIterator#preceding, #following
# and #boundary?. ubrk_isBoundary returns a UBool, which ICU defines as an
# 8-bit integer.
attach_function :ubrk_preceding, "ubrk_preceding#{suffix}", [:pointer, :int32_t], :int32_t
attach_function :ubrk_following, "ubrk_following#{suffix}", [:pointer, :int32_t], :int32_t
attach_function :ubrk_isBoundary, "ubrk_isBoundary#{suffix}", [:pointer, :int32_t], :int8_t
182
213
  end # Lib
183
214
  end # ICU
@@ -2,27 +2,28 @@ module ICU
2
2
  module Normalization
3
3
 
4
4
  def self.normalize(input, mode = :default)
5
- input_length = ICU.ruby19? ? input.length : input.jlength
6
- needed_length = 0
7
- result_length = 0
5
+ input_length = input.unpack("U*").size
6
+ needed_length = out_length = options = 0
7
+ in_ptr = UCharPointer.from_string(input)
8
+ out_ptr = UCharPointer.new(out_length)
8
9
 
9
10
  retried = false
10
- ptr = nil
11
11
 
12
12
  begin
13
13
  Lib.check_error do |error|
14
- needed_length = Lib.unorm_normalize(UCharPointer.from_string(input), input_length, mode, 0, ptr, result_length, error)
14
+ needed_length = Lib.unorm_normalize(in_ptr, input_length, mode, options, out_ptr, out_length, error)
15
15
  end
16
16
  rescue BufferOverflowError
17
- raise if retried
18
- ptr = UCharPointer.from_string("\0" * needed_length)
19
- result_length = needed_length + 1
17
+ raise BufferOverflowError, "needed: #{needed_length}" if retried
18
+
19
+ out_ptr = out_ptr.resized_to needed_length
20
+ out_length = needed_length + 1
20
21
 
21
22
  retried = true
22
23
  retry
23
24
  end
24
25
 
25
- ptr.string if ptr
26
+ out_ptr.string
26
27
  end
27
28
 
28
29
  end # Normalization
@@ -2,8 +2,8 @@ module ICU
2
2
  module Transliteration
3
3
 
4
4
  class << self
5
- def transliterate(translit_id, str)
6
- t = Transliterator.new translit_id
5
+ def transliterate(translit_id, str, rules = nil)
6
+ t = Transliterator.new translit_id, rules
7
7
  res = t.transliterate str
8
8
  t.close
9
9
 
@@ -25,11 +25,22 @@ module ICU
25
25
 
26
26
  class Transliterator
27
27
 
28
# Opens an ICU transliterator.
#
# id        - transliterator ID String passed to utrans_open.
# rules     - optional String of custom transliteration rules (nil for none).
# direction - value of the direction argument of utrans_open
#             (default :forward).
#
# Raises ICU::Error when ICU reports a failure; the message is extended with
# the UParseError filled in by ICU so rule syntax errors can be located.
def initialize(id, rules = nil, direction = :forward)
  rules_length = 0

  if rules
    # UCharPointer.from_string writes one UChar per code point and adds no
    # terminator, so the length handed to ICU must be exactly the code point
    # count. A larger value would make ICU read past the end of the buffer.
    rules_length = rules.unpack("U*").size
    rules = UCharPointer.from_string(rules)
  end

  parse_error = Lib::UParseError.new

  begin
    Lib.check_error do |status|
      # couldn't get utrans_openU to work properly, so using deprecated utrans_open for now
      # The same struct is passed to ICU and interpolated into the message
      # below, so the details ICU records actually reach the caller.
      @tr = Lib.utrans_open(id, direction, rules, rules_length, parse_error, status)
    end
  rescue ICU::Error => ex
    raise ex, "#{ex.message} (#{parse_error})"
  end
end
35
46
 
data/lib/ffi-icu/uchar.rb CHANGED
@@ -8,15 +8,19 @@ module ICU
8
8
  str = str.encode("UTF-8") if str.respond_to? :encode
9
9
  bytes = str.unpack("U*")
10
10
 
11
- ptr = new UCHAR_TYPE, bytes.size
11
+ ptr = new bytes.size
12
12
  ptr.put_array_of_uint16 0, bytes
13
13
 
14
14
  ptr
15
15
  end
16
16
 
17
+ def initialize(size)
18
+ super UCHAR_TYPE, size
19
+ end
20
+
17
21
  def resized_to(new_size)
18
22
  raise "new_size must be larger than current size" if new_size < size
19
- resized = self.class.new UCHAR_TYPE, new_size
23
+ resized = self.class.new new_size
20
24
  resized.put_bytes(0, get_bytes(0, size))
21
25
 
22
26
  resized
@@ -0,0 +1,3 @@
1
# Top-level namespace of the ffi-icu gem.
module ICU
  # Release version of the gem; ffi-icu.gemspec reads this constant.
  VERSION = '0.0.3'
end
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+
3
+ require "spec_helper"
4
+
5
module ICU
  # Examples for ICU::BreakIterator: locale listing plus word and sentence
  # boundary detection on English text.
  describe BreakIterator do

    it "should return available locales" do
      available = ICU::BreakIterator.available_locales

      available.should be_kind_of(Array)
      available.should_not be_empty
      available.should include("en_US")
    end

    it "finds all word boundaries in an English string" do
      text = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
      expected = [0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]

      words = BreakIterator.new(:word, "en_US")
      words.text = text

      words.to_a.should == expected
    end

    it "finds all sentence boundaries in an English string" do
      text = "This is a sentence. This is another sentence, with a comma in it."
      expected = [0, 20, 65]

      sentences = BreakIterator.new(:sentence, "en_US")
      sentences.text = text

      sentences.to_a.should == expected
    end

  end # BreakIterator
end # ICU
@@ -17,6 +17,7 @@ module ICU
17
17
  locales = ICU::Collation.available_locales
18
18
  locales.should be_kind_of(Array)
19
19
  locales.should_not be_empty
20
+ locales.should include("nb")
20
21
  end
21
22
 
22
23
  it "should return the locale of the collator" do
data/spec/spec_helper.rb CHANGED
@@ -3,10 +3,9 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
3
 
4
4
  require "rubygems"
5
5
  require 'ffi-icu'
6
- require 'spec'
7
- require 'spec/autorun'
6
+ require 'rspec'
8
7
 
9
- Spec::Runner.configure do |config|
8
+ RSpec.configure do |config|
10
9
 
11
10
  end
12
11
 
@@ -29,5 +29,10 @@ module ICU
29
29
  ids.should be_kind_of(Array)
30
30
  ids.should_not be_empty
31
31
  end
32
+
33
+ # it "should transliterate custom rules" do
34
+ # ICU::Transliteration.translit("Accents-Any", "âêîôû", "NFD; [:Nonspacing Mark:] Remove; NFC").should == "aeiou"
35
+ # end
36
+
32
37
  end # Transliteration
33
38
  end # ICU
metadata CHANGED
@@ -1,13 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ffi-icu
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
5
- prerelease: false
6
- segments:
7
- - 0
8
- - 0
9
- - 2
10
- version: 0.0.2
4
+ prerelease:
5
+ version: 0.0.3
11
6
  platform: ruby
12
7
  authors:
13
8
  - Jari Bakken
@@ -15,8 +10,7 @@ autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
12
 
18
- date: 2010-05-29 00:00:00 +02:00
19
- default_executable:
13
+ date: 2010-08-23 00:00:00 Z
20
14
  dependencies:
21
15
  - !ruby/object:Gem::Dependency
22
16
  name: ffi
@@ -24,14 +18,9 @@ dependencies:
24
18
  requirement: &id001 !ruby/object:Gem::Requirement
25
19
  none: false
26
20
  requirements:
27
- - - ">="
21
+ - - ~>
28
22
  - !ruby/object:Gem::Version
29
- hash: 1
30
- segments:
31
- - 0
32
- - 6
33
- - 3
34
- version: 0.6.3
23
+ version: 1.0.9
35
24
  type: :runtime
36
25
  version_requirements: *id001
37
26
  - !ruby/object:Gem::Dependency
@@ -40,17 +29,12 @@ dependencies:
40
29
  requirement: &id002 !ruby/object:Gem::Requirement
41
30
  none: false
42
31
  requirements:
43
- - - ">="
32
+ - - ~>
44
33
  - !ruby/object:Gem::Version
45
- hash: 27
46
- segments:
47
- - 1
48
- - 3
49
- - 0
50
- version: 1.3.0
34
+ version: 2.5.0
51
35
  type: :development
52
36
  version_requirements: *id002
53
- description: Provides charset detection, locale sensitive collation and more.
37
+ description: Provides charset detection, locale sensitive collation and more. Depends on libicu.
54
38
  email: jari.bakken@gmail.com
55
39
  executables: []
56
40
 
@@ -62,19 +46,24 @@ extra_rdoc_files:
62
46
  files:
63
47
  - .document
64
48
  - .gitignore
49
+ - Gemfile
65
50
  - LICENSE
66
51
  - README.rdoc
67
52
  - Rakefile
68
- - VERSION
69
53
  - benchmark/detect.rb
70
54
  - benchmark/shared.rb
55
+ - ffi-icu.gemspec
71
56
  - lib/ffi-icu.rb
57
+ - lib/ffi-icu/break_iterator.rb
72
58
  - lib/ffi-icu/chardet.rb
73
59
  - lib/ffi-icu/collation.rb
60
+ - lib/ffi-icu/core_ext/string.rb
74
61
  - lib/ffi-icu/lib.rb
75
62
  - lib/ffi-icu/normalization.rb
76
63
  - lib/ffi-icu/transliteration.rb
77
64
  - lib/ffi-icu/uchar.rb
65
+ - lib/ffi-icu/version.rb
66
+ - spec/break_iterator_spec.rb
78
67
  - spec/chardet_spec.rb
79
68
  - spec/collation_spec.rb
80
69
  - spec/normalization_spec.rb
@@ -82,7 +71,6 @@ files:
82
71
  - spec/spec_helper.rb
83
72
  - spec/transliteration_spec.rb
84
73
  - test.c
85
- has_rdoc: true
86
74
  homepage: http://github.com/jarib/ffi-icu
87
75
  licenses: []
88
76
 
@@ -96,29 +84,25 @@ required_ruby_version: !ruby/object:Gem::Requirement
96
84
  requirements:
97
85
  - - ">="
98
86
  - !ruby/object:Gem::Version
99
- hash: 3
100
- segments:
101
- - 0
102
87
  version: "0"
103
88
  required_rubygems_version: !ruby/object:Gem::Requirement
104
89
  none: false
105
90
  requirements:
106
91
  - - ">="
107
92
  - !ruby/object:Gem::Version
108
- hash: 3
109
- segments:
110
- - 0
111
93
  version: "0"
112
94
  requirements: []
113
95
 
114
96
  rubyforge_project:
115
- rubygems_version: 1.3.7
97
+ rubygems_version: 1.8.2
116
98
  signing_key:
117
99
  specification_version: 3
118
100
  summary: Simple FFI wrappers for things I need from ICU.
119
101
  test_files:
102
+ - spec/break_iterator_spec.rb
120
103
  - spec/chardet_spec.rb
104
+ - spec/collation_spec.rb
121
105
  - spec/normalization_spec.rb
122
- - spec/transliteration_spec.rb
106
+ - spec/spec.opts
123
107
  - spec/spec_helper.rb
124
- - spec/collation_spec.rb
108
+ - spec/transliteration_spec.rb
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.0.2