ffi-icu 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -20,3 +20,4 @@ pkg
20
20
 
21
21
  ## PROJECT::SPECIFIC
22
22
  *.rbc
23
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,3 @@
# Use the HTTPS index explicitly; the :rubygems shorthand is deprecated by
# Bundler because it resolves to the plain-HTTP source.
source "https://rubygems.org"

# Take the runtime and development dependencies from the .gemspec that sits
# next to this Gemfile.
gemspec
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2009 Jari Bakken
1
+ Copyright (c) 2010-2011 Jari Bakken
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.rdoc CHANGED
@@ -82,4 +82,4 @@ Rubies:
82
82
 
83
83
  == Copyright
84
84
 
85
- Copyright (c) 2010 Jari Bakken. See LICENSE for details.
85
+ Copyright (c) 2010-2011 Jari Bakken. See LICENSE for details.
data/Rakefile CHANGED
@@ -1,39 +1,20 @@
1
1
  require 'rubygems'
2
2
  require 'rake'
3
3
 
4
- begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "ffi-icu"
8
- gem.summary = %Q{Simple FFI wrappers for things I need from ICU.}
9
- gem.description = %Q{Provides charset detection, locale sensitive collation and more.}
10
- gem.email = "jari.bakken@gmail.com"
11
- gem.homepage = "http://github.com/jarib/ffi-icu"
12
- gem.authors = ["Jari Bakken"]
13
-
14
- gem.add_dependency "ffi", ">= 0.6.3"
15
- gem.add_development_dependency "rspec", ">= 1.3.0"
16
- # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
17
- end
4
+ require 'bundler'
5
+ Bundler::GemHelper.install_tasks
18
6
 
19
- Jeweler::GemcutterTasks.new
20
- rescue LoadError
21
- puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
22
- end
23
-
24
- require 'spec/rake/spectask'
25
- Spec::Rake::SpecTask.new(:spec) do |spec|
26
- spec.libs << 'lib' << 'spec'
27
- spec.spec_files = FileList['spec/**/*_spec.rb']
7
+ require 'rspec/core/rake_task'
8
+ RSpec::Core::RakeTask.new(:spec) do |spec|
9
+ spec.pattern = 'spec/**/*_spec.rb'
28
10
  end
29
11
 
30
- Spec::Rake::SpecTask.new(:rcov) do |spec|
31
- spec.libs << 'lib' << 'spec'
12
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
32
13
  spec.pattern = 'spec/**/*_spec.rb'
33
14
  spec.rcov = true
34
15
  end
35
16
 
36
- task :spec => :check_dependencies
17
+ task :spec
37
18
 
38
19
  task :default => :spec
39
20
 
data/ffi-icu.gemspec ADDED
@@ -0,0 +1,33 @@
# -*- encoding: utf-8 -*-
#
# Gem specification for ffi-icu. Edit this file directly when the gem's
# metadata changes.
#
# The version comes from lib/ffi-icu/version.rb and the file lists are
# produced by `git ls-files`, so the gem has to be built from a git checkout.

require File.expand_path("../lib/ffi-icu/version", __FILE__)

Gem::Specification.new do |s|
  s.name = %q{ffi-icu}
  s.version = ICU::VERSION

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Jari Bakken"]
  s.date = %q{2010-08-23}
  s.description = %q{Provides charset detection, locale sensitive collation and more. Depends on libicu.}
  s.email = %q{jari.bakken@gmail.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.rdoc"
  ]

  # Everything tracked by git is packaged; specs and executables are picked
  # out of the same listing.
  s.files = `git ls-files`.split("\n")
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.homepage = %q{http://github.com/jarib/ffi-icu}
  s.rdoc_options = ["--charset=UTF-8"]
  s.summary = %q{Simple FFI wrappers for things I need from ICU.}

  s.add_runtime_dependency(%q<ffi>, ["~> 1.0.9"])
  s.add_development_dependency(%q<rspec>, ["~> 2.5.0"])
end
data/lib/ffi-icu.rb CHANGED
@@ -20,12 +20,14 @@ module ICU
20
20
  end
21
21
  end
22
22
 
23
+ require "ffi-icu/core_ext/string"
23
24
  require "ffi-icu/lib"
24
25
  require "ffi-icu/uchar"
25
26
  require "ffi-icu/chardet"
26
27
  require "ffi-icu/collation"
27
28
  require "ffi-icu/transliteration"
28
29
  require "ffi-icu/normalization"
30
+ require "ffi-icu/break_iterator"
29
31
 
30
32
  unless ICU.ruby19?
31
33
  require 'jcode'
@@ -0,0 +1,67 @@
module ICU
  # Finds text boundaries by driving ICU's ubrk_* functions through Lib.
  #
  # The class is Enumerable: #each yields every boundary offset of the
  # current text, starting with the first one.
  class BreakIterator
    include Enumerable

    # Value the ubrk_* navigation calls return when no boundary is left.
    UBRK_DONE = -1

    # Returns an Array with one entry per locale index reported by
    # ubrk_countAvailable, each looked up with ubrk_getAvailable.
    def self.available_locales
      (0...Lib.ubrk_countAvailable).map do |idx|
        Lib.ubrk_getAvailable idx
      end
    end

    # Opens an ICU break iterator without any text attached.
    #
    # type   - iterator type handed to ubrk_open (e.g. :word or :sentence)
    # locale - locale name handed to ubrk_open (e.g. "en_US")
    #
    # The native handle is wrapped in an FFI::AutoPointer so that ubrk_close
    # runs when the wrapper is garbage collected.
    def initialize(type, locale)
      ptr = Lib.check_error { |err| Lib.ubrk_open(type, locale, nil, 0, err) }

      @iterator = FFI::AutoPointer.new(ptr, Lib.method(:ubrk_close))
    end

    # Sets the text to analyse.
    #
    # str - the String whose boundaries should be found
    def text=(str)
      # Count UChars the same way UCharPointer.from_string produces them
      # (one per UTF-8 code point); String#length is a byte count on Ruby 1.8.
      utf8 = str.respond_to?(:encode) ? str.encode("UTF-8") : str
      length = utf8.unpack("U*").size

      # ICU keeps a pointer to the buffer instead of copying it, so the buffer
      # must stay referenced for as long as the iterator may read it.
      @text = UCharPointer.from_string(str)

      Lib.check_error { |err|
        Lib.ubrk_setText @iterator, @text, length, err
      }
    end

    # Yields each boundary offset, beginning at #first and advancing with
    # #next until UBRK_DONE is returned.
    #
    # Returns an Enumerator when called without a block, self otherwise.
    def each
      return to_enum(:each) unless block_given?

      int = first

      while int != UBRK_DONE
        yield int
        int = self.next
      end

      self
    end

    # Advances to the boundary after the current one and returns its offset.
    def next
      Lib.ubrk_next @iterator
    end

    # Steps back to the boundary before the current one and returns its offset.
    def previous
      Lib.ubrk_previous @iterator
    end

    # Moves to the first boundary of the text and returns its offset.
    def first
      Lib.ubrk_first @iterator
    end

    # Moves to the last boundary of the text and returns its offset.
    def last
      Lib.ubrk_last @iterator
    end

    # Moves to the boundary preceding +offset+ and returns its offset.
    # NOTE(review): relies on Lib exposing ubrk_preceding(iterator, offset) - confirm it is attached.
    def preceding(offset)
      Lib.ubrk_preceding @iterator, Integer(offset)
    end

    # Moves to the boundary following +offset+ and returns its offset.
    # NOTE(review): relies on Lib exposing ubrk_following(iterator, offset) - confirm it is attached.
    def following(offset)
      Lib.ubrk_following @iterator, Integer(offset)
    end

    # Returns the offset of the boundary the iterator currently points at.
    def current
      Lib.ubrk_current @iterator
    end

    # Returns true when ubrk_isBoundary gives a non-zero answer for +index+.
    def boundary?(index)
      Lib.ubrk_isBoundary(@iterator, Integer(index)) != 0
    end

  end # BreakIterator
end # ICU
@@ -0,0 +1,5 @@
1
+ class String
2
+ unless method_defined?(:bytesize)
3
+ alias_method :bytesize, :length
4
+ end
5
+ end
data/lib/ffi-icu/lib.rb CHANGED
@@ -20,7 +20,7 @@ module ICU
20
20
  # let the user tell us where the lib is
21
21
  if ENV['FFI_ICU_LIB']
22
22
  libs = ENV['FFI_ICU_LIB'].split(",")
23
- ffi_lib *libs
23
+ ffi_lib(*libs)
24
24
 
25
25
  if ENV['FFI_ICU_VERSION_SUFFIX']
26
26
  return ENV['FFI_ICU_VERSION_SUFFIX']
@@ -145,7 +145,9 @@ module ICU
145
145
  :pre_context, :pointer,
146
146
  :post_context, :pointer
147
147
 
148
-
# Describes the parse error as "#<Class:hex line: N offset: M>", using the
# struct's :line and :offset fields.
def to_s
  "#<%s:%x line: %d offset: %d>" % [self.class, hash*2, self[:line], self[:offset]]
end
149
151
  end
150
152
 
151
153
  class UTransPosition < FFI::Struct
@@ -179,5 +181,34 @@ module ICU
179
181
  ]
180
182
 
181
183
  attach_function :unorm_normalize, "unorm_normalize#{suffix}", [:pointer, :int32_t, :normalization_mode, :int32_t, :pointer, :int32_t, :pointer], :int32_t

#
# Text Boundary Analysis
#

# Kinds of break iterator accepted by ubrk_open.
enum :iterator_type, [ :character, :word, :line, :sentence, :title]

# Word-break status ranges. Every category starts at its own value and
# runs up to the matching *_limit value.
enum :word_break, [ :none, 0,
                    :none_limit, 100,
                    :number, 100,
                    :number_limit, 200,
                    :letter, 200,
                    :letter_limit, 300,
                    :kana, 300,
                    :kana_limit, 400,
                    :ideo, 400,
                    :ideo_limit, 500
                  ]

attach_function :ubrk_countAvailable, "ubrk_countAvailable#{suffix}", [], :int32_t
attach_function :ubrk_getAvailable, "ubrk_getAvailable#{suffix}", [:int32_t], :string

attach_function :ubrk_open, "ubrk_open#{suffix}", [:iterator_type, :string, :pointer, :int32_t, :pointer], :pointer
attach_function :ubrk_close, "ubrk_close#{suffix}", [:pointer], :void
attach_function :ubrk_setText, "ubrk_setText#{suffix}", [:pointer, :pointer, :int32_t, :pointer], :void
attach_function :ubrk_current, "ubrk_current#{suffix}", [:pointer], :int32_t
attach_function :ubrk_next, "ubrk_next#{suffix}", [:pointer], :int32_t
attach_function :ubrk_previous, "ubrk_previous#{suffix}", [:pointer], :int32_t
attach_function :ubrk_first, "ubrk_first#{suffix}", [:pointer], :int32_t
attach_function :ubrk_last, "ubrk_last#{suffix}", [:pointer], :int32_t

# Offset-based queries: each takes the iterator and a text offset.
attach_function :ubrk_preceding, "ubrk_preceding#{suffix}", [:pointer, :int32_t], :int32_t
attach_function :ubrk_following, "ubrk_following#{suffix}", [:pointer, :int32_t], :int32_t
# ubrk_isBoundary returns a UBool (an 8-bit integer); callers compare it with 0.
attach_function :ubrk_isBoundary, "ubrk_isBoundary#{suffix}", [:pointer, :int32_t], :int8_t
182
213
  end # Lib
183
214
  end # ICU
@@ -2,27 +2,28 @@ module ICU
2
2
  module Normalization
3
3
 
4
4
  def self.normalize(input, mode = :default)
5
- input_length = ICU.ruby19? ? input.length : input.jlength
6
- needed_length = 0
7
- result_length = 0
5
+ input_length = input.unpack("U*").size
6
+ needed_length = out_length = options = 0
7
+ in_ptr = UCharPointer.from_string(input)
8
+ out_ptr = UCharPointer.new(out_length)
8
9
 
9
10
  retried = false
10
- ptr = nil
11
11
 
12
12
  begin
13
13
  Lib.check_error do |error|
14
- needed_length = Lib.unorm_normalize(UCharPointer.from_string(input), input_length, mode, 0, ptr, result_length, error)
14
+ needed_length = Lib.unorm_normalize(in_ptr, input_length, mode, options, out_ptr, out_length, error)
15
15
  end
16
16
  rescue BufferOverflowError
17
- raise if retried
18
- ptr = UCharPointer.from_string("\0" * needed_length)
19
- result_length = needed_length + 1
17
+ raise BufferOverflowError, "needed: #{needed_length}" if retried
18
+
19
+ out_ptr = out_ptr.resized_to needed_length
20
+ out_length = needed_length + 1
20
21
 
21
22
  retried = true
22
23
  retry
23
24
  end
24
25
 
25
- ptr.string if ptr
26
+ out_ptr.string
26
27
  end
27
28
 
28
29
  end # Normalization
@@ -2,8 +2,8 @@ module ICU
2
2
  module Transliteration
3
3
 
4
4
  class << self
5
- def transliterate(translit_id, str)
6
- t = Transliterator.new translit_id
5
+ def transliterate(translit_id, str, rules = nil)
6
+ t = Transliterator.new translit_id, rules
7
7
  res = t.transliterate str
8
8
  t.close
9
9
 
@@ -25,11 +25,22 @@ module ICU
25
25
 
26
26
  class Transliterator
27
27
 
28
- def initialize(id, direction = :forward)
29
- @parse_error = Lib::UParseError.new
30
- Lib.check_error do |status|
31
- # couldn't get utrans_openU to work properly, so using deprecated utrans_open for now
32
- @tr = Lib.utrans_open(id, direction, nil, 0, @parse_error, status)
# Opens an ICU transliterator and stores the handle in @tr.
#
# id        - transliterator ID passed to utrans_open
# rules     - optional String of custom transliteration rules (nil for none)
# direction - passed through to utrans_open; defaults to :forward
#
# Any ICU::Error raised while opening is re-raised with the UParseError
# details appended to its message.
def initialize(id, rules = nil, direction = :forward)
  rules_length = 0

  if rules
    # UCharPointer.from_string writes one UChar per UTF-8 code point and no
    # terminator, so the length passed to ICU has to be exactly that count.
    utf8 = rules.respond_to?(:encode) ? rules.encode("UTF-8") : rules
    rules_length = utf8.unpack("U*").size
    rules = UCharPointer.from_string(rules)
  end

  parse_error = Lib::UParseError.new

  begin
    Lib.check_error do |status|
      # couldn't get utrans_openU to work properly, so using deprecated utrans_open for now
      # Pass the struct created above so ICU can fill in line/offset details.
      @tr = Lib.utrans_open(id, direction, rules, rules_length, parse_error, status)
    end
  rescue ICU::Error => ex
    raise ex, "#{ex.message} (#{parse_error})"
  end
end
35
46
 
data/lib/ffi-icu/uchar.rb CHANGED
@@ -8,15 +8,19 @@ module ICU
8
8
  str = str.encode("UTF-8") if str.respond_to? :encode
9
9
  bytes = str.unpack("U*")
10
10
 
11
- ptr = new UCHAR_TYPE, bytes.size
11
+ ptr = new bytes.size
12
12
  ptr.put_array_of_uint16 0, bytes
13
13
 
14
14
  ptr
15
15
  end
16
16
 
17
+ def initialize(size)
18
+ super UCHAR_TYPE, size
19
+ end
20
+
17
21
  def resized_to(new_size)
18
22
  raise "new_size must be larger than current size" if new_size < size
19
- resized = self.class.new UCHAR_TYPE, new_size
23
+ resized = self.class.new new_size
20
24
  resized.put_bytes(0, get_bytes(0, size))
21
25
 
22
26
  resized
@@ -0,0 +1,3 @@
1
+ module ICU
2
+ VERSION = "0.0.3"
3
+ end
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+
3
+ require "spec_helper"
4
+
5
+ module ICU
6
+ describe BreakIterator do
7
+
8
+ it "should return available locales" do
9
+ locales = ICU::BreakIterator.available_locales
10
+ locales.should be_kind_of(Array)
11
+ locales.should_not be_empty
12
+ locales.should include("en_US")
13
+ end
14
+
15
+ it "finds all word boundaries in an English string" do
16
+ iterator = BreakIterator.new :word, "en_US"
17
+ iterator.text = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
18
+ iterator.to_a.should == [0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
19
+ end
20
+
21
+ it "finds all sentence boundaries in an English string" do
22
+ iterator = BreakIterator.new :sentence, "en_US"
23
+ iterator.text = "This is a sentence. This is another sentence, with a comma in it."
24
+ iterator.to_a.should == [0, 20, 65]
25
+ end
26
+
27
+ end # BreakIterator
28
+ end # ICU
@@ -17,6 +17,7 @@ module ICU
17
17
  locales = ICU::Collation.available_locales
18
18
  locales.should be_kind_of(Array)
19
19
  locales.should_not be_empty
20
+ locales.should include("nb")
20
21
  end
21
22
 
22
23
  it "should return the locale of the collator" do
data/spec/spec_helper.rb CHANGED
@@ -3,10 +3,9 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
3
 
4
4
  require "rubygems"
5
5
  require 'ffi-icu'
6
- require 'spec'
7
- require 'spec/autorun'
6
+ require 'rspec'
8
7
 
9
- Spec::Runner.configure do |config|
8
+ RSpec.configure do |config|
10
9
 
11
10
  end
12
11
 
@@ -29,5 +29,10 @@ module ICU
29
29
  ids.should be_kind_of(Array)
30
30
  ids.should_not be_empty
31
31
  end
32
+
33
+ # it "should transliterate custom rules" do
34
+ # ICU::Transliteration.translit("Accents-Any", "âêîôû", "NFD; [:Nonspacing Mark:] Remove; NFC").should == "aeiou"
35
+ # end
36
+
32
37
  end # Transliteration
33
38
  end # ICU
metadata CHANGED
@@ -1,13 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ffi-icu
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
5
- prerelease: false
6
- segments:
7
- - 0
8
- - 0
9
- - 2
10
- version: 0.0.2
4
+ prerelease:
5
+ version: 0.0.3
11
6
  platform: ruby
12
7
  authors:
13
8
  - Jari Bakken
@@ -15,8 +10,7 @@ autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
12
 
18
- date: 2010-05-29 00:00:00 +02:00
19
- default_executable:
13
+ date: 2010-08-23 00:00:00 Z
20
14
  dependencies:
21
15
  - !ruby/object:Gem::Dependency
22
16
  name: ffi
@@ -24,14 +18,9 @@ dependencies:
24
18
  requirement: &id001 !ruby/object:Gem::Requirement
25
19
  none: false
26
20
  requirements:
27
- - - ">="
21
+ - - ~>
28
22
  - !ruby/object:Gem::Version
29
- hash: 1
30
- segments:
31
- - 0
32
- - 6
33
- - 3
34
- version: 0.6.3
23
+ version: 1.0.9
35
24
  type: :runtime
36
25
  version_requirements: *id001
37
26
  - !ruby/object:Gem::Dependency
@@ -40,17 +29,12 @@ dependencies:
40
29
  requirement: &id002 !ruby/object:Gem::Requirement
41
30
  none: false
42
31
  requirements:
43
- - - ">="
32
+ - - ~>
44
33
  - !ruby/object:Gem::Version
45
- hash: 27
46
- segments:
47
- - 1
48
- - 3
49
- - 0
50
- version: 1.3.0
34
+ version: 2.5.0
51
35
  type: :development
52
36
  version_requirements: *id002
53
- description: Provides charset detection, locale sensitive collation and more.
37
+ description: Provides charset detection, locale sensitive collation and more. Depends on libicu.
54
38
  email: jari.bakken@gmail.com
55
39
  executables: []
56
40
 
@@ -62,19 +46,24 @@ extra_rdoc_files:
62
46
  files:
63
47
  - .document
64
48
  - .gitignore
49
+ - Gemfile
65
50
  - LICENSE
66
51
  - README.rdoc
67
52
  - Rakefile
68
- - VERSION
69
53
  - benchmark/detect.rb
70
54
  - benchmark/shared.rb
55
+ - ffi-icu.gemspec
71
56
  - lib/ffi-icu.rb
57
+ - lib/ffi-icu/break_iterator.rb
72
58
  - lib/ffi-icu/chardet.rb
73
59
  - lib/ffi-icu/collation.rb
60
+ - lib/ffi-icu/core_ext/string.rb
74
61
  - lib/ffi-icu/lib.rb
75
62
  - lib/ffi-icu/normalization.rb
76
63
  - lib/ffi-icu/transliteration.rb
77
64
  - lib/ffi-icu/uchar.rb
65
+ - lib/ffi-icu/version.rb
66
+ - spec/break_iterator_spec.rb
78
67
  - spec/chardet_spec.rb
79
68
  - spec/collation_spec.rb
80
69
  - spec/normalization_spec.rb
@@ -82,7 +71,6 @@ files:
82
71
  - spec/spec_helper.rb
83
72
  - spec/transliteration_spec.rb
84
73
  - test.c
85
- has_rdoc: true
86
74
  homepage: http://github.com/jarib/ffi-icu
87
75
  licenses: []
88
76
 
@@ -96,29 +84,25 @@ required_ruby_version: !ruby/object:Gem::Requirement
96
84
  requirements:
97
85
  - - ">="
98
86
  - !ruby/object:Gem::Version
99
- hash: 3
100
- segments:
101
- - 0
102
87
  version: "0"
103
88
  required_rubygems_version: !ruby/object:Gem::Requirement
104
89
  none: false
105
90
  requirements:
106
91
  - - ">="
107
92
  - !ruby/object:Gem::Version
108
- hash: 3
109
- segments:
110
- - 0
111
93
  version: "0"
112
94
  requirements: []
113
95
 
114
96
  rubyforge_project:
115
- rubygems_version: 1.3.7
97
+ rubygems_version: 1.8.2
116
98
  signing_key:
117
99
  specification_version: 3
118
100
  summary: Simple FFI wrappers for things I need from ICU.
119
101
  test_files:
102
+ - spec/break_iterator_spec.rb
120
103
  - spec/chardet_spec.rb
104
+ - spec/collation_spec.rb
121
105
  - spec/normalization_spec.rb
122
- - spec/transliteration_spec.rb
106
+ - spec/spec.opts
123
107
  - spec/spec_helper.rb
124
- - spec/collation_spec.rb
108
+ - spec/transliteration_spec.rb
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.0.2