ffi-icu 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.rdoc +71 -0
- data/Rakefile +48 -0
- data/VERSION +1 -0
- data/benchmark/detect.rb +14 -0
- data/benchmark/shared.rb +17 -0
- data/lib/ffi-icu.rb +23 -0
- data/lib/ffi-icu/chardet.rb +90 -0
- data/lib/ffi-icu/collation.rb +78 -0
- data/lib/ffi-icu/lib.rb +112 -0
- data/lib/ffi-icu/uchar.rb +11 -0
- data/spec/chardet_spec.rb +39 -0
- data/spec/collation_spec.rb +51 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +12 -0
- metadata +108 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Jari Bakken
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
= ffi-icu
|
2
|
+
|
3
|
+
Simple FFI wrappers for things I need from ICU.
|
4
|
+
|
5
|
+
= Dependencies
|
6
|
+
|
7
|
+
ICU - you might need to hack the ffi_lib call to make it work. Please send a patch if you do!
|
8
|
+
|
9
|
+
= Features
|
10
|
+
|
11
|
+
== Character Encoding Detection
|
12
|
+
|
13
|
+
=== Examples:
|
14
|
+
|
15
|
+
match = ICU::CharDet.detect(str)
|
16
|
+
match.name # => "UTF-8"
|
17
|
+
match.confidence # => 80
|
18
|
+
|
19
|
+
or
|
20
|
+
|
21
|
+
detector = ICU::CharDet::Detector.new
|
22
|
+
detector.detect(str)
|
23
|
+
detector.close
|
24
|
+
|
25
|
+
=== Why not just use rchardet?
|
26
|
+
|
27
|
+
* this is faster
|
28
|
+
* rchardet does not work well on 1.9
|
29
|
+
* none of the rchardet forks claiming to work on 1.9 actually does
|
30
|
+
|
31
|
+
== Locale Sensitive Collation
|
32
|
+
|
33
|
+
=== Examples:
|
34
|
+
|
35
|
+
ICU::Collation.collate("nb", %w[å æ ø]) == %w[æ ø å] #=> true
|
36
|
+
|
37
|
+
or
|
38
|
+
|
39
|
+
collator = ICU::Collation::Collator.new("nb")
|
40
|
+
collator.compare("a", "b") #=> -1
|
41
|
+
collator.greater?("z", "a") #=> true
|
42
|
+
collator.collate(%w[å æ ø]) #=> ["æ", "ø", "å"]
|
43
|
+
|
44
|
+
= Tested on:
|
45
|
+
|
46
|
+
Platforms:
|
47
|
+
|
48
|
+
* OS X 10.6
|
49
|
+
* Debian Linux
|
50
|
+
* Arch Linux
|
51
|
+
|
52
|
+
Rubies:
|
53
|
+
|
54
|
+
* MRI 1.9.1
|
55
|
+
* MRI 1.8.7
|
56
|
+
|
57
|
+
YMMV.
|
58
|
+
|
59
|
+
== Note on Patches/Pull Requests
|
60
|
+
|
61
|
+
* Fork the project.
|
62
|
+
* Make your feature addition or bug fix.
|
63
|
+
* Add tests for it. This is important so I don't break it in a
|
64
|
+
future version unintentionally.
|
65
|
+
* Commit, do not mess with rakefile, version, or history.
|
66
|
+
(If you want to have your own version, that is fine, but bump the version in a commit by itself so I can ignore it when I pull.)
|
67
|
+
* Send me a pull request. Bonus points for topic branches.
|
68
|
+
|
69
|
+
== Copyright
|
70
|
+
|
71
|
+
Copyright (c) 2010 Jari Bakken. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

# Gem packaging and release tasks. These are only defined when Jeweler can be
# loaded; otherwise a hint is printed and the remaining tasks are still set up.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |gem|
    gem.name        = "ffi-icu"
    gem.summary     = "Simple FFI wrappers for things I need from ICU."
    gem.description = "Provides charset detection, locale sensitive collation and more."
    gem.email       = "jari.bakken@gmail.com"
    gem.homepage    = "http://github.com/jarib/ffi-icu"
    gem.authors     = ["Jari Bakken"]

    gem.add_dependency "ffi", ">= 0.6.3"
    gem.add_development_dependency "rspec", ">= 1.3.0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

require 'spec/rake/spectask'

# `rake spec` runs every *_spec.rb file below spec/.
Spec::Rake::SpecTask.new(:spec) do |t|
  t.libs << 'lib' << 'spec'
  t.spec_files = FileList['spec/**/*_spec.rb']
end

# `rake rcov` runs the same specs with rcov enabled.
Spec::Rake::SpecTask.new(:rcov) do |t|
  t.libs << 'lib' << 'spec'
  t.pattern = 'spec/**/*_spec.rb'
  t.rcov = true
end

# :check_dependencies is expected to come from the Jeweler tasks above.
task :spec => :check_dependencies

task :default => :spec

# Documentation task; replaced by an aborting stub when YARD is missing.
begin
  require 'yard'
  YARD::Rake::YardocTask.new
rescue LoadError
  task :yardoc do
    abort "YARD is not available. In order to run yardoc, you must: sudo gem install yard"
  end
end
|
48
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
data/benchmark/detect.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require "benchmark"
|
4
|
+
|
5
|
+
$LOAD_PATH.unshift "lib"
|
6
|
+
require "ffi-icu"
|
7
|
+
require "rchardet"
|
8
|
+
|
9
|
+
# Number of detections performed per benchmark report.
TESTS = 1000

# Compares the module-level, one-shot detect call of rchardet and ffi-icu.
Benchmark.bmbm do |bm|
  bm.report("rchardet:") do
    TESTS.times { CharDet.detect("æåø") }
  end

  bm.report("ffi-icu:") do
    TESTS.times { ICU::CharDet.detect("æåø") }
  end
end
|
data/benchmark/shared.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require "benchmark"
|
4
|
+
|
5
|
+
$LOAD_PATH.unshift "lib"
|
6
|
+
require "ffi-icu"
|
7
|
+
require "rchardet"
|
8
|
+
|
9
|
+
# Number of detections performed per benchmark report.
TESTS = 1000

# Detector instances that are created once and reused for every iteration.
$rchardet = CharDet::UniversalDetector.new
$icu      = ICU::CharDet::Detector.new

# Compares detection when a single detector instance is reused.
Benchmark.bmbm do |bm|
  bm.report("rchardet instance:") do
    TESTS.times do
      $rchardet.reset
      $rchardet.feed("æåø")
      $rchardet.result
    end
  end

  bm.report("ffi-icu instance:") do
    TESTS.times { $icu.detect("æåø") }
  end
end
|
data/lib/ffi-icu.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require "rbconfig"
|
2
|
+
require "ffi"
|
3
|
+
|
4
|
+
module ICU
  # Identifies the platform the interpreter was built for.
  #
  # @return [Symbol, String] :osx when RbConfig's host_os contains "darwin",
  #   :linux when it contains "linux", otherwise the unmodified host_os value.
  def self.platform
    host_os = RbConfig::CONFIG["host_os"]

    return :osx   if /darwin/ === host_os
    return :linux if /linux/ === host_os

    host_os
  end
end
|
18
|
+
|
19
|
+
require "ffi-icu/lib"
|
20
|
+
require "ffi-icu/uchar"
|
21
|
+
require "ffi-icu/chardet"
|
22
|
+
require "ffi-icu/collation"
|
23
|
+
|
@@ -0,0 +1,90 @@
|
|
1
|
+
module ICU
  # Character set detection built on the ucsdet_* functions bound in ICU::Lib.
  module CharDet

    # Detects the encoding of +string+ using a throwaway Detector.
    #
    # The detector is closed in an +ensure+ clause so the native handle is
    # released even when detection raises.
    #
    # @param string [String] the text to inspect
    # @return [Detector::Match, nil] the match, or nil when ucsdet_detect
    #   returned a null pointer
    # @raise [ICU::Error] when Lib.check_error sees a positive status code
    def self.detect(string)
      detector = Detector.new

      begin
        detector.detect string
      ensure
        detector.close
      end
    end

    # Wraps a native charset detector handle obtained from ucsdet_open.
    # Call #close once the detector is no longer needed.
    class Detector
      # Detection result; the fields are filled from ucsdet_getName,
      # ucsdet_getConfidence and ucsdet_getLanguage respectively.
      Match = Struct.new(:name, :confidence, :language)

      # Opens the native detector.
      #
      # @raise [ICU::Error] when ucsdet_open reports a positive status code
      def initialize
        @detector = Lib.check_error { |ptr| Lib.ucsdet_open(ptr) }
      end

      # @return [Boolean] the value reported by ucsdet_isInputFilterEnabled
      def input_filter_enabled?
        Lib.ucsdet_isInputFilterEnabled @detector
      end

      # Turns the input filter on or off.
      #
      # @param bool [Object] coerced to true/false before being handed to ICU
      def input_filter_enabled=(bool)
        Lib.ucsdet_enableInputFilter(@detector, !!bool)
      end

      # Passes a declared encoding name to the detector.
      #
      # @param str [String] encoding name; its byte size is passed as the length
      # @raise [ICU::Error] when ICU reports a positive status code
      def declared_encoding=(str)
        Lib.check_error do |ptr|
          Lib.ucsdet_setDeclaredEncoding(@detector, str, str.bytesize, ptr)
        end
      end

      # Releases the native detector handle.
      def close
        Lib.ucsdet_close @detector
      end

      # Runs detection on +str+ and returns a single match.
      #
      # @param str [String] the text to inspect
      # @return [Match, nil] nil when ucsdet_detect returned a null pointer
      # @raise [ICU::Error] when ICU reports a positive status code
      def detect(str)
        set_text(str)

        match_ptr = Lib.check_error { |ptr| Lib.ucsdet_detect(@detector, ptr) }
        match_ptr_to_ruby(match_ptr) unless match_ptr.null?
      end

      # Runs detection on +str+ and returns every match ICU reports.
      #
      # @param str [String] the text to inspect
      # @return [Array<Match>] one entry per pointer returned by ucsdet_detectAll
      # @raise [ICU::Error] when ICU reports a positive status code
      def detect_all(str)
        set_text(str)

        # ICU writes the number of matches into this out-parameter.
        matches_found_ptr = FFI::MemoryPointer.new :int32
        array_ptr = Lib.check_error do |status|
          Lib.ucsdet_detectAll(@detector, matches_found_ptr, status)
        end

        length = matches_found_ptr.read_int
        array_ptr.read_array_of_pointer(length).map do |match|
          match_ptr_to_ruby(match)
        end
      end

      # Lists the charset names returned by ucsdet_getAllDetectableCharsets.
      #
      # The enumeration is closed in an +ensure+ clause so it is released even
      # when converting it to an array raises.
      #
      # @return [Array<String>]
      # @raise [ICU::Error] when ICU reports a positive status code
      def detectable_charsets
        enum_ptr = Lib.check_error do |ptr|
          Lib.ucsdet_getAllDetectableCharsets(@detector, ptr)
        end

        begin
          Lib.enum_ptr_to_array(enum_ptr)
        ensure
          Lib.uenum_close(enum_ptr)
        end
      end

      private

      # Copies name, confidence and language out of a native match pointer.
      def match_ptr_to_ruby(match_ptr)
        result = Match.new

        result.name       = Lib.check_error { |ptr| Lib.ucsdet_getName(match_ptr, ptr) }
        result.confidence = Lib.check_error { |ptr| Lib.ucsdet_getConfidence(match_ptr, ptr) }
        result.language   = Lib.check_error { |ptr| Lib.ucsdet_getLanguage(match_ptr, ptr) }

        result
      end

      # Hands +text+ (with its byte size) to the native detector.
      def set_text(text)
        Lib.check_error do |status|
          Lib.ucsdet_setText(@detector, text, text.bytesize, status)
        end
      end

    end # Detector
  end # CharDet
end # ICU
|
90
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module ICU
  # Locale-sensitive comparison and sorting built on the ucol_* functions
  # bound in ICU::Lib.
  module Collation

    # Sorts +arr+ with a throwaway Collator for +locale+.
    #
    # The collator is closed in an +ensure+ clause so the native handle is
    # released even when sorting raises.
    #
    # @param locale [String] locale identifier passed to ucol_open
    # @param arr [Array<String>] strings to sort
    # @return [Array<String>] a new, sorted array
    # @raise [ICU::Error] when ICU reports a positive status code
    def self.collate(locale, arr)
      collator = Collator.new(locale)

      begin
        collator.collate(arr)
      ensure
        collator.close
      end
    end

    # Collects every collation keyword together with its possible values.
    #
    # Each native enumeration is closed in an +ensure+ clause, so none is
    # leaked when reading it raises.
    #
    # @return [Hash{String => Array<String>}] keyword => values
    # @raise [ICU::Error] when ICU reports a positive status code
    def self.keywords
      enum_ptr = Lib.check_error { |error| Lib.ucol_getKeywords(error) }
      keywords = begin
        Lib.enum_ptr_to_array(enum_ptr)
      ensure
        Lib.uenum_close enum_ptr
      end

      hash = {}
      keywords.each do |keyword|
        values_ptr = Lib.check_error { |error| Lib.ucol_getKeywordValues(keyword, error) }

        begin
          hash[keyword] = Lib.enum_ptr_to_array(values_ptr)
        ensure
          Lib.uenum_close(values_ptr)
        end
      end

      hash
    end

    # @return [Array<String>] one entry per index below ucol_countAvailable,
    #   as returned by ucol_getAvailable
    def self.available_locales
      (0...Lib.ucol_countAvailable).map do |idx|
        Lib.ucol_getAvailable idx
      end
    end

    # Wraps a native collator handle obtained from ucol_open.
    # Call #close once the collator is no longer needed.
    #
    # NOTE(review): the comparison methods pass String#length as the length of
    # the UChar buffers. UCharPointer is not visible here; confirm that this
    # matches the number of UChars it produces (on Ruby 1.8 String#length
    # counts bytes, not characters).
    class Collator
      # Locale type argument handed to ucol_getLocale.
      ULOC_VALID_LOCALE = 1

      # @param locale [String] locale identifier passed to ucol_open
      # @raise [ICU::Error] when ICU reports a positive status code
      def initialize(locale)
        @c = Lib.check_error { |error| Lib.ucol_open(locale, error) }
      end

      # @return [String] the locale reported by ucol_getLocale
      # @raise [ICU::Error] when ICU reports a positive status code
      def locale
        Lib.check_error { |error| Lib.ucol_getLocale(@c, ULOC_VALID_LOCALE, error) }
      end

      # Compares two strings with ucol_strcoll.
      #
      # @return [Integer] the native result; the README documents -1 for
      #   compare("a", "b")
      def compare(a, b)
        Lib.ucol_strcoll(
          @c,
          UCharPointer.from_string(a), a.length,
          UCharPointer.from_string(b), b.length
        )
      end

      # @return [Boolean] result of ucol_greater for (a, b)
      def greater?(a, b)
        Lib.ucol_greater(@c, UCharPointer.from_string(a), a.length,
                             UCharPointer.from_string(b), b.length)
      end

      # @return [Boolean] result of ucol_greaterOrEqual for (a, b)
      def greater_or_equal?(a, b)
        Lib.ucol_greaterOrEqual(@c, UCharPointer.from_string(a), a.length,
                                    UCharPointer.from_string(b), b.length)
      end

      # can't override Object#equal? - suggestions welcome
      #
      # @return [Boolean] result of ucol_equal for (a, b)
      def same?(a, b)
        Lib.ucol_equal(@c, UCharPointer.from_string(a), a.length,
                           UCharPointer.from_string(b), b.length)
      end

      # @param array [Array<String>] strings to sort
      # @return [Array<String>] a new array ordered by #compare
      def collate(array)
        array.sort { |a, b| compare a, b }
      end

      # Releases the native collator handle.
      def close
        Lib.ucol_close(@c)
      end
    end # Collator

  end # Collation
end # ICU
|
data/lib/ffi-icu/lib.rb
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
module ICU
  # Raised when an ICU call reports a positive (failure) status code.
  class Error < StandardError
  end

  # FFI bindings for the ICU C functions used by this gem.
  module Lib
    extend FFI::Library

    # Shared-object version => suffix appended to exported function names.
    VERSIONS = {
      "42" => "_4_2",
      "44" => "_44"
    }

    # FIXME: this is incredibly ugly, figure out some better way
    #
    # Loads the ICU shared libraries for the current platform.
    #
    # @return [String] the suffix to append to ICU function names; empty on
    #   OS X and when no entry in VERSIONS matches the loaded library
    # @raise [RuntimeError] on platforms other than :osx and :linux
    def self.find_icu
      suffix = ''

      case ICU.platform
      when :osx
        ffi_lib "icucore"
      when :linux
        versions = VERSIONS.keys
        libs = ffi_lib versions.map { |v| "libicui18n.so.#{v}"},
                       versions.map { |v| "libicutu.so.#{v}"}

        # Pick the function suffix that belongs to the library version loaded.
        loaded_name = libs.first.name
        match = VERSIONS.find { |so_version, _| loaded_name =~ /#{so_version}$/ }
        suffix = match.last if match
      else
        raise "no idea how to load ICU on #{ICU.platform}, patches appreciated!"
      end

      suffix
    end

    # Runs a block that receives a status out-parameter and inspects the
    # status code ICU wrote into it.
    #
    # @yieldparam ptr [FFI::MemoryPointer] int-sized buffer for the status code
    # @return [Object] whatever the block returned
    # @raise [ICU::Error] when the status code is positive; negative codes
    #   only produce a warning
    def self.check_error
      ptr = FFI::MemoryPointer.new(:int)
      ret = yield(ptr)
      error_code = ptr.read_int

      if error_code > 0
        raise Error, "#{Lib.u_errorName error_code}"
      elsif error_code < 0
        warn "ffi-icu: #{Lib.u_errorName error_code}"
      end

      ret
    end

    # Reads every element of a native enumeration into an Array.
    # The enumeration is not closed here; that is left to the caller.
    #
    # @param enum_ptr [FFI::Pointer] enumeration handle
    # @return [Array<String>] uenum_count entries, each read with uenum_next
    # @raise [ICU::Error] when ICU reports a positive status code
    def self.enum_ptr_to_array(enum_ptr)
      length = Lib.check_error do |status|
        Lib.uenum_count(enum_ptr, status)
      end

      (0...length).map do |_idx|
        Lib.check_error { |st| Lib.uenum_next(enum_ptr, nil, st) }
      end
    end

    # Defines a stub Lib.<func_name> that raises ICU::Error when called.
    #
    # The stub is defined on Lib's singleton class. Defining it via
    # self.class would add an instance method to Module itself and thereby
    # to every module in the process.
    #
    # @param func_name [Symbol, String] name of the unavailable function
    def self.not_available(func_name)
      (class << self; self; end).send :define_method, func_name do |*args|
        raise Error, "#{func_name} not available on platform #{ICU.platform.inspect}"
      end
    end


    suffix = find_icu()

    attach_function :u_errorName, "u_errorName#{suffix}", [:int], :string
    attach_function :uenum_count, "uenum_count#{suffix}", [:pointer, :pointer], :int
    attach_function :uenum_close, "uenum_close#{suffix}", [:pointer], :void
    attach_function :uenum_next, "uenum_next#{suffix}", [:pointer, :pointer, :pointer], :string


    # CharDet
    #
    # http://icu-project.org/apiref/icu4c/ucsdet_8h.html
    #

    attach_function :ucsdet_open, "ucsdet_open#{suffix}", [:pointer], :pointer
    attach_function :ucsdet_close, "ucsdet_close#{suffix}", [:pointer], :void
    attach_function :ucsdet_setText, "ucsdet_setText#{suffix}", [:pointer, :string, :int32, :pointer], :void
    attach_function :ucsdet_setDeclaredEncoding, "ucsdet_setDeclaredEncoding#{suffix}", [:pointer, :string, :int32, :pointer], :void
    attach_function :ucsdet_detect, "ucsdet_detect#{suffix}", [:pointer, :pointer], :pointer
    attach_function :ucsdet_detectAll, "ucsdet_detectAll#{suffix}", [:pointer, :pointer, :pointer], :pointer
    attach_function :ucsdet_getName, "ucsdet_getName#{suffix}", [:pointer, :pointer], :string
    attach_function :ucsdet_getConfidence, "ucsdet_getConfidence#{suffix}", [:pointer, :pointer], :int32
    attach_function :ucsdet_getLanguage, "ucsdet_getLanguage#{suffix}", [:pointer, :pointer], :string
    attach_function :ucsdet_getAllDetectableCharsets, "ucsdet_getAllDetectableCharsets#{suffix}", [:pointer, :pointer], :pointer
    attach_function :ucsdet_isInputFilterEnabled, "ucsdet_isInputFilterEnabled#{suffix}", [:pointer], :bool
    attach_function :ucsdet_enableInputFilter, "ucsdet_enableInputFilter#{suffix}", [:pointer, :bool], :bool

    # Collation
    #
    # http://icu-project.org/apiref/icu4c/ucol_8h.html
    #

    attach_function :ucol_open, "ucol_open#{suffix}", [:string, :pointer], :pointer
    attach_function :ucol_close, "ucol_close#{suffix}", [:pointer], :void
    attach_function :ucol_strcoll, "ucol_strcoll#{suffix}", [:pointer, :pointer, :int32, :pointer, :int32], :int
    attach_function :ucol_getKeywords, "ucol_getKeywords#{suffix}", [:pointer], :pointer
    attach_function :ucol_getKeywordValues, "ucol_getKeywordValues#{suffix}", [:string, :pointer], :pointer
    attach_function :ucol_getAvailable, "ucol_getAvailable#{suffix}", [:int32], :string
    attach_function :ucol_countAvailable, "ucol_countAvailable#{suffix}", [], :int32
    attach_function :ucol_getLocale, "ucol_getLocale#{suffix}", [:pointer, :int, :pointer], :string
    attach_function :ucol_greater, "ucol_greater#{suffix}", [:pointer, :pointer, :int32, :pointer, :int32], :bool
    attach_function :ucol_greaterOrEqual, "ucol_greaterOrEqual#{suffix}", [:pointer, :pointer, :int32, :pointer, :int32], :bool
    attach_function :ucol_equal, "ucol_equal#{suffix}", [:pointer, :pointer, :int32, :pointer, :int32], :bool

  end # Lib
end # ICU
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
# Exercises ICU::CharDet::Detector against the native ICU library.
describe ICU::CharDet::Detector do

  # A fresh detector per example, closed afterwards to release the handle.
  before { @d = ICU::CharDet::Detector.new }
  after { @d.close }

  it "should recognize UTF-8" do
    m = @d.detect("æåø")
    m.name.should == "UTF-8"
    m.language.should be_kind_of(String)
  end

  it "has a list of detectable charsets" do
    cs = @d.detectable_charsets
    cs.should be_kind_of(Array)
    cs.should_not be_empty

    cs.first.should be_kind_of(String)
  end

  it "should disable / enable the input filter" do
    @d.input_filter_enabled?.should be_false
    @d.input_filter_enabled = true
    @d.input_filter_enabled?.should be_true
  end

  # Only checks that the call does not raise.
  it "should set declared encoding" do
    @d.declared_encoding = "UTF-8"
  end

  it "should detect several matching encodings" do
    r = @d.detect_all("foo bar")
    r.should be_instance_of(Array)
  end

end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
module ICU
  module Collation
    # Exercises ICU::Collation::Collator against the native ICU library.
    describe Collator do

      # A fresh Norwegian collator per example, closed afterwards.
      before { @c = Collator.new("no") }
      after { @c.close }

      it "should collate an array of strings" do
        @c.collate(%w[å ø æ]).should == %w[æ ø å]
      end

      it "should return available locales" do
        locales = ICU::Collation.available_locales
        locales.should be_kind_of(Array)
        locales.should_not be_empty
      end

      it "should return the locale of the collator" do
        l = @c.locale
        l.should be_kind_of(String)
        l.should == "nb"
      end

      it "should compare two strings" do
        @c.compare("blåbærsyltetøy", "blah").should == 1
        @c.compare("blah", "blah").should == 0
        @c.compare("baah", "blah").should == -1
      end

      it "should know if a string is greater than another" do
        @c.should be_greater("z", "a")
        @c.should_not be_greater("a", "z")
      end

      it "should know if a string is greater or equal to another" do
        @c.should be_greater_or_equal("z", "a")
        @c.should be_greater_or_equal("z", "z")
        @c.should_not be_greater_or_equal("a", "z")
      end

      it "should know if a string is equal to another" do
        @c.should be_same("a", "a")
        # Negative case: without it, a same? that always returns true passes.
        @c.should_not be_same("a", "b")
      end

    end
  end # Collation
end # ICU
|
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ffi-icu
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Jari Bakken
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-05-11 00:00:00 +02:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: ffi
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 0
|
29
|
+
- 6
|
30
|
+
- 3
|
31
|
+
version: 0.6.3
|
32
|
+
type: :runtime
|
33
|
+
version_requirements: *id001
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
name: rspec
|
36
|
+
prerelease: false
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 1
|
43
|
+
- 3
|
44
|
+
- 0
|
45
|
+
version: 1.3.0
|
46
|
+
type: :development
|
47
|
+
version_requirements: *id002
|
48
|
+
description: Provides charset detection, locale sensitive collation and more.
|
49
|
+
email: jari.bakken@gmail.com
|
50
|
+
executables: []
|
51
|
+
|
52
|
+
extensions: []
|
53
|
+
|
54
|
+
extra_rdoc_files:
|
55
|
+
- LICENSE
|
56
|
+
- README.rdoc
|
57
|
+
files:
|
58
|
+
- .document
|
59
|
+
- .gitignore
|
60
|
+
- LICENSE
|
61
|
+
- README.rdoc
|
62
|
+
- Rakefile
|
63
|
+
- VERSION
|
64
|
+
- benchmark/detect.rb
|
65
|
+
- benchmark/shared.rb
|
66
|
+
- lib/ffi-icu.rb
|
67
|
+
- lib/ffi-icu/chardet.rb
|
68
|
+
- lib/ffi-icu/collation.rb
|
69
|
+
- lib/ffi-icu/lib.rb
|
70
|
+
- lib/ffi-icu/uchar.rb
|
71
|
+
- spec/chardet_spec.rb
|
72
|
+
- spec/collation_spec.rb
|
73
|
+
- spec/spec.opts
|
74
|
+
- spec/spec_helper.rb
|
75
|
+
has_rdoc: true
|
76
|
+
homepage: http://github.com/jarib/ffi-icu
|
77
|
+
licenses: []
|
78
|
+
|
79
|
+
post_install_message:
|
80
|
+
rdoc_options:
|
81
|
+
- --charset=UTF-8
|
82
|
+
require_paths:
|
83
|
+
- lib
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
segments:
|
89
|
+
- 0
|
90
|
+
version: "0"
|
91
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
segments:
|
96
|
+
- 0
|
97
|
+
version: "0"
|
98
|
+
requirements: []
|
99
|
+
|
100
|
+
rubyforge_project:
|
101
|
+
rubygems_version: 1.3.6
|
102
|
+
signing_key:
|
103
|
+
specification_version: 3
|
104
|
+
summary: Simple FFI wrappers for things I need from ICU.
|
105
|
+
test_files:
|
106
|
+
- spec/chardet_spec.rb
|
107
|
+
- spec/spec_helper.rb
|
108
|
+
- spec/collation_spec.rb
|