ffi-icu 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.rdoc +71 -0
- data/Rakefile +48 -0
- data/VERSION +1 -0
- data/benchmark/detect.rb +14 -0
- data/benchmark/shared.rb +17 -0
- data/lib/ffi-icu.rb +23 -0
- data/lib/ffi-icu/chardet.rb +90 -0
- data/lib/ffi-icu/collation.rb +78 -0
- data/lib/ffi-icu/lib.rb +112 -0
- data/lib/ffi-icu/uchar.rb +11 -0
- data/spec/chardet_spec.rb +39 -0
- data/spec/collation_spec.rb +51 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +12 -0
- metadata +108 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Jari Bakken
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
= ffi-icu
|
2
|
+
|
3
|
+
Simple FFI wrappers for things I need from ICU.
|
4
|
+
|
5
|
+
= Dependencies
|
6
|
+
|
7
|
+
ICU - you might need to hack the ffi_lib call to make it work. Please send a patch if you do!
|
8
|
+
|
9
|
+
= Features
|
10
|
+
|
11
|
+
== Character Encoding Detection
|
12
|
+
|
13
|
+
=== Examples:
|
14
|
+
|
15
|
+
match = ICU::CharDet.detect(str)
|
16
|
+
match.name # => "UTF-8"
|
17
|
+
match.confidence # => 80
|
18
|
+
|
19
|
+
or
|
20
|
+
|
21
|
+
detector = ICU::CharDet::Detector.new
|
22
|
+
detector.detect(str)
|
23
|
+
detector.close
|
24
|
+
|
25
|
+
=== Why not just use rchardet?
|
26
|
+
|
27
|
+
* this is faster
|
28
|
+
* rchardet does not work well on 1.9
|
29
|
+
* none of the rchardet forks claiming to work on 1.9 actually does
|
30
|
+
|
31
|
+
== Locale Sensitive Collation
|
32
|
+
|
33
|
+
=== Examples:
|
34
|
+
|
35
|
+
ICU::Collation.collate("nb", %w[å æ ø]) == %w[æ ø å] #=> true
|
36
|
+
|
37
|
+
or
|
38
|
+
|
39
|
+
collator = ICU::Collation::Collator.new("nb")
|
40
|
+
collator.compare("a", "b") #=> -1
|
41
|
+
collator.greater?("z", "a") #=> true
|
42
|
+
collator.collate(%w[å æ ø]) #=> ["æ", "ø", "å"]
|
43
|
+
|
44
|
+
= Tested on:
|
45
|
+
|
46
|
+
Platforms:
|
47
|
+
|
48
|
+
* OS X 10.6
|
49
|
+
* Debian Linux
|
50
|
+
* Arch Linux
|
51
|
+
|
52
|
+
Rubies:
|
53
|
+
|
54
|
+
* MRI 1.9.1
|
55
|
+
* MRI 1.8.7
|
56
|
+
|
57
|
+
YMMV.
|
58
|
+
|
59
|
+
== Note on Patches/Pull Requests
|
60
|
+
|
61
|
+
* Fork the project.
|
62
|
+
* Make your feature addition or bug fix.
|
63
|
+
* Add tests for it. This is important so I don't break it in a
|
64
|
+
future version unintentionally.
|
65
|
+
* Commit, do not mess with rakefile, version, or history.
|
66
|
+
(If you want to have your own version, that is fine, but bump the version in a separate commit that I can ignore when I pull.)
|
67
|
+
* Send me a pull request. Bonus points for topic branches.
|
68
|
+
|
69
|
+
== Copyright
|
70
|
+
|
71
|
+
Copyright (c) 2010 Jari Bakken. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

# Gem packaging/release tasks. Jeweler is optional: when it cannot be loaded
# we only print a hint and carry on defining the remaining tasks.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "ffi-icu"
    gem.summary = %Q{Simple FFI wrappers for things I need from ICU.}
    gem.description = %Q{Provides charset detection, locale sensitive collation and more.}
    gem.email = "jari.bakken@gmail.com"
    gem.homepage = "http://github.com/jarib/ffi-icu"
    gem.authors = ["Jari Bakken"]

    gem.add_dependency "ffi", ">= 0.6.3"
    gem.add_development_dependency "rspec", ">= 1.3.0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Runs every spec under spec/.
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# Same specs, with rcov coverage enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# :check_dependencies is not defined anywhere in this Rakefile (presumably it
# comes from the Jeweler tasks above). Only depend on it when it exists, so
# that `rake spec` keeps working on the LoadError path.
if Rake::Task.task_defined?(:check_dependencies)
  task :spec => :check_dependencies
end

task :default => :spec

# API docs via YARD; when YARD is missing, define a stub task that explains why.
begin
  require 'yard'
  YARD::Rake::YardocTask.new
rescue LoadError
  task :yardoc do
    abort "YARD is not available. In order to run yardoc, you must: sudo gem install yard"
  end
end
|
48
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
data/benchmark/detect.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# encoding: utf-8

# Benchmarks one-shot charset detection: rchardet's CharDet.detect against
# ICU::CharDet.detect, TESTS iterations each.

require "benchmark"

# Resolve lib/ relative to this file so the benchmark can be started from any
# working directory, not just the project root.
$LOAD_PATH.unshift File.expand_path("../../lib", __FILE__)
require "ffi-icu"
require "rchardet"

TESTS = 1000

Benchmark.bmbm do |results|
  results.report("rchardet:") { TESTS.times { CharDet.detect("æåø") } }
  results.report("ffi-icu:") { TESTS.times { ICU::CharDet.detect("æåø") } }
end
|
data/benchmark/shared.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# encoding: utf-8

# Benchmarks charset detection with a single, reused detector instance per
# library (as opposed to benchmark/detect.rb, which uses the one-shot APIs).

require "benchmark"

# Resolve lib/ relative to this file so the benchmark can be started from any
# working directory, not just the project root.
$LOAD_PATH.unshift File.expand_path("../../lib", __FILE__)
require "ffi-icu"
require "rchardet"

TESTS = 1000

# Plain locals are enough here: the report blocks below close over them.
rchardet = CharDet::UniversalDetector.new
icu = ICU::CharDet::Detector.new

begin
  Benchmark.bmbm do |results|
    results.report("rchardet instance:") { TESTS.times { rchardet.reset; rchardet.feed("æåø"); rchardet.result } }
    results.report("ffi-icu instance:") { TESTS.times { icu.detect("æåø") } }
  end
ensure
  # Release the native ICU detector once the benchmark is done (or failed).
  icu.close
end
|
data/lib/ffi-icu.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require "rbconfig"
|
2
|
+
require "ffi"
|
3
|
+
|
4
|
+
module ICU
  # Identifies the host operating system from RbConfig's "host_os" entry.
  #
  # Returns :osx when host_os matches /darwin/, :linux when it matches
  # /linux/, and the raw host_os string for anything else.
  def self.platform
    os = RbConfig::CONFIG["host_os"]

    case os
    when /darwin/
      :osx
    when /linux/
      :linux
    else
      os
    end
  end
end
|
18
|
+
|
19
|
+
require "ffi-icu/lib"
|
20
|
+
require "ffi-icu/uchar"
|
21
|
+
require "ffi-icu/chardet"
|
22
|
+
require "ffi-icu/collation"
|
23
|
+
|
@@ -0,0 +1,90 @@
|
|
1
|
+
module ICU
  # Character set detection on top of the ucsdet_* functions bound in ICU::Lib.
  module CharDet

    # Detects the encoding of +string+ using a temporary Detector.
    #
    # Returns whatever Detector#detect returns: a Detector::Match, or nil when
    # the native call hands back a null match. The detector is closed even if
    # detection raises, so the native handle is not leaked.
    def self.detect(string)
      detector = Detector.new

      begin
        detector.detect string
      ensure
        detector.close
      end
    end

    # Wraps one native charset detector handle. Call #close when finished.
    class Detector
      # A single detection result: charset name, confidence value and language,
      # as returned by ucsdet_getName / ucsdet_getConfidence / ucsdet_getLanguage.
      Match = Struct.new(:name, :confidence, :language)

      # Opens the native detector. Lib.check_error raises ICU::Error when the
      # call reports a failure status.
      def initialize
        @detector = Lib.check_error { |ptr| Lib.ucsdet_open(ptr) }
      end

      # Whether the detector's input filter is currently enabled.
      def input_filter_enabled?
        Lib.ucsdet_isInputFilterEnabled @detector
      end

      # Enables or disables the input filter; +bool+ is coerced to true/false.
      def input_filter_enabled=(bool)
        Lib.ucsdet_enableInputFilter(@detector, !!bool)
      end

      # Passes +str+ to ucsdet_setDeclaredEncoding, together with its byte size.
      def declared_encoding=(str)
        Lib.check_error do |ptr|
          Lib.ucsdet_setDeclaredEncoding(@detector, str, str.bytesize, ptr)
        end
      end

      # Releases the native detector.
      # NOTE(review): nothing prevents further calls after this; they would
      # hand the stale handle to ICU - presumably unsafe, confirm.
      def close
        Lib.ucsdet_close @detector
      end

      # Returns the best Match for +str+, or nil when the native call returns
      # a null match pointer.
      def detect(str)
        set_text(str)

        match_ptr = Lib.check_error { |ptr| Lib.ucsdet_detect(@detector, ptr) }
        match_ptr_to_ruby(match_ptr) unless match_ptr.null?
      end

      # Returns an Array of Match objects for every candidate the native call
      # reports for +str+ (empty when it returns a null array).
      def detect_all(str)
        set_text(str)

        matches_found_ptr = FFI::MemoryPointer.new :int32
        array_ptr = Lib.check_error do |status|
          Lib.ucsdet_detectAll(@detector, matches_found_ptr, status)
        end

        # Mirror #detect: treat a null result as "no matches" instead of
        # reading through a null pointer.
        return [] if array_ptr.null?

        length = matches_found_ptr.read_int
        array_ptr.read_array_of_pointer(length).map do |match|
          match_ptr_to_ruby(match)
        end
      end

      # Returns the charset names from ucsdet_getAllDetectableCharsets as an
      # Array; the native enumeration is closed afterwards.
      def detectable_charsets
        enum_ptr = Lib.check_error do |ptr|
          Lib.ucsdet_getAllDetectableCharsets(@detector, ptr)
        end

        result = Lib.enum_ptr_to_array(enum_ptr)
        Lib.uenum_close(enum_ptr)

        result
      end

      private

      # Copies name, confidence and language out of a native match pointer
      # into a Match struct.
      def match_ptr_to_ruby(match_ptr)
        result = Match.new

        result.name       = Lib.check_error { |ptr| Lib.ucsdet_getName(match_ptr, ptr) }
        result.confidence = Lib.check_error { |ptr| Lib.ucsdet_getConfidence(match_ptr, ptr) }
        result.language   = Lib.check_error { |ptr| Lib.ucsdet_getLanguage(match_ptr, ptr) }

        result
      end

      # Hands +text+ and its byte size to the native detector.
      # NOTE(review): ICU's ucsdet_setText reportedly does not copy the input;
      # callers here always detect immediately afterwards - confirm the string
      # buffer stays valid for that long under FFI's :string marshalling.
      def set_text(text)
        Lib.check_error do |status|
          Lib.ucsdet_setText(@detector, text, text.bytesize, status)
        end
      end

    end # Detector
  end # CharDet
end # ICU
|
90
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module ICU
  # Locale-sensitive string comparison and sorting on top of the ucol_*
  # functions bound in ICU::Lib.
  module Collation

    # Sorts +arr+ with a temporary Collator for +locale+ and returns the sorted
    # array. The collator is closed even if sorting raises.
    def self.collate(locale, arr)
      collator = Collator.new(locale)

      begin
        collator.collate(arr)
      ensure
        collator.close
      end
    end

    # Returns a Hash mapping every keyword from ucol_getKeywords to the Array
    # of values ucol_getKeywordValues reports for it.
    def self.keywords
      names = drain_enum(Lib.check_error { |error| Lib.ucol_getKeywords(error) })

      hash = {}
      names.each do |keyword|
        values_ptr = Lib.check_error { |error| Lib.ucol_getKeywordValues(keyword, error) }
        hash[keyword] = drain_enum(values_ptr)
      end

      hash
    end

    # Returns the Array of locale names reported by ucol_getAvailable.
    def self.available_locales
      (0...Lib.ucol_countAvailable).map do |idx|
        Lib.ucol_getAvailable idx
      end
    end

    # Converts a native enumeration to an Array and always closes it, even if
    # reading it raises.
    def self.drain_enum(enum_ptr)
      Lib.enum_ptr_to_array(enum_ptr)
    ensure
      Lib.uenum_close(enum_ptr)
    end
    private_class_method :drain_enum

    # Wraps one native collator handle. Call #close when finished.
    class Collator
      # Locale type argument passed to ucol_getLocale in #locale.
      ULOC_VALID_LOCALE = 1

      # Opens a native collator for +locale+. Lib.check_error raises ICU::Error
      # when the call reports a failure status.
      def initialize(locale)
        @c = Lib.check_error { |error| Lib.ucol_open(locale, error) }
      end

      # Returns the locale string ucol_getLocale reports for this collator.
      def locale
        Lib.check_error { |error| Lib.ucol_getLocale(@c, ULOC_VALID_LOCALE, error) }
      end

      # Compares +a+ and +b+; returns the integer result of ucol_strcoll
      # (the README and specs show -1, 0 and 1).
      def compare(a, b)
        Lib.ucol_strcoll(@c, *uchar_args(a, b))
      end

      # True when +a+ sorts after +b+.
      def greater?(a, b)
        Lib.ucol_greater(@c, *uchar_args(a, b))
      end

      # True when +a+ sorts after, or equal to, +b+.
      def greater_or_equal?(a, b)
        Lib.ucol_greaterOrEqual(@c, *uchar_args(a, b))
      end

      # can't override Object#equal? - suggestions welcome
      def same?(a, b)
        Lib.ucol_equal(@c, *uchar_args(a, b))
      end

      # Returns a copy of +array+ sorted with #compare.
      def collate(array)
        array.sort { |a,b| compare a, b }
      end

      # Releases the native collator.
      def close
        Lib.ucol_close(@c)
      end

      private

      # Builds the (pointer, length, pointer, length) argument list shared by
      # all native comparison calls.
      # NOTE(review): String#length is used as the native length. ICU
      # presumably expects UTF-16 code units, which can differ from
      # String#length (bytes on 1.8, characters on 1.9) - confirm against
      # UCharPointer.from_string.
      def uchar_args(a, b)
        [UCharPointer.from_string(a), a.length,
         UCharPointer.from_string(b), b.length]
      end
    end # Collator

  end # Collation
end # ICU
|
data/lib/ffi-icu/lib.rb
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
module ICU
  # Raised by Lib.check_error when an ICU call reports a failure status, and
  # by stubs created through Lib.not_available.
  class Error < StandardError
  end

  # Raw FFI bindings to the ICU C library, plus small helpers for ICU's
  # status-pointer error convention.
  module Lib
    extend FFI::Library

    # Maps the shared-object version suffix tried on Linux ("libicui18n.so.NN")
    # to the suffix appended to every attached function name.
    VERSIONS = {
      "42" => "_4_2",
      "44" => "_44"
    }

    # Loads the ICU shared library for the current platform and returns the
    # suffix to append to function names ('' when none is needed).
    #
    # Raises a RuntimeError on platforms other than :osx and :linux.
    #
    # FIXME: this is incredibly ugly, figure out some better way
    def self.find_icu
      suffix = ''

      case ICU.platform
      when :osx
        ffi_lib "icucore"
      when :linux
        versions = VERSIONS.keys
        libs = ffi_lib versions.map { |v| "libicui18n.so.#{v}" },
                       versions.map { |v| "libicutu.so.#{v}" }

        # Pick the function suffix that belongs to whichever .so version was
        # actually loaded; keep '' when none of the known versions matches.
        loaded_name = libs.first.name
        match = VERSIONS.find { |so_version, _| loaded_name =~ /#{so_version}$/ }
        suffix = match.last if match
      else
        raise "no idea how to load ICU on #{ICU.platform}, patches appreciated!"
      end

      suffix
    end

    # Runs the block with a fresh int-sized status pointer and returns the
    # block's result.
    #
    # After the block, the status is read back: a positive value raises
    # ICU::Error with the name from u_errorName; a negative value only emits a
    # warning.
    def self.check_error
      ptr = FFI::MemoryPointer.new(:int)
      ret = yield(ptr)
      error_code = ptr.read_int

      if error_code > 0
        raise Error, "#{Lib.u_errorName error_code}"
      elsif error_code < 0
        warn "ffi-icu: #{Lib.u_errorName error_code}"
      end

      ret
    end

    # Reads every element of a native enumeration into an Array, using
    # uenum_count for the length and uenum_next for each element.
    # The enumeration is not closed here; that is left to the caller.
    def self.enum_ptr_to_array(enum_ptr)
      length = Lib.check_error do |status|
        Lib.uenum_count(enum_ptr, status)
      end

      (0...length).map do |idx|
        Lib.check_error { |st| Lib.uenum_next(enum_ptr, nil, st) }
      end
    end

    # Defines Lib.<func_name> as a stub that raises ICU::Error, for functions
    # that cannot be attached on the current platform.
    #
    # The stub is defined on this module's singleton class. (Going through
    # self.class would define it on Module itself, i.e. on every module and
    # class in the process.)
    def self.not_available(func_name)
      singleton = (class << self; self; end)
      singleton.send :define_method, func_name do |*args|
        raise Error, "#{func_name} not available on platform #{ICU.platform.inspect}"
      end
    end


    suffix = find_icu()

    attach_function :u_errorName, "u_errorName#{suffix}", [:int], :string
    attach_function :uenum_count, "uenum_count#{suffix}", [:pointer, :pointer], :int
    attach_function :uenum_close, "uenum_close#{suffix}", [:pointer], :void
    attach_function :uenum_next, "uenum_next#{suffix}", [:pointer, :pointer, :pointer], :string


    # CharDet
    #
    # http://icu-project.org/apiref/icu4c/ucsdet_8h.html
    #

    attach_function :ucsdet_open, "ucsdet_open#{suffix}", [:pointer], :pointer
    attach_function :ucsdet_close, "ucsdet_close#{suffix}", [:pointer], :void
    attach_function :ucsdet_setText, "ucsdet_setText#{suffix}", [:pointer, :string, :int32, :pointer], :void
    attach_function :ucsdet_setDeclaredEncoding, "ucsdet_setDeclaredEncoding#{suffix}", [:pointer, :string, :int32, :pointer], :void
    attach_function :ucsdet_detect, "ucsdet_detect#{suffix}", [:pointer, :pointer], :pointer
    attach_function :ucsdet_detectAll, "ucsdet_detectAll#{suffix}", [:pointer, :pointer, :pointer], :pointer
    attach_function :ucsdet_getName, "ucsdet_getName#{suffix}", [:pointer, :pointer], :string
    attach_function :ucsdet_getConfidence, "ucsdet_getConfidence#{suffix}", [:pointer, :pointer], :int32
    attach_function :ucsdet_getLanguage, "ucsdet_getLanguage#{suffix}", [:pointer, :pointer], :string
    attach_function :ucsdet_getAllDetectableCharsets, "ucsdet_getAllDetectableCharsets#{suffix}", [:pointer, :pointer], :pointer
    attach_function :ucsdet_isInputFilterEnabled, "ucsdet_isInputFilterEnabled#{suffix}", [:pointer], :bool
    attach_function :ucsdet_enableInputFilter, "ucsdet_enableInputFilter#{suffix}", [:pointer, :bool], :bool

    # Collation
    #
    # http://icu-project.org/apiref/icu4c/ucol_8h.html
    #

    attach_function :ucol_open, "ucol_open#{suffix}", [:string, :pointer], :pointer
    attach_function :ucol_close, "ucol_close#{suffix}", [:pointer], :void
    attach_function :ucol_strcoll, "ucol_strcoll#{suffix}", [:pointer, :pointer, :int32, :pointer, :int32], :int
    attach_function :ucol_getKeywords, "ucol_getKeywords#{suffix}", [:pointer], :pointer
    attach_function :ucol_getKeywordValues, "ucol_getKeywordValues#{suffix}", [:string, :pointer], :pointer
    attach_function :ucol_getAvailable, "ucol_getAvailable#{suffix}", [:int32], :string
    attach_function :ucol_countAvailable, "ucol_countAvailable#{suffix}", [], :int32
    attach_function :ucol_getLocale, "ucol_getLocale#{suffix}", [:pointer, :int, :pointer], :string
    attach_function :ucol_greater, "ucol_greater#{suffix}", [:pointer, :pointer, :int32, :pointer, :int32], :bool
    attach_function :ucol_greaterOrEqual, "ucol_greaterOrEqual#{suffix}", [:pointer, :pointer, :int32, :pointer, :int32], :bool
    attach_function :ucol_equal, "ucol_equal#{suffix}", [:pointer, :pointer, :int32, :pointer, :int32], :bool

  end # Lib
end # ICU
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# encoding: UTF-8

require 'spec_helper'

# Exercises ICU::CharDet::Detector against the real ICU library; a fresh
# detector is opened before, and closed after, every example.
describe ICU::CharDet::Detector do

  before { @d = ICU::CharDet::Detector.new }
  after { @d.close }

  it "should recognize UTF-8" do
    m = @d.detect("æåø")
    m.name.should == "UTF-8"
    m.language.should be_kind_of(String)
  end

  it "has a list of detectable charsets" do
    cs = @d.detectable_charsets
    cs.should be_kind_of(Array)
    cs.should_not be_empty

    cs.first.should be_kind_of(String)
  end

  it "should disable / enable the input filter" do
    @d.input_filter_enabled?.should be_false
    @d.input_filter_enabled = true
    @d.input_filter_enabled?.should be_true
  end

  it "should set declared encoding" do
    # There is no reader for the declared encoding, so only check that
    # setting it is accepted without raising.
    lambda { @d.declared_encoding = "UTF-8" }.should_not raise_error
  end

  it "should detect several matching encodings" do
    r = @d.detect_all("foo bar")
    r.should be_instance_of(Array)
  end

end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# encoding: UTF-8

require 'spec_helper'

module ICU
  module Collation
    # Exercises Collator against the real ICU library; a fresh "no" collator
    # is opened before, and closed after, every example.
    describe Collator do

      before { @c = Collator.new("no") }
      after { @c.close }

      it "should collate an array of strings" do
        @c.collate(%w[å ø æ]).should == %w[æ ø å]
      end

      it "should return available locales" do
        locales = ICU::Collation.available_locales
        locales.should be_kind_of(Array)
        locales.should_not be_empty
      end

      it "should return the locale of the collator" do
        l = @c.locale
        l.should be_kind_of(String)
        l.should == "nb"
      end

      it "should compare two strings" do
        @c.compare("blåbærsyltetøy", "blah").should == 1
        @c.compare("blah", "blah").should == 0
        @c.compare("baah", "blah").should == -1
      end

      it "should know if a string is greater than another" do
        @c.should be_greater("z", "a")
        @c.should_not be_greater("a", "z")
      end

      it "should know if a string is greater or equal to another" do
        @c.should be_greater_or_equal("z", "a")
        @c.should be_greater_or_equal("z", "z")
        @c.should_not be_greater_or_equal("a", "z")
      end

      it "should know if a string is equal to another" do
        @c.should be_same("a", "a")
        @c.should_not be_same("a", "b")
      end

    end
  end # Collation
end # ICU
|
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ffi-icu
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Jari Bakken
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-05-11 00:00:00 +02:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: ffi
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 0
|
29
|
+
- 6
|
30
|
+
- 3
|
31
|
+
version: 0.6.3
|
32
|
+
type: :runtime
|
33
|
+
version_requirements: *id001
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
name: rspec
|
36
|
+
prerelease: false
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 1
|
43
|
+
- 3
|
44
|
+
- 0
|
45
|
+
version: 1.3.0
|
46
|
+
type: :development
|
47
|
+
version_requirements: *id002
|
48
|
+
description: Provides charset detection, locale sensitive collation and more.
|
49
|
+
email: jari.bakken@gmail.com
|
50
|
+
executables: []
|
51
|
+
|
52
|
+
extensions: []
|
53
|
+
|
54
|
+
extra_rdoc_files:
|
55
|
+
- LICENSE
|
56
|
+
- README.rdoc
|
57
|
+
files:
|
58
|
+
- .document
|
59
|
+
- .gitignore
|
60
|
+
- LICENSE
|
61
|
+
- README.rdoc
|
62
|
+
- Rakefile
|
63
|
+
- VERSION
|
64
|
+
- benchmark/detect.rb
|
65
|
+
- benchmark/shared.rb
|
66
|
+
- lib/ffi-icu.rb
|
67
|
+
- lib/ffi-icu/chardet.rb
|
68
|
+
- lib/ffi-icu/collation.rb
|
69
|
+
- lib/ffi-icu/lib.rb
|
70
|
+
- lib/ffi-icu/uchar.rb
|
71
|
+
- spec/chardet_spec.rb
|
72
|
+
- spec/collation_spec.rb
|
73
|
+
- spec/spec.opts
|
74
|
+
- spec/spec_helper.rb
|
75
|
+
has_rdoc: true
|
76
|
+
homepage: http://github.com/jarib/ffi-icu
|
77
|
+
licenses: []
|
78
|
+
|
79
|
+
post_install_message:
|
80
|
+
rdoc_options:
|
81
|
+
- --charset=UTF-8
|
82
|
+
require_paths:
|
83
|
+
- lib
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
segments:
|
89
|
+
- 0
|
90
|
+
version: "0"
|
91
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
segments:
|
96
|
+
- 0
|
97
|
+
version: "0"
|
98
|
+
requirements: []
|
99
|
+
|
100
|
+
rubyforge_project:
|
101
|
+
rubygems_version: 1.3.6
|
102
|
+
signing_key:
|
103
|
+
specification_version: 3
|
104
|
+
summary: Simple FFI wrappers for things I need from ICU.
|
105
|
+
test_files:
|
106
|
+
- spec/chardet_spec.rb
|
107
|
+
- spec/spec_helper.rb
|
108
|
+
- spec/collation_spec.rb
|