ffi-icu 0.1.9 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rspec +2 -0
- data/.travis.yml +17 -4
- data/README.md +22 -15
- data/ffi-icu.gemspec +4 -4
- data/lib/ffi-icu.rb +0 -9
- data/lib/ffi-icu/lib.rb +16 -7
- data/lib/ffi-icu/locale.rb +4 -4
- data/lib/ffi-icu/time_formatting.rb +45 -7
- data/lib/ffi-icu/version.rb +1 -1
- data/spec/break_iterator_spec.rb +20 -19
- data/spec/chardet_spec.rb +10 -12
- data/spec/collation_spec.rb +19 -22
- data/spec/lib/version_info_spec.rb +11 -6
- data/spec/lib_spec.rb +11 -11
- data/spec/locale_spec.rb +105 -80
- data/spec/normalization_spec.rb +2 -4
- data/spec/normalizer_spec.rb +24 -26
- data/spec/number_formatting_spec.rb +28 -25
- data/spec/time_spec.rb +48 -38
- data/spec/transliteration_spec.rb +6 -7
- data/spec/uchar_spec.rb +8 -10
- metadata +27 -28
- data/spec/spec.opts +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 2066f9d1a113f07fb761d2c97f742e6cad8dde11b26c0e4d1da2ed8a85567297
|
4
|
+
data.tar.gz: e6df31c42fc3e518d8d19a98dec0b5aeed37ae90da3fd7570f32ef58397d531d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf39178c2893939c57a8e7810d407da635ceff9b4cc5ddbe60360df53a0bf7f35b17d8f4036cf001d85edf4fa83efd8f4ed1b91d6cdb21e54d52810f536bf7ce
|
7
|
+
data.tar.gz: ee8c887487e3c3a0c0511e7017753b4801da3fc6a73e4d476211452026fd0ae3d8ae0d4c35be9eb5ed8827df8b757bc985dc5f5eafc77a12c872ca37347e9924
|
data/.rspec
ADDED
data/.travis.yml
CHANGED
@@ -1,7 +1,20 @@
|
|
1
|
+
language: ruby
|
2
|
+
os: linux
|
3
|
+
dist: focal
|
4
|
+
|
5
|
+
arch:
|
6
|
+
- amd64
|
7
|
+
- arm64
|
8
|
+
|
1
9
|
rvm:
|
2
|
-
-
|
3
|
-
- 2.
|
4
|
-
-
|
10
|
+
- 2.6
|
11
|
+
- 2.7
|
12
|
+
- 3.0
|
5
13
|
- ruby-head
|
14
|
+
|
6
15
|
before_script:
|
7
|
-
- sudo apt-get install -y
|
16
|
+
- sudo apt-get install -y libicu-dev
|
17
|
+
|
18
|
+
jobs:
|
19
|
+
allow_failures:
|
20
|
+
- arch: arm64
|
data/README.md
CHANGED
@@ -1,9 +1,7 @@
|
|
1
|
-
ffi-icu
|
1
|
+
ffi-icu [](https://travis-ci.org/erickguan/ffi-icu)
|
2
2
|
=======
|
3
3
|
|
4
|
-
Simple FFI wrappers for
|
5
|
-
|
6
|
-
[](http://travis-ci.org/jarib/ffi-icu)
|
4
|
+
Simple FFI wrappers for ICU. Check out the renovated [ICU gem](https://github.com/fantasticfears/icu4r) instead, which supports a variety of encodings and is distributed with packaged source. FFI-ICU needs some love with the ICU gem's transcoding method.
|
7
5
|
|
8
6
|
Gem
|
9
7
|
---
|
@@ -18,7 +16,7 @@ Dependencies
|
|
18
16
|
ICU.
|
19
17
|
|
20
18
|
If you get messages that the library or functions are not found, you can
|
21
|
-
set some environment
|
19
|
+
set some environment variables to tell ffi-icu where to find it, e.g.:
|
22
20
|
|
23
21
|
$ export FFI_ICU_LIB="icui18n.so"
|
24
22
|
$ export FFI_ICU_VERSION_SUFFIX="_3_8"
|
@@ -49,7 +47,6 @@ or
|
|
49
47
|
Why not just use rchardet?
|
50
48
|
|
51
49
|
* speed
|
52
|
-
* 1.9 support
|
53
50
|
|
54
51
|
Locale Sensitive Collation
|
55
52
|
--------------------------
|
@@ -112,14 +109,24 @@ Examples:
|
|
112
109
|
f #=> "12.11.15 15:21"
|
113
110
|
|
114
111
|
# reusable formatting objects
|
115
|
-
|
116
|
-
|
112
|
+
formatter = ICU::TimeFormatting.create(:locale => 'cs_CZ', :zone => 'Europe/Prague', :date => :long, :time => :none)
|
113
|
+
formatter.format(Time.now) #=> "25. února 2015"
|
117
114
|
```
|
118
115
|
|
119
116
|
```ruby
|
120
117
|
# reusable formatting objects
|
121
|
-
|
122
|
-
|
118
|
+
formatter = ICU::TimeFormatting.create(:locale => 'cs_CZ', :zone => 'Europe/Prague', :date => :long, :time => :none)
|
119
|
+
formatter.parse("25. února 2015") #=> Wed Feb 25 00:00:00 +0100 2015
|
120
|
+
```
|
121
|
+
|
122
|
+
For skeleton formatting, visit the [Unicode date field symbol table](https://unicode-org.github.io/icu/userguide/format_parse/datetime/#date-field-symbol-table) page to help find the pattern characters to use.
|
123
|
+
|
124
|
+
```ruby
|
125
|
+
formatter = ICU::TimeFormatting.create(:locale => 'cs_CZ', :date => :pattern, :time => :pattern, :skeleton => 'MMMMY')
|
126
|
+
formatter.format(Time.now) #=> "únor 2015"
|
127
|
+
|
128
|
+
formatter = ICU::TimeFormatting.create(:locale => 'cs_CZ', :date => :pattern, :time => :pattern, :skeleton => 'Y')
|
129
|
+
formatter.format(Time.now) #=> "2015"
|
123
130
|
```
|
124
131
|
|
125
132
|
Tested on:
|
@@ -132,15 +139,15 @@ Platforms:
|
|
132
139
|
|
133
140
|
Rubies:
|
134
141
|
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
142
|
+
- 2.5
|
143
|
+
- 2.6
|
144
|
+
- 2.7
|
145
|
+
- ruby-head
|
139
146
|
|
140
147
|
TODO:
|
141
148
|
=====
|
142
149
|
|
143
|
-
* Any other useful part of ICU?
|
150
|
+
* Any other useful part of ICU?
|
144
151
|
* Windows?!
|
145
152
|
|
146
153
|
Note on Patches/Pull Requests
|
data/ffi-icu.gemspec
CHANGED
@@ -6,7 +6,8 @@ Gem::Specification.new do |s|
|
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Jari Bakken"]
|
9
|
-
s.date = %q{
|
9
|
+
s.date = %q{2019-10-15}
|
10
|
+
s.licenses = ['MIT']
|
10
11
|
s.description = %q{Provides charset detection, locale sensitive collation and more. Depends on libicu.}
|
11
12
|
s.email = %q{jari.bakken@gmail.com}
|
12
13
|
s.extra_rdoc_files = ["LICENSE", "README.md"]
|
@@ -20,7 +21,6 @@ Gem::Specification.new do |s|
|
|
20
21
|
s.summary = %q{Simple Ruby FFI wrappers for things I need from ICU.}
|
21
22
|
|
22
23
|
s.add_runtime_dependency "ffi", "~> 1.0", ">= 1.0.9"
|
23
|
-
s.add_development_dependency
|
24
|
-
s.add_development_dependency "rake", ["
|
24
|
+
s.add_development_dependency 'rspec', '~> 3.9'
|
25
|
+
s.add_development_dependency "rake", [">= 12.3.3"]
|
25
26
|
end
|
26
|
-
|
data/lib/ffi-icu.rb
CHANGED
data/lib/ffi-icu/lib.rb
CHANGED
@@ -39,7 +39,12 @@ module ICU
|
|
39
39
|
[find_lib("libicui18n.#{FFI::Platform::LIBSUFFIX}.??"),
|
40
40
|
find_lib("libicutu.#{FFI::Platform::LIBSUFFIX}.??")]
|
41
41
|
when :osx
|
42
|
-
|
42
|
+
# See https://developer.apple.com/documentation/macos-release-notes/macos-big-sur-11_0_1-release-notes (62986286)
|
43
|
+
if Gem::Version.new(`sw_vers -productVersion`) >= Gem::Version.new('11')
|
44
|
+
["libicucore.#{FFI::Platform::LIBSUFFIX}"]
|
45
|
+
else
|
46
|
+
[find_lib("libicucore.#{FFI::Platform::LIBSUFFIX}")]
|
47
|
+
end
|
43
48
|
when :linux
|
44
49
|
[find_lib("libicui18n.#{FFI::Platform::LIBSUFFIX}.??"),
|
45
50
|
find_lib("libicutu.#{FFI::Platform::LIBSUFFIX}.??")]
|
@@ -99,7 +104,7 @@ module ICU
|
|
99
104
|
# Here are the possible suffixes
|
100
105
|
suffixes = [""]
|
101
106
|
if version
|
102
|
-
suffixes << "_#{version}" << "_#{version[0].chr}_#{version[1].chr}"
|
107
|
+
suffixes << "_#{version}" << "_#{version[0].chr}_#{version[1].chr}" << "_#{version.split('.')[0]}"
|
103
108
|
end
|
104
109
|
|
105
110
|
# Try to find the u_errorName function using the possible suffixes
|
@@ -420,11 +425,12 @@ module ICU
|
|
420
425
|
attach_function :unum_set_attribute, "unum_setAttribute#{suffix}", [:pointer, :number_format_attribute, :int32_t], :void
|
421
426
|
# date
|
422
427
|
enum :date_format_style, [
|
423
|
-
:
|
424
|
-
:
|
425
|
-
:
|
426
|
-
:
|
427
|
-
:
|
428
|
+
:pattern, -2,
|
429
|
+
:none, -1,
|
430
|
+
:full, 0,
|
431
|
+
:long, 1,
|
432
|
+
:medium, 2,
|
433
|
+
:short, 3,
|
428
434
|
]
|
429
435
|
attach_function :udat_open, "udat_open#{suffix}", [:date_format_style, :date_format_style, :string, :pointer, :int32_t, :pointer, :int32_t, :pointer ], :pointer
|
430
436
|
attach_function :udat_close, "unum_close#{suffix}", [:pointer], :void
|
@@ -432,6 +438,9 @@ module ICU
|
|
432
438
|
attach_function :udat_parse, "udat_parse#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :pointer], :double
|
433
439
|
attach_function :udat_toPattern, "udat_toPattern#{suffix}", [:pointer, :bool , :pointer, :int32_t , :pointer], :int32_t
|
434
440
|
attach_function :udat_applyPattern, "udat_applyPattern#{suffix}", [:pointer, :bool , :pointer, :int32_t ], :void
|
441
|
+
# skeleton pattern
|
442
|
+
attach_function :udatpg_open, "udatpg_open#{suffix}", [:string, :pointer], :pointer
|
443
|
+
attach_function :udatpg_getBestPattern, "udatpg_getBestPattern#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t, :pointer], :int32_t
|
435
444
|
# tz
|
436
445
|
attach_function :ucal_setDefaultTimeZone, "ucal_setDefaultTimeZone#{suffix}", [:pointer, :pointer], :int32_t
|
437
446
|
attach_function :ucal_getDefaultTimeZone, "ucal_getDefaultTimeZone#{suffix}", [:pointer, :int32_t, :pointer], :int32_t
|
data/lib/ffi-icu/locale.rb
CHANGED
@@ -75,7 +75,7 @@ module ICU
|
|
75
75
|
def display_country(locale = nil)
|
76
76
|
locale = locale.to_s unless locale.nil?
|
77
77
|
|
78
|
-
Lib::Util.read_uchar_buffer(
|
78
|
+
Lib::Util.read_uchar_buffer(256) do |buffer, status|
|
79
79
|
Lib.uloc_getDisplayCountry(@id, locale, buffer, buffer.size, status)
|
80
80
|
end
|
81
81
|
end
|
@@ -83,7 +83,7 @@ module ICU
|
|
83
83
|
def display_language(locale = nil)
|
84
84
|
locale = locale.to_s unless locale.nil?
|
85
85
|
|
86
|
-
Lib::Util.read_uchar_buffer(
|
86
|
+
Lib::Util.read_uchar_buffer(192) do |buffer, status|
|
87
87
|
Lib.uloc_getDisplayLanguage(@id, locale, buffer, buffer.size, status)
|
88
88
|
end
|
89
89
|
end
|
@@ -91,7 +91,7 @@ module ICU
|
|
91
91
|
def display_name(locale = nil)
|
92
92
|
locale = locale.to_s unless locale.nil?
|
93
93
|
|
94
|
-
Lib::Util.read_uchar_buffer(
|
94
|
+
Lib::Util.read_uchar_buffer(256) do |buffer, status|
|
95
95
|
Lib.uloc_getDisplayName(@id, locale, buffer, buffer.size, status)
|
96
96
|
end
|
97
97
|
end
|
@@ -99,7 +99,7 @@ module ICU
|
|
99
99
|
def display_script(locale = nil)
|
100
100
|
locale = locale.to_s unless locale.nil?
|
101
101
|
|
102
|
-
Lib::Util.read_uchar_buffer(
|
102
|
+
Lib::Util.read_uchar_buffer(128) do |buffer, status|
|
103
103
|
Lib.uloc_getDisplayScript(@id, locale, buffer, buffer.size, status)
|
104
104
|
end
|
105
105
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'date'
|
1
2
|
|
2
3
|
module ICU
|
3
4
|
module TimeFormatting
|
@@ -64,21 +65,31 @@ module ICU
|
|
64
65
|
|
65
66
|
private
|
66
67
|
|
67
|
-
def make_formatter(time_style, date_style, locale, time_zone_str)
|
68
|
-
time_zone
|
69
|
-
|
68
|
+
def make_formatter(time_style, date_style, locale, time_zone_str, skeleton)
|
69
|
+
time_zone = nil
|
70
|
+
tz_len = 0
|
71
|
+
pattern_len = -1
|
72
|
+
pattern_ptr = FFI::MemoryPointer.new(4)
|
73
|
+
|
70
74
|
if time_zone_str
|
71
75
|
time_zone = UCharPointer.from_string(time_zone_str)
|
72
|
-
|
76
|
+
tz_len = time_zone_str.size
|
73
77
|
else
|
74
78
|
Lib.check_error { | error|
|
75
79
|
i_len = 150
|
76
80
|
time_zone = UCharPointer.new(i_len)
|
77
|
-
|
81
|
+
tz_len = Lib.ucal_getDefaultTimeZone(time_zone, i_len, error)
|
78
82
|
}
|
79
83
|
end
|
80
84
|
|
81
|
-
|
85
|
+
if skeleton
|
86
|
+
date_style = :pattern
|
87
|
+
time_style = :pattern
|
88
|
+
|
89
|
+
pattern_len, pattern_ptr = skeleton_format(skeleton, locale)
|
90
|
+
end
|
91
|
+
|
92
|
+
ptr = Lib.check_error { | error| Lib.udat_open(time_style, date_style, locale, time_zone, tz_len, pattern_ptr, pattern_len, error) }
|
82
93
|
FFI::AutoPointer.new(ptr, Lib.method(:udat_close))
|
83
94
|
end
|
84
95
|
end
|
@@ -90,7 +101,9 @@ module ICU
|
|
90
101
|
locale = options[:locale] || 'C'
|
91
102
|
tz_style = options[:tz_style]
|
92
103
|
time_zone = options[:zone]
|
93
|
-
|
104
|
+
skeleton = options[:skeleton]
|
105
|
+
|
106
|
+
@f = make_formatter(time_style, date_style, locale, time_zone, skeleton)
|
94
107
|
if tz_style
|
95
108
|
f0 = date_format(true)
|
96
109
|
f1 = update_tz_format(f0, tz_style)
|
@@ -176,6 +189,31 @@ module ICU
|
|
176
189
|
needed_length = Lib.udat_applyPattern(@f, localized, pattern, pattern_len)
|
177
190
|
end
|
178
191
|
end
|
192
|
+
|
193
|
+
def skeleton_format(pattern, locale)
|
194
|
+
pattern = UCharPointer.from_string(pattern)
|
195
|
+
|
196
|
+
needed_length = 0
|
197
|
+
pattern_ptr = UCharPointer.new(needed_length)
|
198
|
+
|
199
|
+
udatpg_ptr = Lib.check_error { |error| Lib.udatpg_open(locale, error) }
|
200
|
+
generator = FFI::AutoPointer.new(udatpg_ptr, Lib.method(:udat_close))
|
201
|
+
|
202
|
+
retried = false
|
203
|
+
|
204
|
+
begin
|
205
|
+
Lib.check_error do |error|
|
206
|
+
needed_length = Lib.udatpg_getBestPattern(generator, pattern, pattern.size, pattern_ptr, needed_length, error)
|
207
|
+
end
|
208
|
+
|
209
|
+
return needed_length, pattern_ptr
|
210
|
+
rescue BufferOverflowError
|
211
|
+
raise BufferOverflowError, "needed: #{needed_length}" if retried
|
212
|
+
pattern_ptr = pattern_ptr.resized_to needed_length
|
213
|
+
retried = true
|
214
|
+
retry
|
215
|
+
end
|
216
|
+
end
|
179
217
|
end # DateTimeFormatter
|
180
218
|
end # Formatting
|
181
219
|
end # ICU
|
data/lib/ffi-icu/version.rb
CHANGED
data/spec/break_iterator_spec.rb
CHANGED
@@ -1,75 +1,76 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
require "spec_helper"
|
4
|
-
|
5
3
|
module ICU
|
6
4
|
describe BreakIterator do
|
7
5
|
|
8
6
|
it "should return available locales" do
|
9
7
|
locales = ICU::BreakIterator.available_locales
|
10
|
-
locales.
|
11
|
-
locales.
|
12
|
-
locales.
|
8
|
+
expect(locales).to be_an(Array)
|
9
|
+
expect(locales).to_not be_empty
|
10
|
+
expect(locales).to include("en_US")
|
13
11
|
end
|
14
12
|
|
15
13
|
it "finds all word boundaries in an English string" do
|
16
14
|
iterator = BreakIterator.new :word, "en_US"
|
17
15
|
iterator.text = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
|
18
|
-
iterator.to_a.
|
16
|
+
expect(iterator.to_a).to eq(
|
17
|
+
[0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
|
18
|
+
)
|
19
19
|
end
|
20
20
|
|
21
21
|
it "returns each substring" do
|
22
22
|
iterator = BreakIterator.new :word, "en_US"
|
23
23
|
iterator.text = "Lorem ipsum dolor sit amet."
|
24
24
|
|
25
|
-
iterator.substrings.
|
25
|
+
expect(iterator.substrings).to eq(["Lorem", " ", "ipsum", " ", "dolor", " ", "sit", " ", "amet", "."])
|
26
26
|
end
|
27
27
|
|
28
28
|
it "returns the substrings of a non-ASCII string" do
|
29
29
|
iterator = BreakIterator.new :word, "th_TH"
|
30
30
|
iterator.text = "รู้อะไรไม่สู้รู้วิชา รู้รักษาตัวรอดเป็นยอดดี"
|
31
31
|
|
32
|
-
iterator.substrings.
|
32
|
+
expect(iterator.substrings).to eq(
|
33
|
+
["รู้", "อะไร", "ไม่สู้", "รู้", "วิชา", " ", "รู้", "รักษา", "ตัว", "รอด", "เป็น", "ยอดดี"]
|
34
|
+
)
|
33
35
|
end
|
34
36
|
|
35
37
|
it "finds all word boundaries in a non-ASCII string" do
|
36
38
|
iterator = BreakIterator.new :word, "th_TH"
|
37
39
|
iterator.text = "การทดลอง"
|
38
|
-
iterator.to_a.
|
40
|
+
expect(iterator.to_a).to eq([0, 3, 8])
|
39
41
|
end
|
40
42
|
|
41
43
|
it "finds all sentence boundaries in an English string" do
|
42
44
|
iterator = BreakIterator.new :sentence, "en_US"
|
43
45
|
iterator.text = "This is a sentence. This is another sentence, with a comma in it."
|
44
|
-
iterator.to_a.
|
46
|
+
expect(iterator.to_a).to eq([0, 20, 65])
|
45
47
|
end
|
46
48
|
|
47
49
|
it "can navigate back and forward" do
|
48
50
|
iterator = BreakIterator.new :word, "en_US"
|
49
51
|
iterator.text = "Lorem ipsum dolor sit amet."
|
50
52
|
|
51
|
-
iterator.first.
|
53
|
+
expect(iterator.first).to eq(0)
|
52
54
|
iterator.next
|
53
|
-
iterator.current.
|
54
|
-
iterator.last.
|
55
|
+
expect(iterator.current).to eq(5)
|
56
|
+
expect(iterator.last).to eq(27)
|
55
57
|
end
|
56
58
|
|
57
59
|
it "fetches info about given offset" do
|
58
60
|
iterator = BreakIterator.new :word, "en_US"
|
59
61
|
iterator.text = "Lorem ipsum dolor sit amet."
|
60
62
|
|
61
|
-
iterator.following(3).
|
62
|
-
iterator.preceding(6).
|
63
|
+
expect(iterator.following(3)).to eq(5)
|
64
|
+
expect(iterator.preceding(6)).to eq(5)
|
63
65
|
|
64
|
-
iterator.
|
65
|
-
iterator.
|
66
|
+
expect(iterator).to be_boundary(5)
|
67
|
+
expect(iterator).to_not be_boundary(10)
|
66
68
|
end
|
67
69
|
|
68
70
|
it "returns an Enumerator if no block was given" do
|
69
71
|
iterator = BreakIterator.new :word, "nb"
|
70
|
-
expected = ICU.ruby19? ? Enumerator : Enumerable::Enumerator
|
71
72
|
|
72
|
-
iterator.each.
|
73
|
+
expect(iterator.each).to be_kind_of(Enumerator)
|
73
74
|
end
|
74
75
|
|
75
76
|
end # BreakIterator
|
data/spec/chardet_spec.rb
CHANGED
@@ -1,29 +1,27 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
require 'spec_helper'
|
4
|
-
|
5
3
|
describe ICU::CharDet::Detector do
|
6
4
|
|
7
5
|
let(:detector) { ICU::CharDet::Detector.new }
|
8
6
|
|
9
7
|
it "should recognize UTF-8" do
|
10
8
|
m = detector.detect("æåø")
|
11
|
-
m.name.
|
12
|
-
m.language.
|
9
|
+
expect(m.name).to eq("UTF-8")
|
10
|
+
expect(m.language).to be_a(String)
|
13
11
|
end
|
14
12
|
|
15
13
|
it "has a list of detectable charsets" do
|
16
14
|
cs = detector.detectable_charsets
|
17
|
-
cs.
|
18
|
-
cs.
|
15
|
+
expect(cs).to be_an(Array)
|
16
|
+
expect(cs).to_not be_empty
|
19
17
|
|
20
|
-
cs.first.
|
18
|
+
expect(cs.first).to be_a(String)
|
21
19
|
end
|
22
20
|
|
23
21
|
it "should disable / enable the input filter" do
|
24
|
-
detector.input_filter_enabled
|
22
|
+
expect(detector.input_filter_enabled?).to be_falsey
|
25
23
|
detector.input_filter_enabled = true
|
26
|
-
detector.input_filter_enabled
|
24
|
+
expect(detector.input_filter_enabled?).to be_truthy
|
27
25
|
end
|
28
26
|
|
29
27
|
it "should should set declared encoding" do
|
@@ -31,14 +29,14 @@ describe ICU::CharDet::Detector do
|
|
31
29
|
end
|
32
30
|
|
33
31
|
it "should detect several matching encodings" do
|
34
|
-
detector.detect_all("foo bar").
|
32
|
+
expect(detector.detect_all("foo bar")).to be_an(Array)
|
35
33
|
end
|
36
34
|
|
37
35
|
it "should support null bytes" do
|
38
36
|
# Create a utf-16 string and then force it to binary (ascii) to mimic data from net/http
|
39
37
|
string = "foo".encode("UTF-16").force_encoding("binary")
|
40
38
|
m = detector.detect(string)
|
41
|
-
m.name.
|
42
|
-
m.language.
|
39
|
+
expect(m.name).to eq("UTF-16BE")
|
40
|
+
expect(m.language).to be_a(String)
|
43
41
|
end
|
44
42
|
end
|