ffi-icu 0.1.10 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rspec +2 -0
- data/.travis.yml +10 -4
- data/README.md +7 -10
- data/ffi-icu.gemspec +4 -4
- data/lib/ffi-icu.rb +0 -9
- data/lib/ffi-icu/lib.rb +6 -1
- data/lib/ffi-icu/locale.rb +4 -4
- data/lib/ffi-icu/time_formatting.rb +1 -0
- data/lib/ffi-icu/version.rb +1 -1
- data/spec/break_iterator_spec.rb +20 -19
- data/spec/chardet_spec.rb +10 -12
- data/spec/collation_spec.rb +19 -22
- data/spec/lib/version_info_spec.rb +11 -6
- data/spec/lib_spec.rb +11 -11
- data/spec/locale_spec.rb +105 -80
- data/spec/normalization_spec.rb +2 -4
- data/spec/normalizer_spec.rb +24 -26
- data/spec/number_formatting_spec.rb +28 -25
- data/spec/time_spec.rb +34 -37
- data/spec/transliteration_spec.rb +5 -6
- data/spec/uchar_spec.rb +8 -10
- metadata +27 -28
- data/spec/spec.opts +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3e1817bf83d85197ea62937c274ff98c71984ef4a432a6f76fc3cca764bf45dc
|
4
|
+
data.tar.gz: 2a21f2177dc151831fe89782be8cfe53d2f13a6d1038961eb1efe5c3970a27a0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6fb2d659678226632c485ea24de6e79f7d20ccf7b80b03833f680bb295ae07736957f3a9037ac1d3639b432f6974c279b580767d08315ad005c57cc6be79ed8b
|
7
|
+
data.tar.gz: 9c49ddef23ae27f88d4eaffbaf0cf42b670e7f07939b7c19b6255a4ac157faae25e8eb5bc973764e767b3b32d40226c4b1cc95772c629fbe4b678d4cff5ebccb
|
data/.rspec
ADDED
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -1,9 +1,7 @@
|
|
1
|
-
ffi-icu
|
1
|
+
ffi-icu [Build Status](https://travis-ci.org/erickguan/ffi-icu)
|
2
2
|
=======
|
3
3
|
|
4
|
-
Simple FFI wrappers for
|
5
|
-
|
6
|
-
[Build Status](http://travis-ci.org/jarib/ffi-icu)
|
4
|
+
Simple FFI wrappers for ICU. Check out the renovated [ICU gem](https://github.com/fantasticfears/icu4r) instead, which supports various encodings and is distributed with packaged source. FFI-ICU needs some love with the ICU gem's transcoding method.
|
7
5
|
|
8
6
|
Gem
|
9
7
|
---
|
@@ -49,7 +47,6 @@ or
|
|
49
47
|
Why not just use rchardet?
|
50
48
|
|
51
49
|
* speed
|
52
|
-
* 1.9 support
|
53
50
|
|
54
51
|
Locale Sensitive Collation
|
55
52
|
--------------------------
|
@@ -132,15 +129,15 @@ Platforms:
|
|
132
129
|
|
133
130
|
Rubies:
|
134
131
|
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
132
|
+
- 2.5
|
133
|
+
- 2.6
|
134
|
+
- 2.7
|
135
|
+
- ruby-head
|
139
136
|
|
140
137
|
TODO:
|
141
138
|
=====
|
142
139
|
|
143
|
-
* Any other useful part of ICU?
|
140
|
+
* Any other useful part of ICU?
|
144
141
|
* Windows?!
|
145
142
|
|
146
143
|
Note on Patches/Pull Requests
|
data/ffi-icu.gemspec
CHANGED
@@ -6,7 +6,8 @@ Gem::Specification.new do |s|
|
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Jari Bakken"]
|
9
|
-
s.date = %q{
|
9
|
+
s.date = %q{2019-10-15}
|
10
|
+
s.licenses = ['MIT']
|
10
11
|
s.description = %q{Provides charset detection, locale sensitive collation and more. Depends on libicu.}
|
11
12
|
s.email = %q{jari.bakken@gmail.com}
|
12
13
|
s.extra_rdoc_files = ["LICENSE", "README.md"]
|
@@ -20,7 +21,6 @@ Gem::Specification.new do |s|
|
|
20
21
|
s.summary = %q{Simple Ruby FFI wrappers for things I need from ICU.}
|
21
22
|
|
22
23
|
s.add_runtime_dependency "ffi", "~> 1.0", ">= 1.0.9"
|
23
|
-
s.add_development_dependency
|
24
|
-
s.add_development_dependency "rake", ["
|
24
|
+
s.add_development_dependency 'rspec', '~> 3.9'
|
25
|
+
s.add_development_dependency "rake", [">= 12.3.3"]
|
25
26
|
end
|
26
|
-
|
data/lib/ffi-icu.rb
CHANGED
data/lib/ffi-icu/lib.rb
CHANGED
@@ -39,7 +39,12 @@ module ICU
|
|
39
39
|
[find_lib("libicui18n.#{FFI::Platform::LIBSUFFIX}.??"),
|
40
40
|
find_lib("libicutu.#{FFI::Platform::LIBSUFFIX}.??")]
|
41
41
|
when :osx
|
42
|
-
|
42
|
+
# See https://developer.apple.com/documentation/macos-release-notes/macos-big-sur-11_0_1-release-notes (62986286)
|
43
|
+
if Gem::Version.new(`sw_vers -productVersion`) >= Gem::Version.new('11')
|
44
|
+
["libicucore.#{FFI::Platform::LIBSUFFIX}"]
|
45
|
+
else
|
46
|
+
[find_lib("libicucore.#{FFI::Platform::LIBSUFFIX}")]
|
47
|
+
end
|
43
48
|
when :linux
|
44
49
|
[find_lib("libicui18n.#{FFI::Platform::LIBSUFFIX}.??"),
|
45
50
|
find_lib("libicutu.#{FFI::Platform::LIBSUFFIX}.??")]
|
data/lib/ffi-icu/locale.rb
CHANGED
@@ -75,7 +75,7 @@ module ICU
|
|
75
75
|
def display_country(locale = nil)
|
76
76
|
locale = locale.to_s unless locale.nil?
|
77
77
|
|
78
|
-
Lib::Util.read_uchar_buffer(
|
78
|
+
Lib::Util.read_uchar_buffer(256) do |buffer, status|
|
79
79
|
Lib.uloc_getDisplayCountry(@id, locale, buffer, buffer.size, status)
|
80
80
|
end
|
81
81
|
end
|
@@ -83,7 +83,7 @@ module ICU
|
|
83
83
|
def display_language(locale = nil)
|
84
84
|
locale = locale.to_s unless locale.nil?
|
85
85
|
|
86
|
-
Lib::Util.read_uchar_buffer(
|
86
|
+
Lib::Util.read_uchar_buffer(192) do |buffer, status|
|
87
87
|
Lib.uloc_getDisplayLanguage(@id, locale, buffer, buffer.size, status)
|
88
88
|
end
|
89
89
|
end
|
@@ -91,7 +91,7 @@ module ICU
|
|
91
91
|
def display_name(locale = nil)
|
92
92
|
locale = locale.to_s unless locale.nil?
|
93
93
|
|
94
|
-
Lib::Util.read_uchar_buffer(
|
94
|
+
Lib::Util.read_uchar_buffer(256) do |buffer, status|
|
95
95
|
Lib.uloc_getDisplayName(@id, locale, buffer, buffer.size, status)
|
96
96
|
end
|
97
97
|
end
|
@@ -99,7 +99,7 @@ module ICU
|
|
99
99
|
def display_script(locale = nil)
|
100
100
|
locale = locale.to_s unless locale.nil?
|
101
101
|
|
102
|
-
Lib::Util.read_uchar_buffer(
|
102
|
+
Lib::Util.read_uchar_buffer(128) do |buffer, status|
|
103
103
|
Lib.uloc_getDisplayScript(@id, locale, buffer, buffer.size, status)
|
104
104
|
end
|
105
105
|
end
|
data/lib/ffi-icu/version.rb
CHANGED
data/spec/break_iterator_spec.rb
CHANGED
@@ -1,75 +1,76 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
require "spec_helper"
|
4
|
-
|
5
3
|
module ICU
|
6
4
|
describe BreakIterator do
|
7
5
|
|
8
6
|
it "should return available locales" do
|
9
7
|
locales = ICU::BreakIterator.available_locales
|
10
|
-
locales.
|
11
|
-
locales.
|
12
|
-
locales.
|
8
|
+
expect(locales).to be_an(Array)
|
9
|
+
expect(locales).to_not be_empty
|
10
|
+
expect(locales).to include("en_US")
|
13
11
|
end
|
14
12
|
|
15
13
|
it "finds all word boundaries in an English string" do
|
16
14
|
iterator = BreakIterator.new :word, "en_US"
|
17
15
|
iterator.text = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
|
18
|
-
iterator.to_a.
|
16
|
+
expect(iterator.to_a).to eq(
|
17
|
+
[0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
|
18
|
+
)
|
19
19
|
end
|
20
20
|
|
21
21
|
it "returns each substring" do
|
22
22
|
iterator = BreakIterator.new :word, "en_US"
|
23
23
|
iterator.text = "Lorem ipsum dolor sit amet."
|
24
24
|
|
25
|
-
iterator.substrings.
|
25
|
+
expect(iterator.substrings).to eq(["Lorem", " ", "ipsum", " ", "dolor", " ", "sit", " ", "amet", "."])
|
26
26
|
end
|
27
27
|
|
28
28
|
it "returns the substrings of a non-ASCII string" do
|
29
29
|
iterator = BreakIterator.new :word, "th_TH"
|
30
30
|
iterator.text = "รู้อะไรไม่สู้รู้วิชา รู้รักษาตัวรอดเป็นยอดดี"
|
31
31
|
|
32
|
-
iterator.substrings.
|
32
|
+
expect(iterator.substrings).to eq(
|
33
|
+
["รู้", "อะไร", "ไม่สู้", "รู้", "วิชา", " ", "รู้", "รักษา", "ตัว", "รอด", "เป็น", "ยอดดี"]
|
34
|
+
)
|
33
35
|
end
|
34
36
|
|
35
37
|
it "finds all word boundaries in a non-ASCII string" do
|
36
38
|
iterator = BreakIterator.new :word, "th_TH"
|
37
39
|
iterator.text = "การทดลอง"
|
38
|
-
iterator.to_a.
|
40
|
+
expect(iterator.to_a).to eq([0, 3, 8])
|
39
41
|
end
|
40
42
|
|
41
43
|
it "finds all sentence boundaries in an English string" do
|
42
44
|
iterator = BreakIterator.new :sentence, "en_US"
|
43
45
|
iterator.text = "This is a sentence. This is another sentence, with a comma in it."
|
44
|
-
iterator.to_a.
|
46
|
+
expect(iterator.to_a).to eq([0, 20, 65])
|
45
47
|
end
|
46
48
|
|
47
49
|
it "can navigate back and forward" do
|
48
50
|
iterator = BreakIterator.new :word, "en_US"
|
49
51
|
iterator.text = "Lorem ipsum dolor sit amet."
|
50
52
|
|
51
|
-
iterator.first.
|
53
|
+
expect(iterator.first).to eq(0)
|
52
54
|
iterator.next
|
53
|
-
iterator.current.
|
54
|
-
iterator.last.
|
55
|
+
expect(iterator.current).to eq(5)
|
56
|
+
expect(iterator.last).to eq(27)
|
55
57
|
end
|
56
58
|
|
57
59
|
it "fetches info about given offset" do
|
58
60
|
iterator = BreakIterator.new :word, "en_US"
|
59
61
|
iterator.text = "Lorem ipsum dolor sit amet."
|
60
62
|
|
61
|
-
iterator.following(3).
|
62
|
-
iterator.preceding(6).
|
63
|
+
expect(iterator.following(3)).to eq(5)
|
64
|
+
expect(iterator.preceding(6)).to eq(5)
|
63
65
|
|
64
|
-
iterator.
|
65
|
-
iterator.
|
66
|
+
expect(iterator).to be_boundary(5)
|
67
|
+
expect(iterator).to_not be_boundary(10)
|
66
68
|
end
|
67
69
|
|
68
70
|
it "returns an Enumerator if no block was given" do
|
69
71
|
iterator = BreakIterator.new :word, "nb"
|
70
|
-
expected = ICU.ruby19? ? Enumerator : Enumerable::Enumerator
|
71
72
|
|
72
|
-
iterator.each.
|
73
|
+
expect(iterator.each).to be_kind_of(Enumerator)
|
73
74
|
end
|
74
75
|
|
75
76
|
end # BreakIterator
|
data/spec/chardet_spec.rb
CHANGED
@@ -1,29 +1,27 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
require 'spec_helper'
|
4
|
-
|
5
3
|
describe ICU::CharDet::Detector do
|
6
4
|
|
7
5
|
let(:detector) { ICU::CharDet::Detector.new }
|
8
6
|
|
9
7
|
it "should recognize UTF-8" do
|
10
8
|
m = detector.detect("æåø")
|
11
|
-
m.name.
|
12
|
-
m.language.
|
9
|
+
expect(m.name).to eq("UTF-8")
|
10
|
+
expect(m.language).to be_a(String)
|
13
11
|
end
|
14
12
|
|
15
13
|
it "has a list of detectable charsets" do
|
16
14
|
cs = detector.detectable_charsets
|
17
|
-
cs.
|
18
|
-
cs.
|
15
|
+
expect(cs).to be_an(Array)
|
16
|
+
expect(cs).to_not be_empty
|
19
17
|
|
20
|
-
cs.first.
|
18
|
+
expect(cs.first).to be_a(String)
|
21
19
|
end
|
22
20
|
|
23
21
|
it "should disable / enable the input filter" do
|
24
|
-
detector.input_filter_enabled
|
22
|
+
expect(detector.input_filter_enabled?).to be_falsey
|
25
23
|
detector.input_filter_enabled = true
|
26
|
-
detector.input_filter_enabled
|
24
|
+
expect(detector.input_filter_enabled?).to be_truthy
|
27
25
|
end
|
28
26
|
|
29
27
|
it "should should set declared encoding" do
|
@@ -31,14 +29,14 @@ describe ICU::CharDet::Detector do
|
|
31
29
|
end
|
32
30
|
|
33
31
|
it "should detect several matching encodings" do
|
34
|
-
detector.detect_all("foo bar").
|
32
|
+
expect(detector.detect_all("foo bar")).to be_an(Array)
|
35
33
|
end
|
36
34
|
|
37
35
|
it "should support null bytes" do
|
38
36
|
# Create a utf-16 string and then force it to binary (ascii) to mimic data from net/http
|
39
37
|
string = "foo".encode("UTF-16").force_encoding("binary")
|
40
38
|
m = detector.detect(string)
|
41
|
-
m.name.
|
42
|
-
m.language.
|
39
|
+
expect(m.name).to eq("UTF-16BE")
|
40
|
+
expect(m.language).to be_a(String)
|
43
41
|
end
|
44
42
|
end
|
data/spec/collation_spec.rb
CHANGED
@@ -1,12 +1,10 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
require 'spec_helper'
|
4
|
-
|
5
3
|
module ICU
|
6
4
|
module Collation
|
7
5
|
describe "Collation" do
|
8
6
|
it "should collate an array of strings" do
|
9
|
-
Collation.collate("nb", %w[æ å ø]).
|
7
|
+
expect(Collation.collate("nb", %w[æ å ø])).to eq(%w[æ ø å])
|
10
8
|
end
|
11
9
|
end
|
12
10
|
|
@@ -14,51 +12,50 @@ module ICU
|
|
14
12
|
let(:collator) { Collator.new("nb") }
|
15
13
|
|
16
14
|
it "should collate an array of strings" do
|
17
|
-
collator.collate(%w[å ø æ]).
|
15
|
+
expect(collator.collate(%w[å ø æ])).to eq(%w[æ ø å])
|
18
16
|
end
|
19
17
|
|
20
18
|
it "raises an error if argument does not respond to :sort" do
|
21
|
-
|
19
|
+
expect { collator.collate(1) }.to raise_error(ArgumentError)
|
22
20
|
end
|
23
21
|
|
24
22
|
it "should return available locales" do
|
25
23
|
locales = ICU::Collation.available_locales
|
26
|
-
locales.
|
27
|
-
locales.
|
28
|
-
locales.
|
24
|
+
expect(locales).to be_an(Array)
|
25
|
+
expect(locales).to_not be_empty
|
26
|
+
expect(locales).to include("nb")
|
29
27
|
end
|
30
28
|
|
31
29
|
it "should return the locale of the collator" do
|
32
|
-
|
33
|
-
l.should == "nb"
|
30
|
+
expect(collator.locale).to eq('nb')
|
34
31
|
end
|
35
32
|
|
36
33
|
it "should compare two strings" do
|
37
|
-
collator.compare("blåbærsyltetøy", "blah").
|
38
|
-
collator.compare("blah", "blah").
|
39
|
-
collator.compare("ba", "bl").
|
34
|
+
expect(collator.compare("blåbærsyltetøy", "blah")).to eq(1)
|
35
|
+
expect(collator.compare("blah", "blah")).to eq(0)
|
36
|
+
expect(collator.compare("ba", "bl")).to eq(-1)
|
40
37
|
end
|
41
38
|
|
42
39
|
it "should know if a string is greater than another" do
|
43
|
-
collator.
|
44
|
-
collator.
|
40
|
+
expect(collator).to be_greater("z", "a")
|
41
|
+
expect(collator).to_not be_greater("a", "z")
|
45
42
|
end
|
46
43
|
|
47
44
|
it "should know if a string is greater or equal to another" do
|
48
|
-
collator.
|
49
|
-
collator.
|
50
|
-
collator.
|
45
|
+
expect(collator).to be_greater_or_equal("z", "a")
|
46
|
+
expect(collator).to be_greater_or_equal("z", "z")
|
47
|
+
expect(collator).to_not be_greater_or_equal("a", "z")
|
51
48
|
end
|
52
49
|
|
53
50
|
it "should know if a string is equal to another" do
|
54
|
-
collator.
|
55
|
-
collator.
|
51
|
+
expect(collator).to be_equal("a", "a")
|
52
|
+
expect(collator).to_not be_equal("a", "b")
|
56
53
|
end
|
57
54
|
|
58
55
|
it "should return rules" do
|
59
|
-
collator.rules.
|
56
|
+
expect(collator.rules).to_not be_empty
|
60
57
|
# ö sorts before Ö
|
61
|
-
collator.rules.include
|
58
|
+
expect(collator.rules).to include('ö<<<Ö')
|
62
59
|
end
|
63
60
|
|
64
61
|
end
|
@@ -1,14 +1,19 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
require 'spec_helper'
|
4
|
-
|
5
3
|
module ICU
|
6
4
|
module Lib
|
7
5
|
describe VersionInfo do
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
6
|
+
describe '.to_a' do
|
7
|
+
subject { described_class.new.to_a }
|
8
|
+
|
9
|
+
it { is_expected.to be_an(Array) }
|
10
|
+
end
|
11
|
+
|
12
|
+
describe '.to_s' do
|
13
|
+
subject { described_class.new.to_s }
|
14
|
+
|
15
|
+
it { is_expected.to be_a(String) }
|
16
|
+
it { is_expected.to match(/^[0-9.]+$/) }
|
12
17
|
end
|
13
18
|
end
|
14
19
|
end
|
data/spec/lib_spec.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
require 'spec_helper'
|
4
|
-
|
5
3
|
module ICU
|
6
4
|
describe Lib do
|
7
5
|
describe 'error checking' do
|
@@ -9,8 +7,8 @@ module ICU
|
|
9
7
|
|
10
8
|
context 'upon success' do
|
11
9
|
it 'returns the block result' do
|
12
|
-
Lib.check_error { |status| return_value }.
|
13
|
-
Lib.check_error { |status| status.write_int(0); return_value }.
|
10
|
+
expect(Lib.check_error { |status| return_value }).to eq(return_value)
|
11
|
+
expect(Lib.check_error { |status| status.write_int(0); return_value }).to eq(return_value)
|
14
12
|
end
|
15
13
|
end
|
16
14
|
|
@@ -28,8 +26,9 @@ module ICU
|
|
28
26
|
before(:each) { $VERBOSE = true }
|
29
27
|
|
30
28
|
it 'prints to STDERR and returns the block result' do
|
31
|
-
$stderr.
|
32
|
-
Lib.check_error { |status| status.write_int(-127); return_value }
|
29
|
+
expect($stderr).to receive(:puts) { |message| expect(message).to match /U_.*_WARNING/ }
|
30
|
+
error_check = Lib.check_error { |status| status.write_int(-127); return_value }
|
31
|
+
expect(error_check).to eq(return_value)
|
33
32
|
end
|
34
33
|
end
|
35
34
|
|
@@ -37,8 +36,9 @@ module ICU
|
|
37
36
|
before(:each) { $VERBOSE = false }
|
38
37
|
|
39
38
|
it 'returns the block result' do
|
40
|
-
$stderr.
|
41
|
-
Lib.check_error { |status| status.write_int(-127); return_value }
|
39
|
+
expect($stderr).to_not receive(:puts)
|
40
|
+
error_check = Lib.check_error { |status| status.write_int(-127); return_value }
|
41
|
+
expect(error_check).to eq(return_value)
|
42
42
|
end
|
43
43
|
end
|
44
44
|
end
|
@@ -49,15 +49,15 @@ module ICU
|
|
49
49
|
subject { Lib.cldr_version }
|
50
50
|
|
51
51
|
it { should be_a Lib::VersionInfo }
|
52
|
-
it('is populated') { subject.to_a.
|
52
|
+
it('is populated') { expect(subject.to_a).to_not eq([0,0,0,0]) }
|
53
53
|
end
|
54
54
|
end
|
55
55
|
|
56
56
|
describe 'ICU version' do
|
57
57
|
subject { Lib.version }
|
58
58
|
|
59
|
-
it {
|
60
|
-
it('is populated') { subject.to_a.
|
59
|
+
it { is_expected.to be_a Lib::VersionInfo }
|
60
|
+
it('is populated') { expect(subject.to_a).to_not eq([0,0,0,0]) }
|
61
61
|
end
|
62
62
|
end
|
63
63
|
end
|