ffi-icu 0.1.10 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rspec +2 -0
- data/.travis.yml +10 -4
- data/README.md +7 -10
- data/ffi-icu.gemspec +4 -4
- data/lib/ffi-icu.rb +0 -9
- data/lib/ffi-icu/lib.rb +6 -1
- data/lib/ffi-icu/locale.rb +4 -4
- data/lib/ffi-icu/time_formatting.rb +1 -0
- data/lib/ffi-icu/version.rb +1 -1
- data/spec/break_iterator_spec.rb +20 -19
- data/spec/chardet_spec.rb +10 -12
- data/spec/collation_spec.rb +19 -22
- data/spec/lib/version_info_spec.rb +11 -6
- data/spec/lib_spec.rb +11 -11
- data/spec/locale_spec.rb +105 -80
- data/spec/normalization_spec.rb +2 -4
- data/spec/normalizer_spec.rb +24 -26
- data/spec/number_formatting_spec.rb +28 -25
- data/spec/time_spec.rb +34 -37
- data/spec/transliteration_spec.rb +5 -6
- data/spec/uchar_spec.rb +8 -10
- metadata +27 -28
- data/spec/spec.opts +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3e1817bf83d85197ea62937c274ff98c71984ef4a432a6f76fc3cca764bf45dc
|
4
|
+
data.tar.gz: 2a21f2177dc151831fe89782be8cfe53d2f13a6d1038961eb1efe5c3970a27a0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6fb2d659678226632c485ea24de6e79f7d20ccf7b80b03833f680bb295ae07736957f3a9037ac1d3639b432f6974c279b580767d08315ad005c57cc6be79ed8b
|
7
|
+
data.tar.gz: 9c49ddef23ae27f88d4eaffbaf0cf42b670e7f07939b7c19b6255a4ac157faae25e8eb5bc973764e767b3b32d40226c4b1cc95772c629fbe4b678d4cff5ebccb
|
data/.rspec
ADDED
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -1,9 +1,7 @@
|
|
1
|
-
ffi-icu
|
1
|
+
ffi-icu [![Build Status](https://travis-ci.org/erickguan/ffi-icu.svg?branch=master)](https://travis-ci.org/erickguan/ffi-icu)
|
2
2
|
=======
|
3
3
|
|
4
|
-
Simple FFI wrappers for
|
5
|
-
|
6
|
-
[![Build Status](https://secure.travis-ci.org/jarib/ffi-icu.png)](http://travis-ci.org/jarib/ffi-icu)
|
4
|
+
Simple FFI wrappers for ICU. Check out the renovated [ICU gem](https://github.com/fantasticfears/icu4r) instead, which supports various encodings and is distributed with packaged source. FFI-ICU needs some love with the ICU gem's transcoding method.
|
7
5
|
|
8
6
|
Gem
|
9
7
|
---
|
@@ -49,7 +47,6 @@ or
|
|
49
47
|
Why not just use rchardet?
|
50
48
|
|
51
49
|
* speed
|
52
|
-
* 1.9 support
|
53
50
|
|
54
51
|
Locale Sensitive Collation
|
55
52
|
--------------------------
|
@@ -132,15 +129,15 @@ Platforms:
|
|
132
129
|
|
133
130
|
Rubies:
|
134
131
|
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
132
|
+
- 2.5
|
133
|
+
- 2.6
|
134
|
+
- 2.7
|
135
|
+
- ruby-head
|
139
136
|
|
140
137
|
TODO:
|
141
138
|
=====
|
142
139
|
|
143
|
-
* Any other useful part of ICU?
|
140
|
+
* Any other useful part of ICU?
|
144
141
|
* Windows?!
|
145
142
|
|
146
143
|
Note on Patches/Pull Requests
|
data/ffi-icu.gemspec
CHANGED
@@ -6,7 +6,8 @@ Gem::Specification.new do |s|
|
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Jari Bakken"]
|
9
|
-
s.date = %q{
|
9
|
+
s.date = %q{2019-10-15}
|
10
|
+
s.licenses = ['MIT']
|
10
11
|
s.description = %q{Provides charset detection, locale sensitive collation and more. Depends on libicu.}
|
11
12
|
s.email = %q{jari.bakken@gmail.com}
|
12
13
|
s.extra_rdoc_files = ["LICENSE", "README.md"]
|
@@ -20,7 +21,6 @@ Gem::Specification.new do |s|
|
|
20
21
|
s.summary = %q{Simple Ruby FFI wrappers for things I need from ICU.}
|
21
22
|
|
22
23
|
s.add_runtime_dependency "ffi", "~> 1.0", ">= 1.0.9"
|
23
|
-
s.add_development_dependency
|
24
|
-
s.add_development_dependency "rake", ["
|
24
|
+
s.add_development_dependency 'rspec', '~> 3.9'
|
25
|
+
s.add_development_dependency "rake", [">= 12.3.3"]
|
25
26
|
end
|
26
|
-
|
data/lib/ffi-icu.rb
CHANGED
data/lib/ffi-icu/lib.rb
CHANGED
@@ -39,7 +39,12 @@ module ICU
|
|
39
39
|
[find_lib("libicui18n.#{FFI::Platform::LIBSUFFIX}.??"),
|
40
40
|
find_lib("libicutu.#{FFI::Platform::LIBSUFFIX}.??")]
|
41
41
|
when :osx
|
42
|
-
|
42
|
+
# See https://developer.apple.com/documentation/macos-release-notes/macos-big-sur-11_0_1-release-notes (62986286)
|
43
|
+
if Gem::Version.new(`sw_vers -productVersion`) >= Gem::Version.new('11')
|
44
|
+
["libicucore.#{FFI::Platform::LIBSUFFIX}"]
|
45
|
+
else
|
46
|
+
[find_lib("libicucore.#{FFI::Platform::LIBSUFFIX}")]
|
47
|
+
end
|
43
48
|
when :linux
|
44
49
|
[find_lib("libicui18n.#{FFI::Platform::LIBSUFFIX}.??"),
|
45
50
|
find_lib("libicutu.#{FFI::Platform::LIBSUFFIX}.??")]
|
data/lib/ffi-icu/locale.rb
CHANGED
@@ -75,7 +75,7 @@ module ICU
|
|
75
75
|
def display_country(locale = nil)
|
76
76
|
locale = locale.to_s unless locale.nil?
|
77
77
|
|
78
|
-
Lib::Util.read_uchar_buffer(
|
78
|
+
Lib::Util.read_uchar_buffer(256) do |buffer, status|
|
79
79
|
Lib.uloc_getDisplayCountry(@id, locale, buffer, buffer.size, status)
|
80
80
|
end
|
81
81
|
end
|
@@ -83,7 +83,7 @@ module ICU
|
|
83
83
|
def display_language(locale = nil)
|
84
84
|
locale = locale.to_s unless locale.nil?
|
85
85
|
|
86
|
-
Lib::Util.read_uchar_buffer(
|
86
|
+
Lib::Util.read_uchar_buffer(192) do |buffer, status|
|
87
87
|
Lib.uloc_getDisplayLanguage(@id, locale, buffer, buffer.size, status)
|
88
88
|
end
|
89
89
|
end
|
@@ -91,7 +91,7 @@ module ICU
|
|
91
91
|
def display_name(locale = nil)
|
92
92
|
locale = locale.to_s unless locale.nil?
|
93
93
|
|
94
|
-
Lib::Util.read_uchar_buffer(
|
94
|
+
Lib::Util.read_uchar_buffer(256) do |buffer, status|
|
95
95
|
Lib.uloc_getDisplayName(@id, locale, buffer, buffer.size, status)
|
96
96
|
end
|
97
97
|
end
|
@@ -99,7 +99,7 @@ module ICU
|
|
99
99
|
def display_script(locale = nil)
|
100
100
|
locale = locale.to_s unless locale.nil?
|
101
101
|
|
102
|
-
Lib::Util.read_uchar_buffer(
|
102
|
+
Lib::Util.read_uchar_buffer(128) do |buffer, status|
|
103
103
|
Lib.uloc_getDisplayScript(@id, locale, buffer, buffer.size, status)
|
104
104
|
end
|
105
105
|
end
|
data/lib/ffi-icu/version.rb
CHANGED
data/spec/break_iterator_spec.rb
CHANGED
@@ -1,75 +1,76 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
require "spec_helper"
|
4
|
-
|
5
3
|
module ICU
|
6
4
|
describe BreakIterator do
|
7
5
|
|
8
6
|
it "should return available locales" do
|
9
7
|
locales = ICU::BreakIterator.available_locales
|
10
|
-
locales.
|
11
|
-
locales.
|
12
|
-
locales.
|
8
|
+
expect(locales).to be_an(Array)
|
9
|
+
expect(locales).to_not be_empty
|
10
|
+
expect(locales).to include("en_US")
|
13
11
|
end
|
14
12
|
|
15
13
|
it "finds all word boundaries in an English string" do
|
16
14
|
iterator = BreakIterator.new :word, "en_US"
|
17
15
|
iterator.text = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
|
18
|
-
iterator.to_a.
|
16
|
+
expect(iterator.to_a).to eq(
|
17
|
+
[0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
|
18
|
+
)
|
19
19
|
end
|
20
20
|
|
21
21
|
it "returns each substring" do
|
22
22
|
iterator = BreakIterator.new :word, "en_US"
|
23
23
|
iterator.text = "Lorem ipsum dolor sit amet."
|
24
24
|
|
25
|
-
iterator.substrings.
|
25
|
+
expect(iterator.substrings).to eq(["Lorem", " ", "ipsum", " ", "dolor", " ", "sit", " ", "amet", "."])
|
26
26
|
end
|
27
27
|
|
28
28
|
it "returns the substrings of a non-ASCII string" do
|
29
29
|
iterator = BreakIterator.new :word, "th_TH"
|
30
30
|
iterator.text = "รู้อะไรไม่สู้รู้วิชา รู้รักษาตัวรอดเป็นยอดดี"
|
31
31
|
|
32
|
-
iterator.substrings.
|
32
|
+
expect(iterator.substrings).to eq(
|
33
|
+
["รู้", "อะไร", "ไม่สู้", "รู้", "วิชา", " ", "รู้", "รักษา", "ตัว", "รอด", "เป็น", "ยอดดี"]
|
34
|
+
)
|
33
35
|
end
|
34
36
|
|
35
37
|
it "finds all word boundaries in a non-ASCII string" do
|
36
38
|
iterator = BreakIterator.new :word, "th_TH"
|
37
39
|
iterator.text = "การทดลอง"
|
38
|
-
iterator.to_a.
|
40
|
+
expect(iterator.to_a).to eq([0, 3, 8])
|
39
41
|
end
|
40
42
|
|
41
43
|
it "finds all sentence boundaries in an English string" do
|
42
44
|
iterator = BreakIterator.new :sentence, "en_US"
|
43
45
|
iterator.text = "This is a sentence. This is another sentence, with a comma in it."
|
44
|
-
iterator.to_a.
|
46
|
+
expect(iterator.to_a).to eq([0, 20, 65])
|
45
47
|
end
|
46
48
|
|
47
49
|
it "can navigate back and forward" do
|
48
50
|
iterator = BreakIterator.new :word, "en_US"
|
49
51
|
iterator.text = "Lorem ipsum dolor sit amet."
|
50
52
|
|
51
|
-
iterator.first.
|
53
|
+
expect(iterator.first).to eq(0)
|
52
54
|
iterator.next
|
53
|
-
iterator.current.
|
54
|
-
iterator.last.
|
55
|
+
expect(iterator.current).to eq(5)
|
56
|
+
expect(iterator.last).to eq(27)
|
55
57
|
end
|
56
58
|
|
57
59
|
it "fetches info about given offset" do
|
58
60
|
iterator = BreakIterator.new :word, "en_US"
|
59
61
|
iterator.text = "Lorem ipsum dolor sit amet."
|
60
62
|
|
61
|
-
iterator.following(3).
|
62
|
-
iterator.preceding(6).
|
63
|
+
expect(iterator.following(3)).to eq(5)
|
64
|
+
expect(iterator.preceding(6)).to eq(5)
|
63
65
|
|
64
|
-
iterator.
|
65
|
-
iterator.
|
66
|
+
expect(iterator).to be_boundary(5)
|
67
|
+
expect(iterator).to_not be_boundary(10)
|
66
68
|
end
|
67
69
|
|
68
70
|
it "returns an Enumerator if no block was given" do
|
69
71
|
iterator = BreakIterator.new :word, "nb"
|
70
|
-
expected = ICU.ruby19? ? Enumerator : Enumerable::Enumerator
|
71
72
|
|
72
|
-
iterator.each.
|
73
|
+
expect(iterator.each).to be_kind_of(Enumerator)
|
73
74
|
end
|
74
75
|
|
75
76
|
end # BreakIterator
|
data/spec/chardet_spec.rb
CHANGED
@@ -1,29 +1,27 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
require 'spec_helper'
|
4
|
-
|
5
3
|
describe ICU::CharDet::Detector do
|
6
4
|
|
7
5
|
let(:detector) { ICU::CharDet::Detector.new }
|
8
6
|
|
9
7
|
it "should recognize UTF-8" do
|
10
8
|
m = detector.detect("æåø")
|
11
|
-
m.name.
|
12
|
-
m.language.
|
9
|
+
expect(m.name).to eq("UTF-8")
|
10
|
+
expect(m.language).to be_a(String)
|
13
11
|
end
|
14
12
|
|
15
13
|
it "has a list of detectable charsets" do
|
16
14
|
cs = detector.detectable_charsets
|
17
|
-
cs.
|
18
|
-
cs.
|
15
|
+
expect(cs).to be_an(Array)
|
16
|
+
expect(cs).to_not be_empty
|
19
17
|
|
20
|
-
cs.first.
|
18
|
+
expect(cs.first).to be_a(String)
|
21
19
|
end
|
22
20
|
|
23
21
|
it "should disable / enable the input filter" do
|
24
|
-
detector.input_filter_enabled
|
22
|
+
expect(detector.input_filter_enabled?).to be_falsey
|
25
23
|
detector.input_filter_enabled = true
|
26
|
-
detector.input_filter_enabled
|
24
|
+
expect(detector.input_filter_enabled?).to be_truthy
|
27
25
|
end
|
28
26
|
|
29
27
|
it "should should set declared encoding" do
|
@@ -31,14 +29,14 @@ describe ICU::CharDet::Detector do
|
|
31
29
|
end
|
32
30
|
|
33
31
|
it "should detect several matching encodings" do
|
34
|
-
detector.detect_all("foo bar").
|
32
|
+
expect(detector.detect_all("foo bar")).to be_an(Array)
|
35
33
|
end
|
36
34
|
|
37
35
|
it "should support null bytes" do
|
38
36
|
# Create a utf-16 string and then force it to binary (ascii) to mimic data from net/http
|
39
37
|
string = "foo".encode("UTF-16").force_encoding("binary")
|
40
38
|
m = detector.detect(string)
|
41
|
-
m.name.
|
42
|
-
m.language.
|
39
|
+
expect(m.name).to eq("UTF-16BE")
|
40
|
+
expect(m.language).to be_a(String)
|
43
41
|
end
|
44
42
|
end
|
data/spec/collation_spec.rb
CHANGED
@@ -1,12 +1,10 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
require 'spec_helper'
|
4
|
-
|
5
3
|
module ICU
|
6
4
|
module Collation
|
7
5
|
describe "Collation" do
|
8
6
|
it "should collate an array of strings" do
|
9
|
-
Collation.collate("nb", %w[æ å ø]).
|
7
|
+
expect(Collation.collate("nb", %w[æ å ø])).to eq(%w[æ ø å])
|
10
8
|
end
|
11
9
|
end
|
12
10
|
|
@@ -14,51 +12,50 @@ module ICU
|
|
14
12
|
let(:collator) { Collator.new("nb") }
|
15
13
|
|
16
14
|
it "should collate an array of strings" do
|
17
|
-
collator.collate(%w[å ø æ]).
|
15
|
+
expect(collator.collate(%w[å ø æ])).to eq(%w[æ ø å])
|
18
16
|
end
|
19
17
|
|
20
18
|
it "raises an error if argument does not respond to :sort" do
|
21
|
-
|
19
|
+
expect { collator.collate(1) }.to raise_error(ArgumentError)
|
22
20
|
end
|
23
21
|
|
24
22
|
it "should return available locales" do
|
25
23
|
locales = ICU::Collation.available_locales
|
26
|
-
locales.
|
27
|
-
locales.
|
28
|
-
locales.
|
24
|
+
expect(locales).to be_an(Array)
|
25
|
+
expect(locales).to_not be_empty
|
26
|
+
expect(locales).to include("nb")
|
29
27
|
end
|
30
28
|
|
31
29
|
it "should return the locale of the collator" do
|
32
|
-
|
33
|
-
l.should == "nb"
|
30
|
+
expect(collator.locale).to eq('nb')
|
34
31
|
end
|
35
32
|
|
36
33
|
it "should compare two strings" do
|
37
|
-
collator.compare("blåbærsyltetøy", "blah").
|
38
|
-
collator.compare("blah", "blah").
|
39
|
-
collator.compare("ba", "bl").
|
34
|
+
expect(collator.compare("blåbærsyltetøy", "blah")).to eq(1)
|
35
|
+
expect(collator.compare("blah", "blah")).to eq(0)
|
36
|
+
expect(collator.compare("ba", "bl")).to eq(-1)
|
40
37
|
end
|
41
38
|
|
42
39
|
it "should know if a string is greater than another" do
|
43
|
-
collator.
|
44
|
-
collator.
|
40
|
+
expect(collator).to be_greater("z", "a")
|
41
|
+
expect(collator).to_not be_greater("a", "z")
|
45
42
|
end
|
46
43
|
|
47
44
|
it "should know if a string is greater or equal to another" do
|
48
|
-
collator.
|
49
|
-
collator.
|
50
|
-
collator.
|
45
|
+
expect(collator).to be_greater_or_equal("z", "a")
|
46
|
+
expect(collator).to be_greater_or_equal("z", "z")
|
47
|
+
expect(collator).to_not be_greater_or_equal("a", "z")
|
51
48
|
end
|
52
49
|
|
53
50
|
it "should know if a string is equal to another" do
|
54
|
-
collator.
|
55
|
-
collator.
|
51
|
+
expect(collator).to be_equal("a", "a")
|
52
|
+
expect(collator).to_not be_equal("a", "b")
|
56
53
|
end
|
57
54
|
|
58
55
|
it "should return rules" do
|
59
|
-
collator.rules.
|
56
|
+
expect(collator.rules).to_not be_empty
|
60
57
|
# ö sorts before Ö
|
61
|
-
collator.rules.include
|
58
|
+
expect(collator.rules).to include('ö<<<Ö')
|
62
59
|
end
|
63
60
|
|
64
61
|
end
|
@@ -1,14 +1,19 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
require 'spec_helper'
|
4
|
-
|
5
3
|
module ICU
|
6
4
|
module Lib
|
7
5
|
describe VersionInfo do
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
6
|
+
describe '.to_a' do
|
7
|
+
subject { described_class.new.to_a }
|
8
|
+
|
9
|
+
it { is_expected.to be_an(Array) }
|
10
|
+
end
|
11
|
+
|
12
|
+
describe '.to_s' do
|
13
|
+
subject { described_class.new.to_s }
|
14
|
+
|
15
|
+
it { is_expected.to be_a(String) }
|
16
|
+
it { is_expected.to match(/^[0-9.]+$/) }
|
12
17
|
end
|
13
18
|
end
|
14
19
|
end
|
data/spec/lib_spec.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
require 'spec_helper'
|
4
|
-
|
5
3
|
module ICU
|
6
4
|
describe Lib do
|
7
5
|
describe 'error checking' do
|
@@ -9,8 +7,8 @@ module ICU
|
|
9
7
|
|
10
8
|
context 'upon success' do
|
11
9
|
it 'returns the block result' do
|
12
|
-
Lib.check_error { |status| return_value }.
|
13
|
-
Lib.check_error { |status| status.write_int(0); return_value }.
|
10
|
+
expect(Lib.check_error { |status| return_value }).to eq(return_value)
|
11
|
+
expect(Lib.check_error { |status| status.write_int(0); return_value }).to eq(return_value)
|
14
12
|
end
|
15
13
|
end
|
16
14
|
|
@@ -28,8 +26,9 @@ module ICU
|
|
28
26
|
before(:each) { $VERBOSE = true }
|
29
27
|
|
30
28
|
it 'prints to STDERR and returns the block result' do
|
31
|
-
$stderr.
|
32
|
-
Lib.check_error { |status| status.write_int(-127); return_value }
|
29
|
+
expect($stderr).to receive(:puts) { |message| expect(message).to match /U_.*_WARNING/ }
|
30
|
+
error_check = Lib.check_error { |status| status.write_int(-127); return_value }
|
31
|
+
expect(error_check).to eq(return_value)
|
33
32
|
end
|
34
33
|
end
|
35
34
|
|
@@ -37,8 +36,9 @@ module ICU
|
|
37
36
|
before(:each) { $VERBOSE = false }
|
38
37
|
|
39
38
|
it 'returns the block result' do
|
40
|
-
$stderr.
|
41
|
-
Lib.check_error { |status| status.write_int(-127); return_value }
|
39
|
+
expect($stderr).to_not receive(:puts)
|
40
|
+
error_check = Lib.check_error { |status| status.write_int(-127); return_value }
|
41
|
+
expect(error_check).to eq(return_value)
|
42
42
|
end
|
43
43
|
end
|
44
44
|
end
|
@@ -49,15 +49,15 @@ module ICU
|
|
49
49
|
subject { Lib.cldr_version }
|
50
50
|
|
51
51
|
it { should be_a Lib::VersionInfo }
|
52
|
-
it('is populated') { subject.to_a.
|
52
|
+
it('is populated') { expect(subject.to_a).to_not eq([0,0,0,0]) }
|
53
53
|
end
|
54
54
|
end
|
55
55
|
|
56
56
|
describe 'ICU version' do
|
57
57
|
subject { Lib.version }
|
58
58
|
|
59
|
-
it {
|
60
|
-
it('is populated') { subject.to_a.
|
59
|
+
it { is_expected.to be_a Lib::VersionInfo }
|
60
|
+
it('is populated') { expect(subject.to_a).to_not eq([0,0,0,0]) }
|
61
61
|
end
|
62
62
|
end
|
63
63
|
end
|