ffi-icu 0.1.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 61818359548264beab4f803a6d4b6873eb9796f9
4
- data.tar.gz: 5cffff3be406e40154bc45c51b2e199d3b36eb9c
2
+ SHA256:
3
+ metadata.gz: 3e1817bf83d85197ea62937c274ff98c71984ef4a432a6f76fc3cca764bf45dc
4
+ data.tar.gz: 2a21f2177dc151831fe89782be8cfe53d2f13a6d1038961eb1efe5c3970a27a0
5
5
  SHA512:
6
- metadata.gz: 4f212ef09319ea71ea83493578d659c8e44d8be59a5d9afeb72f14bfd08ca834e00026cf397eba9ebb8737011455bc4c717fd66b3684c00e6515409d80560ccb
7
- data.tar.gz: 0ab9f362d3d94351fc720f95d90ab3718a299e0061a6fec80a2ae690d2a8d44556a8ef73d22d25b50f2d684c1a9dc2bd625030b9c05f2548406c48bde810b5ae
6
+ metadata.gz: 6fb2d659678226632c485ea24de6e79f7d20ccf7b80b03833f680bb295ae07736957f3a9037ac1d3639b432f6974c279b580767d08315ad005c57cc6be79ed8b
7
+ data.tar.gz: 9c49ddef23ae27f88d4eaffbaf0cf42b670e7f07939b7c19b6255a4ac157faae25e8eb5bc973764e767b3b32d40226c4b1cc95772c629fbe4b678d4cff5ebccb
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
@@ -1,7 +1,13 @@
1
+ language: ruby
2
+ os: linux
3
+ dist: xenial
4
+
1
5
  rvm:
2
- - 1.9.3
3
- - 2.0.0
4
- - 2.1.0
6
+ - 2.5
7
+ - 2.6
8
+ - 2.7
5
9
  - ruby-head
10
+
6
11
  before_script:
7
- - sudo apt-get install -y libicu48
12
+ - sudo apt-get install -y libicu-dev
13
+
data/README.md CHANGED
@@ -1,9 +1,7 @@
1
- ffi-icu
1
+ ffi-icu [![Build Status](https://travis-ci.org/erickguan/ffi-icu.svg?branch=master)](https://travis-ci.org/erickguan/ffi-icu)
2
2
  =======
3
3
 
4
- Simple FFI wrappers for things I need from ICU. For the full thing, check out [ICU4R](http://icu4r.rubyforge.org/) instead.
5
-
6
- [![Build Status](https://secure.travis-ci.org/jarib/ffi-icu.png)](http://travis-ci.org/jarib/ffi-icu)
4
+ Simple FFI wrappers for ICU. Checkout the renovated [ICU gem](https://github.com/fantasticfears/icu4r) instead which supports various of encoding and distributed with packaged source. FFI-ICU needs some love with ICU gem's transcoding method.
7
5
 
8
6
  Gem
9
7
  ---
@@ -49,7 +47,6 @@ or
49
47
  Why not just use rchardet?
50
48
 
51
49
  * speed
52
- * 1.9 support
53
50
 
54
51
  Locale Sensitive Collation
55
52
  --------------------------
@@ -127,19 +124,20 @@ Tested on:
127
124
 
128
125
  Platforms:
129
126
 
130
- * OS X 10.6
131
- * Arch Linux
127
+ * OS X 10.6 - 10.10
128
+ * Travis' Linux
132
129
 
133
130
  Rubies:
134
131
 
135
- * MRI 1.9.1
136
- * MRI 1.8.7
132
+ - 2.5
133
+ - 2.6
134
+ - 2.7
135
+ - ruby-head
137
136
 
138
137
  TODO:
139
138
  =====
140
139
 
141
- * Useful ICU stuff:
142
- - date formatting
140
+ * Any other useful part of ICU?
143
141
  * Windows?!
144
142
 
145
143
  Note on Patches/Pull Requests
@@ -6,7 +6,8 @@ Gem::Specification.new do |s|
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jari Bakken"]
9
- s.date = %q{2010-08-23}
9
+ s.date = %q{2019-10-15}
10
+ s.licenses = ['MIT']
10
11
  s.description = %q{Provides charset detection, locale sensitive collation and more. Depends on libicu.}
11
12
  s.email = %q{jari.bakken@gmail.com}
12
13
  s.extra_rdoc_files = ["LICENSE", "README.md"]
@@ -20,7 +21,6 @@ Gem::Specification.new do |s|
20
21
  s.summary = %q{Simple Ruby FFI wrappers for things I need from ICU.}
21
22
 
22
23
  s.add_runtime_dependency "ffi", "~> 1.0", ">= 1.0.9"
23
- s.add_development_dependency "rspec", ["~> 2.5.0"]
24
- s.add_development_dependency "rake", ["~> 0.9.2"]
24
+ s.add_development_dependency 'rspec', '~> 3.9'
25
+ s.add_development_dependency "rake", [">= 12.3.3"]
25
26
  end
26
-
@@ -18,15 +18,6 @@ module ICU
18
18
  os
19
19
  end
20
20
  end
21
-
22
- def self.ruby19?
23
- RUBY_VERSION >= '1.9'
24
- end
25
- end
26
-
27
- unless ICU.ruby19?
28
- require 'jcode'
29
- $KCODE = 'u'
30
21
  end
31
22
 
32
23
  require "ffi-icu/core_ext/string"
@@ -38,7 +29,7 @@ require "ffi-icu/collation"
38
29
  require "ffi-icu/locale"
39
30
  require "ffi-icu/transliteration"
40
31
  require "ffi-icu/normalization"
32
+ require "ffi-icu/normalizer"
41
33
  require "ffi-icu/break_iterator"
42
34
  require "ffi-icu/number_formatting"
43
35
  require "ffi-icu/time_formatting"
44
-
@@ -39,7 +39,12 @@ module ICU
39
39
  [find_lib("libicui18n.#{FFI::Platform::LIBSUFFIX}.??"),
40
40
  find_lib("libicutu.#{FFI::Platform::LIBSUFFIX}.??")]
41
41
  when :osx
42
- [find_lib("libicucore.#{FFI::Platform::LIBSUFFIX}")]
42
+ # See https://developer.apple.com/documentation/macos-release-notes/macos-big-sur-11_0_1-release-notes (62986286)
43
+ if Gem::Version.new(`sw_vers -productVersion`) >= Gem::Version.new('11')
44
+ ["libicucore.#{FFI::Platform::LIBSUFFIX}"]
45
+ else
46
+ [find_lib("libicucore.#{FFI::Platform::LIBSUFFIX}")]
47
+ end
43
48
  when :linux
44
49
  [find_lib("libicui18n.#{FFI::Platform::LIBSUFFIX}.??"),
45
50
  find_lib("libicutu.#{FFI::Platform::LIBSUFFIX}.??")]
@@ -99,7 +104,7 @@ module ICU
99
104
  # Here are the possible suffixes
100
105
  suffixes = [""]
101
106
  if version
102
- suffixes << "_#{version}" << "_#{version[0].chr}_#{version[1].chr}"
107
+ suffixes << "_#{version}" << "_#{version[0].chr}_#{version[1].chr}" << "_#{version.split('.')[0]}"
103
108
  end
104
109
 
105
110
  # Try to find the u_errorName function using the possible suffixes
@@ -339,6 +344,15 @@ module ICU
339
344
 
340
345
  attach_function :unorm_normalize, "unorm_normalize#{suffix}", [:pointer, :int32_t, :normalization_mode, :int32_t, :pointer, :int32_t, :pointer], :int32_t
341
346
 
347
+ # http://icu-project.org/apiref/icu4c/unorm2_8h.html
348
+
349
+ if Gem::Version.new('4.4') <= Gem::Version.new(self.version)
350
+ enum :normalization2_mode, [ :compose, :decompose, :fcd, :compose_contiguous ]
351
+ attach_function :unorm2_getInstance, "unorm2_getInstance#{suffix}", [:pointer, :pointer, :normalization2_mode, :pointer], :pointer
352
+ attach_function :unorm2_normalize, "unorm2_normalize#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t, :pointer], :int32_t
353
+ attach_function :unorm2_isNormalized, "unorm2_isNormalized#{suffix}", [:pointer, :pointer, :int32_t, :pointer], :bool
354
+ end
355
+
342
356
  #
343
357
  # Text Boundary Analysis
344
358
  #
@@ -392,10 +406,10 @@ module ICU
392
406
  :ignore
393
407
  ]
394
408
  enum :number_format_attribute, [
395
- :parse_int_only, :grouping_used, :decimal_always_show, :max_integer_digits,
396
- :min_integer_digits, :integer_digits, :max_fraction_digits, :min_fraction_digits,
397
- :fraction_digits, :multiplier, :grouping_size, :rounding_mode,
398
- :rounding_increment, :format_width, :padding_position, :secondary_grouping_size,
409
+ :parse_int_only, :grouping_used, :decimal_always_show, :max_integer_digits,
410
+ :min_integer_digits, :integer_digits, :max_fraction_digits, :min_fraction_digits,
411
+ :fraction_digits, :multiplier, :grouping_size, :rounding_mode,
412
+ :rounding_increment, :format_width, :padding_position, :secondary_grouping_size,
399
413
  :significant_digits_used, :min_significant_digits, :max_significant_digits, :lenient_parse
400
414
  ]
401
415
  attach_function :unum_open, "unum_open#{suffix}", [:number_format_style, :pointer, :int32_t, :string, :pointer, :pointer ], :pointer
@@ -75,7 +75,7 @@ module ICU
75
75
  def display_country(locale = nil)
76
76
  locale = locale.to_s unless locale.nil?
77
77
 
78
- Lib::Util.read_uchar_buffer(64) do |buffer, status|
78
+ Lib::Util.read_uchar_buffer(256) do |buffer, status|
79
79
  Lib.uloc_getDisplayCountry(@id, locale, buffer, buffer.size, status)
80
80
  end
81
81
  end
@@ -83,7 +83,7 @@ module ICU
83
83
  def display_language(locale = nil)
84
84
  locale = locale.to_s unless locale.nil?
85
85
 
86
- Lib::Util.read_uchar_buffer(64) do |buffer, status|
86
+ Lib::Util.read_uchar_buffer(192) do |buffer, status|
87
87
  Lib.uloc_getDisplayLanguage(@id, locale, buffer, buffer.size, status)
88
88
  end
89
89
  end
@@ -91,7 +91,7 @@ module ICU
91
91
  def display_name(locale = nil)
92
92
  locale = locale.to_s unless locale.nil?
93
93
 
94
- Lib::Util.read_uchar_buffer(64) do |buffer, status|
94
+ Lib::Util.read_uchar_buffer(256) do |buffer, status|
95
95
  Lib.uloc_getDisplayName(@id, locale, buffer, buffer.size, status)
96
96
  end
97
97
  end
@@ -99,7 +99,7 @@ module ICU
99
99
  def display_script(locale = nil)
100
100
  locale = locale.to_s unless locale.nil?
101
101
 
102
- Lib::Util.read_uchar_buffer(64) do |buffer, status|
102
+ Lib::Util.read_uchar_buffer(128) do |buffer, status|
103
103
  Lib.uloc_getDisplayScript(@id, locale, buffer, buffer.size, status)
104
104
  end
105
105
  end
@@ -0,0 +1,47 @@
1
+ module ICU
2
+ class Normalizer
3
+ # support for newer ICU normalization API
4
+
5
+ def initialize(package_name = nil, name = 'nfc', mode = :decompose)
6
+ Lib.check_error do |error|
7
+ @instance = Lib.unorm2_getInstance(package_name, name, mode, error)
8
+ end
9
+ end
10
+
11
+ def normalize(input)
12
+ input_length = input.jlength
13
+ in_ptr = UCharPointer.from_string(input)
14
+ needed_length = capacity = 0
15
+ out_ptr = UCharPointer.new(needed_length)
16
+
17
+ retried = false
18
+ begin
19
+ Lib.check_error do |error|
20
+ needed_length = Lib.unorm2_normalize(@instance, in_ptr, input_length, out_ptr, capacity, error)
21
+ end
22
+ rescue BufferOverflowError
23
+ raise BufferOverflowError, "needed: #{needed_length}" if retried
24
+
25
+ capacity = needed_length
26
+ out_ptr = out_ptr.resized_to needed_length
27
+
28
+ retried = true
29
+ retry
30
+ end
31
+
32
+ out_ptr.string
33
+ end
34
+
35
+ def is_normailzed?(input)
36
+ input_length = input.jlength
37
+ in_ptr = UCharPointer.from_string(input)
38
+
39
+ Lib.check_error do |error|
40
+ result = Lib.unorm2_isNormalized(@instance, in_ptr, input_length, error)
41
+ end
42
+
43
+ result
44
+ end
45
+
46
+ end # Normalizer
47
+ end # ICU
@@ -1,3 +1,4 @@
1
+ require 'date'
1
2
 
2
3
  module ICU
3
4
  module TimeFormatting
@@ -1,3 +1,3 @@
1
1
  module ICU
2
- VERSION = "0.1.7"
2
+ VERSION = "0.3.0"
3
3
  end
@@ -1,75 +1,76 @@
1
1
  # encoding: utf-8
2
2
 
3
- require "spec_helper"
4
-
5
3
  module ICU
6
4
  describe BreakIterator do
7
5
 
8
6
  it "should return available locales" do
9
7
  locales = ICU::BreakIterator.available_locales
10
- locales.should be_kind_of(Array)
11
- locales.should_not be_empty
12
- locales.should include("en_US")
8
+ expect(locales).to be_an(Array)
9
+ expect(locales).to_not be_empty
10
+ expect(locales).to include("en_US")
13
11
  end
14
12
 
15
13
  it "finds all word boundaries in an English string" do
16
14
  iterator = BreakIterator.new :word, "en_US"
17
15
  iterator.text = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
18
- iterator.to_a.should == [0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
16
+ expect(iterator.to_a).to eq(
17
+ [0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
18
+ )
19
19
  end
20
20
 
21
21
  it "returns each substring" do
22
22
  iterator = BreakIterator.new :word, "en_US"
23
23
  iterator.text = "Lorem ipsum dolor sit amet."
24
24
 
25
- iterator.substrings.should == ["Lorem", " ", "ipsum", " ", "dolor", " ", "sit", " ", "amet", "."]
25
+ expect(iterator.substrings).to eq(["Lorem", " ", "ipsum", " ", "dolor", " ", "sit", " ", "amet", "."])
26
26
  end
27
27
 
28
28
  it "returns the substrings of a non-ASCII string" do
29
29
  iterator = BreakIterator.new :word, "th_TH"
30
30
  iterator.text = "รู้อะไรไม่สู้รู้วิชา รู้รักษาตัวรอดเป็นยอดดี"
31
31
 
32
- iterator.substrings.should == ["รู้", "อะไร", "ไม่สู้", "รู้", "วิชา", " ", "รู้", "รักษา", "ตัว", "รอด", "เป็น", "ยอดดี"]
32
+ expect(iterator.substrings).to eq(
33
+ ["รู้", "อะไร", "ไม่สู้", "รู้", "วิชา", " ", "รู้", "รักษา", "ตัว", "รอด", "เป็น", "ยอดดี"]
34
+ )
33
35
  end
34
36
 
35
37
  it "finds all word boundaries in a non-ASCII string" do
36
38
  iterator = BreakIterator.new :word, "th_TH"
37
39
  iterator.text = "การทดลอง"
38
- iterator.to_a.should == [0, 3, 8]
40
+ expect(iterator.to_a).to eq([0, 3, 8])
39
41
  end
40
42
 
41
43
  it "finds all sentence boundaries in an English string" do
42
44
  iterator = BreakIterator.new :sentence, "en_US"
43
45
  iterator.text = "This is a sentence. This is another sentence, with a comma in it."
44
- iterator.to_a.should == [0, 20, 65]
46
+ expect(iterator.to_a).to eq([0, 20, 65])
45
47
  end
46
48
 
47
49
  it "can navigate back and forward" do
48
50
  iterator = BreakIterator.new :word, "en_US"
49
51
  iterator.text = "Lorem ipsum dolor sit amet."
50
52
 
51
- iterator.first.should == 0
53
+ expect(iterator.first).to eq(0)
52
54
  iterator.next
53
- iterator.current.should == 5
54
- iterator.last.should == 27
55
+ expect(iterator.current).to eq(5)
56
+ expect(iterator.last).to eq(27)
55
57
  end
56
58
 
57
59
  it "fetches info about given offset" do
58
60
  iterator = BreakIterator.new :word, "en_US"
59
61
  iterator.text = "Lorem ipsum dolor sit amet."
60
62
 
61
- iterator.following(3).should == 5
62
- iterator.preceding(6).should == 5
63
+ expect(iterator.following(3)).to eq(5)
64
+ expect(iterator.preceding(6)).to eq(5)
63
65
 
64
- iterator.should be_boundary(5)
65
- iterator.should_not be_boundary(10)
66
+ expect(iterator).to be_boundary(5)
67
+ expect(iterator).to_not be_boundary(10)
66
68
  end
67
69
 
68
70
  it "returns an Enumerator if no block was given" do
69
71
  iterator = BreakIterator.new :word, "nb"
70
- expected = ICU.ruby19? ? Enumerator : Enumerable::Enumerator
71
72
 
72
- iterator.each.should be_kind_of(expected)
73
+ expect(iterator.each).to be_kind_of(Enumerator)
73
74
  end
74
75
 
75
76
  end # BreakIterator
@@ -1,29 +1,27 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require 'spec_helper'
4
-
5
3
  describe ICU::CharDet::Detector do
6
4
 
7
5
  let(:detector) { ICU::CharDet::Detector.new }
8
6
 
9
7
  it "should recognize UTF-8" do
10
8
  m = detector.detect("æåø")
11
- m.name.should == "UTF-8"
12
- m.language.should be_kind_of(String)
9
+ expect(m.name).to eq("UTF-8")
10
+ expect(m.language).to be_a(String)
13
11
  end
14
12
 
15
13
  it "has a list of detectable charsets" do
16
14
  cs = detector.detectable_charsets
17
- cs.should be_kind_of(Array)
18
- cs.should_not be_empty
15
+ expect(cs).to be_an(Array)
16
+ expect(cs).to_not be_empty
19
17
 
20
- cs.first.should be_kind_of(String)
18
+ expect(cs.first).to be_a(String)
21
19
  end
22
20
 
23
21
  it "should disable / enable the input filter" do
24
- detector.input_filter_enabled?.should be_false
22
+ expect(detector.input_filter_enabled?).to be_falsey
25
23
  detector.input_filter_enabled = true
26
- detector.input_filter_enabled?.should be_true
24
+ expect(detector.input_filter_enabled?).to be_truthy
27
25
  end
28
26
 
29
27
  it "should should set declared encoding" do
@@ -31,14 +29,14 @@ describe ICU::CharDet::Detector do
31
29
  end
32
30
 
33
31
  it "should detect several matching encodings" do
34
- detector.detect_all("foo bar").should be_instance_of(Array)
32
+ expect(detector.detect_all("foo bar")).to be_an(Array)
35
33
  end
36
34
 
37
35
  it "should support null bytes" do
38
36
  # Create a utf-16 string and then force it to binary (ascii) to mimic data from net/http
39
37
  string = "foo".encode("UTF-16").force_encoding("binary")
40
38
  m = detector.detect(string)
41
- m.name.should == "UTF-16BE"
42
- m.language.should be_kind_of(String)
39
+ expect(m.name).to eq("UTF-16BE")
40
+ expect(m.language).to be_a(String)
43
41
  end
44
42
  end
@@ -1,12 +1,10 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require 'spec_helper'
4
-
5
3
  module ICU
6
4
  module Collation
7
5
  describe "Collation" do
8
6
  it "should collate an array of strings" do
9
- Collation.collate("nb", %w[æ å ø]).should == %w[æ ø å]
7
+ expect(Collation.collate("nb", %w[æ å ø])).to eq(%w[æ ø å])
10
8
  end
11
9
  end
12
10
 
@@ -14,51 +12,50 @@ module ICU
14
12
  let(:collator) { Collator.new("nb") }
15
13
 
16
14
  it "should collate an array of strings" do
17
- collator.collate(%w[å ø æ]).should == %w[æ ø å]
15
+ expect(collator.collate(%w[å ø æ])).to eq(%w[æ ø å])
18
16
  end
19
17
 
20
18
  it "raises an error if argument does not respond to :sort" do
21
- lambda { collator.collate(1) }.should raise_error(ArgumentError)
19
+ expect { collator.collate(1) }.to raise_error(ArgumentError)
22
20
  end
23
21
 
24
22
  it "should return available locales" do
25
23
  locales = ICU::Collation.available_locales
26
- locales.should be_kind_of(Array)
27
- locales.should_not be_empty
28
- locales.should include("nb")
24
+ expect(locales).to be_an(Array)
25
+ expect(locales).to_not be_empty
26
+ expect(locales).to include("nb")
29
27
  end
30
28
 
31
29
  it "should return the locale of the collator" do
32
- l = collator.locale
33
- l.should == "nb"
30
+ expect(collator.locale).to eq('nb')
34
31
  end
35
32
 
36
33
  it "should compare two strings" do
37
- collator.compare("blåbærsyltetøy", "blah").should == 1
38
- collator.compare("blah", "blah").should == 0
39
- collator.compare("ba", "bl").should == -1
34
+ expect(collator.compare("blåbærsyltetøy", "blah")).to eq(1)
35
+ expect(collator.compare("blah", "blah")).to eq(0)
36
+ expect(collator.compare("ba", "bl")).to eq(-1)
40
37
  end
41
38
 
42
39
  it "should know if a string is greater than another" do
43
- collator.should be_greater("z", "a")
44
- collator.should_not be_greater("a", "z")
40
+ expect(collator).to be_greater("z", "a")
41
+ expect(collator).to_not be_greater("a", "z")
45
42
  end
46
43
 
47
44
  it "should know if a string is greater or equal to another" do
48
- collator.should be_greater_or_equal("z", "a")
49
- collator.should be_greater_or_equal("z", "z")
50
- collator.should_not be_greater_or_equal("a", "z")
45
+ expect(collator).to be_greater_or_equal("z", "a")
46
+ expect(collator).to be_greater_or_equal("z", "z")
47
+ expect(collator).to_not be_greater_or_equal("a", "z")
51
48
  end
52
49
 
53
50
  it "should know if a string is equal to another" do
54
- collator.should be_equal("a", "a")
55
- collator.should_not be_equal("a", "b")
51
+ expect(collator).to be_equal("a", "a")
52
+ expect(collator).to_not be_equal("a", "b")
56
53
  end
57
54
 
58
55
  it "should return rules" do
59
- collator.rules.should_not be_empty
56
+ expect(collator.rules).to_not be_empty
60
57
  # ö sorts before Ö
61
- collator.rules.include?('ö<<<Ö').should be_true
58
+ expect(collator.rules).to include('ö<<<Ö')
62
59
  end
63
60
 
64
61
  end