ffi-icu 0.1.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 61818359548264beab4f803a6d4b6873eb9796f9
4
- data.tar.gz: 5cffff3be406e40154bc45c51b2e199d3b36eb9c
2
+ SHA256:
3
+ metadata.gz: 3e1817bf83d85197ea62937c274ff98c71984ef4a432a6f76fc3cca764bf45dc
4
+ data.tar.gz: 2a21f2177dc151831fe89782be8cfe53d2f13a6d1038961eb1efe5c3970a27a0
5
5
  SHA512:
6
- metadata.gz: 4f212ef09319ea71ea83493578d659c8e44d8be59a5d9afeb72f14bfd08ca834e00026cf397eba9ebb8737011455bc4c717fd66b3684c00e6515409d80560ccb
7
- data.tar.gz: 0ab9f362d3d94351fc720f95d90ab3718a299e0061a6fec80a2ae690d2a8d44556a8ef73d22d25b50f2d684c1a9dc2bd625030b9c05f2548406c48bde810b5ae
6
+ metadata.gz: 6fb2d659678226632c485ea24de6e79f7d20ccf7b80b03833f680bb295ae07736957f3a9037ac1d3639b432f6974c279b580767d08315ad005c57cc6be79ed8b
7
+ data.tar.gz: 9c49ddef23ae27f88d4eaffbaf0cf42b670e7f07939b7c19b6255a4ac157faae25e8eb5bc973764e767b3b32d40226c4b1cc95772c629fbe4b678d4cff5ebccb
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
@@ -1,7 +1,13 @@
1
+ language: ruby
2
+ os: linux
3
+ dist: xenial
4
+
1
5
  rvm:
2
- - 1.9.3
3
- - 2.0.0
4
- - 2.1.0
6
+ - 2.5
7
+ - 2.6
8
+ - 2.7
5
9
  - ruby-head
10
+
6
11
  before_script:
7
- - sudo apt-get install -y libicu48
12
+ - sudo apt-get install -y libicu-dev
13
+
data/README.md CHANGED
@@ -1,9 +1,7 @@
1
- ffi-icu
1
+ ffi-icu [![Build Status](https://travis-ci.org/erickguan/ffi-icu.svg?branch=master)](https://travis-ci.org/erickguan/ffi-icu)
2
2
  =======
3
3
 
4
- Simple FFI wrappers for things I need from ICU. For the full thing, check out [ICU4R](http://icu4r.rubyforge.org/) instead.
5
-
6
- [![Build Status](https://secure.travis-ci.org/jarib/ffi-icu.png)](http://travis-ci.org/jarib/ffi-icu)
4
+ Simple FFI wrappers for ICU. Check out the renovated [ICU gem](https://github.com/fantasticfears/icu4r) instead, which supports various encodings and is distributed with packaged source. FFI-ICU needs some love with ICU gem's transcoding method.
7
5
 
8
6
  Gem
9
7
  ---
@@ -49,7 +47,6 @@ or
49
47
  Why not just use rchardet?
50
48
 
51
49
  * speed
52
- * 1.9 support
53
50
 
54
51
  Locale Sensitive Collation
55
52
  --------------------------
@@ -127,19 +124,20 @@ Tested on:
127
124
 
128
125
  Platforms:
129
126
 
130
- * OS X 10.6
131
- * Arch Linux
127
+ * OS X 10.6 - 10.10
128
+ * Travis' Linux
132
129
 
133
130
  Rubies:
134
131
 
135
- * MRI 1.9.1
136
- * MRI 1.8.7
132
+ - 2.5
133
+ - 2.6
134
+ - 2.7
135
+ - ruby-head
137
136
 
138
137
  TODO:
139
138
  =====
140
139
 
141
- * Useful ICU stuff:
142
- - date formatting
140
+ * Any other useful part of ICU?
143
141
  * Windows?!
144
142
 
145
143
  Note on Patches/Pull Requests
@@ -6,7 +6,8 @@ Gem::Specification.new do |s|
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jari Bakken"]
9
- s.date = %q{2010-08-23}
9
+ s.date = %q{2019-10-15}
10
+ s.licenses = ['MIT']
10
11
  s.description = %q{Provides charset detection, locale sensitive collation and more. Depends on libicu.}
11
12
  s.email = %q{jari.bakken@gmail.com}
12
13
  s.extra_rdoc_files = ["LICENSE", "README.md"]
@@ -20,7 +21,6 @@ Gem::Specification.new do |s|
20
21
  s.summary = %q{Simple Ruby FFI wrappers for things I need from ICU.}
21
22
 
22
23
  s.add_runtime_dependency "ffi", "~> 1.0", ">= 1.0.9"
23
- s.add_development_dependency "rspec", ["~> 2.5.0"]
24
- s.add_development_dependency "rake", ["~> 0.9.2"]
24
+ s.add_development_dependency 'rspec', '~> 3.9'
25
+ s.add_development_dependency "rake", [">= 12.3.3"]
25
26
  end
26
-
@@ -18,15 +18,6 @@ module ICU
18
18
  os
19
19
  end
20
20
  end
21
-
22
- def self.ruby19?
23
- RUBY_VERSION >= '1.9'
24
- end
25
- end
26
-
27
- unless ICU.ruby19?
28
- require 'jcode'
29
- $KCODE = 'u'
30
21
  end
31
22
 
32
23
  require "ffi-icu/core_ext/string"
@@ -38,7 +29,7 @@ require "ffi-icu/collation"
38
29
  require "ffi-icu/locale"
39
30
  require "ffi-icu/transliteration"
40
31
  require "ffi-icu/normalization"
32
+ require "ffi-icu/normalizer"
41
33
  require "ffi-icu/break_iterator"
42
34
  require "ffi-icu/number_formatting"
43
35
  require "ffi-icu/time_formatting"
44
-
@@ -39,7 +39,12 @@ module ICU
39
39
  [find_lib("libicui18n.#{FFI::Platform::LIBSUFFIX}.??"),
40
40
  find_lib("libicutu.#{FFI::Platform::LIBSUFFIX}.??")]
41
41
  when :osx
42
- [find_lib("libicucore.#{FFI::Platform::LIBSUFFIX}")]
42
+ # See https://developer.apple.com/documentation/macos-release-notes/macos-big-sur-11_0_1-release-notes (62986286)
43
+ if Gem::Version.new(`sw_vers -productVersion`) >= Gem::Version.new('11')
44
+ ["libicucore.#{FFI::Platform::LIBSUFFIX}"]
45
+ else
46
+ [find_lib("libicucore.#{FFI::Platform::LIBSUFFIX}")]
47
+ end
43
48
  when :linux
44
49
  [find_lib("libicui18n.#{FFI::Platform::LIBSUFFIX}.??"),
45
50
  find_lib("libicutu.#{FFI::Platform::LIBSUFFIX}.??")]
@@ -99,7 +104,7 @@ module ICU
99
104
  # Here are the possible suffixes
100
105
  suffixes = [""]
101
106
  if version
102
- suffixes << "_#{version}" << "_#{version[0].chr}_#{version[1].chr}"
107
+ suffixes << "_#{version}" << "_#{version[0].chr}_#{version[1].chr}" << "_#{version.split('.')[0]}"
103
108
  end
104
109
 
105
110
  # Try to find the u_errorName function using the possible suffixes
@@ -339,6 +344,15 @@ module ICU
339
344
 
340
345
  attach_function :unorm_normalize, "unorm_normalize#{suffix}", [:pointer, :int32_t, :normalization_mode, :int32_t, :pointer, :int32_t, :pointer], :int32_t
341
346
 
347
+ # http://icu-project.org/apiref/icu4c/unorm2_8h.html
348
+
349
+ if Gem::Version.new('4.4') <= Gem::Version.new(self.version)
350
+ enum :normalization2_mode, [ :compose, :decompose, :fcd, :compose_contiguous ]
351
+ attach_function :unorm2_getInstance, "unorm2_getInstance#{suffix}", [:pointer, :pointer, :normalization2_mode, :pointer], :pointer
352
+ attach_function :unorm2_normalize, "unorm2_normalize#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t, :pointer], :int32_t
353
+ attach_function :unorm2_isNormalized, "unorm2_isNormalized#{suffix}", [:pointer, :pointer, :int32_t, :pointer], :bool
354
+ end
355
+
342
356
  #
343
357
  # Text Boundary Analysis
344
358
  #
@@ -392,10 +406,10 @@ module ICU
392
406
  :ignore
393
407
  ]
394
408
  enum :number_format_attribute, [
395
- :parse_int_only, :grouping_used, :decimal_always_show, :max_integer_digits,
396
- :min_integer_digits, :integer_digits, :max_fraction_digits, :min_fraction_digits,
397
- :fraction_digits, :multiplier, :grouping_size, :rounding_mode,
398
- :rounding_increment, :format_width, :padding_position, :secondary_grouping_size,
409
+ :parse_int_only, :grouping_used, :decimal_always_show, :max_integer_digits,
410
+ :min_integer_digits, :integer_digits, :max_fraction_digits, :min_fraction_digits,
411
+ :fraction_digits, :multiplier, :grouping_size, :rounding_mode,
412
+ :rounding_increment, :format_width, :padding_position, :secondary_grouping_size,
399
413
  :significant_digits_used, :min_significant_digits, :max_significant_digits, :lenient_parse
400
414
  ]
401
415
  attach_function :unum_open, "unum_open#{suffix}", [:number_format_style, :pointer, :int32_t, :string, :pointer, :pointer ], :pointer
@@ -75,7 +75,7 @@ module ICU
75
75
  def display_country(locale = nil)
76
76
  locale = locale.to_s unless locale.nil?
77
77
 
78
- Lib::Util.read_uchar_buffer(64) do |buffer, status|
78
+ Lib::Util.read_uchar_buffer(256) do |buffer, status|
79
79
  Lib.uloc_getDisplayCountry(@id, locale, buffer, buffer.size, status)
80
80
  end
81
81
  end
@@ -83,7 +83,7 @@ module ICU
83
83
  def display_language(locale = nil)
84
84
  locale = locale.to_s unless locale.nil?
85
85
 
86
- Lib::Util.read_uchar_buffer(64) do |buffer, status|
86
+ Lib::Util.read_uchar_buffer(192) do |buffer, status|
87
87
  Lib.uloc_getDisplayLanguage(@id, locale, buffer, buffer.size, status)
88
88
  end
89
89
  end
@@ -91,7 +91,7 @@ module ICU
91
91
  def display_name(locale = nil)
92
92
  locale = locale.to_s unless locale.nil?
93
93
 
94
- Lib::Util.read_uchar_buffer(64) do |buffer, status|
94
+ Lib::Util.read_uchar_buffer(256) do |buffer, status|
95
95
  Lib.uloc_getDisplayName(@id, locale, buffer, buffer.size, status)
96
96
  end
97
97
  end
@@ -99,7 +99,7 @@ module ICU
99
99
  def display_script(locale = nil)
100
100
  locale = locale.to_s unless locale.nil?
101
101
 
102
- Lib::Util.read_uchar_buffer(64) do |buffer, status|
102
+ Lib::Util.read_uchar_buffer(128) do |buffer, status|
103
103
  Lib.uloc_getDisplayScript(@id, locale, buffer, buffer.size, status)
104
104
  end
105
105
  end
@@ -0,0 +1,47 @@
module ICU
  # Wrapper around the newer ICU normalization API: the unorm2_* functions
  # bound on ICU::Lib (unorm2_getInstance, unorm2_normalize and
  # unorm2_isNormalized).
  class Normalizer
    # Looks up a normalizer instance through Lib.unorm2_getInstance and keeps
    # it for later calls.
    #
    # package_name - forwarded unchanged as the first argument of
    #                unorm2_getInstance (nil by default).
    # name         - name of the normalization data to use ('nfc' by default).
    # mode         - a :normalization2_mode symbol, i.e. one of :compose,
    #                :decompose, :fcd or :compose_contiguous.
    #
    # The lookup runs inside Lib.check_error, which receives the ICU status
    # pointer handed to the native call.
    def initialize(package_name = nil, name = 'nfc', mode = :decompose)
      Lib.check_error do |error|
        @instance = Lib.unorm2_getInstance(package_name, name, mode, error)
      end
    end

    # Normalizes +input+ with the instance selected in #initialize.
    #
    # The first native call is made with a zero-capacity output buffer. When
    # that ends in BufferOverflowError, the buffer is resized to the length
    # reported by unorm2_normalize and the call is retried exactly once.
    #
    # Returns whatever UCharPointer#string yields for the output buffer.
    # Raises BufferOverflowError if the resized buffer overflows again.
    def normalize(input)
      input_length = input.jlength
      in_ptr = UCharPointer.from_string(input)
      # needed_length is declared here so the block below updates this local.
      needed_length = capacity = 0
      out_ptr = UCharPointer.new(needed_length)

      retried = false
      begin
        Lib.check_error do |error|
          needed_length = Lib.unorm2_normalize(@instance, in_ptr, input_length, out_ptr, capacity, error)
        end
      rescue BufferOverflowError
        # Only one resize is attempted; a second overflow is reported to the caller.
        raise BufferOverflowError, "needed: #{needed_length}" if retried

        capacity = needed_length
        out_ptr = out_ptr.resized_to needed_length

        retried = true
        retry
      end

      out_ptr.string
    end

    # Asks ICU whether +input+ is already normalized for this instance.
    #
    # Returns the boolean produced by Lib.unorm2_isNormalized.
    #
    # The misspelled method name is part of the published interface and is
    # kept for compatibility; #normalized? is the correctly spelled alias.
    def is_normailzed?(input)
      input_length = input.jlength
      in_ptr = UCharPointer.from_string(input)
      # Must exist before the block: a local first assigned inside a block is
      # scoped to that block, and reading it afterwards raises NameError.
      result = nil

      Lib.check_error do |error|
        result = Lib.unorm2_isNormalized(@instance, in_ptr, input_length, error)
      end

      result
    end
    alias normalized? is_normailzed?
  end # Normalizer
end # ICU
@@ -1,3 +1,4 @@
1
+ require 'date'
1
2
 
2
3
  module ICU
3
4
  module TimeFormatting
@@ -1,3 +1,3 @@
1
1
  module ICU
2
- VERSION = "0.1.7"
2
+ VERSION = "0.3.0"
3
3
  end
@@ -1,75 +1,76 @@
1
1
  # encoding: utf-8
2
2
 
3
- require "spec_helper"
4
-
5
3
  module ICU
6
4
  describe BreakIterator do
7
5
 
8
6
  it "should return available locales" do
9
7
  locales = ICU::BreakIterator.available_locales
10
- locales.should be_kind_of(Array)
11
- locales.should_not be_empty
12
- locales.should include("en_US")
8
+ expect(locales).to be_an(Array)
9
+ expect(locales).to_not be_empty
10
+ expect(locales).to include("en_US")
13
11
  end
14
12
 
15
13
  it "finds all word boundaries in an English string" do
16
14
  iterator = BreakIterator.new :word, "en_US"
17
15
  iterator.text = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
18
- iterator.to_a.should == [0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
16
+ expect(iterator.to_a).to eq(
17
+ [0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
18
+ )
19
19
  end
20
20
 
21
21
  it "returns each substring" do
22
22
  iterator = BreakIterator.new :word, "en_US"
23
23
  iterator.text = "Lorem ipsum dolor sit amet."
24
24
 
25
- iterator.substrings.should == ["Lorem", " ", "ipsum", " ", "dolor", " ", "sit", " ", "amet", "."]
25
+ expect(iterator.substrings).to eq(["Lorem", " ", "ipsum", " ", "dolor", " ", "sit", " ", "amet", "."])
26
26
  end
27
27
 
28
28
  it "returns the substrings of a non-ASCII string" do
29
29
  iterator = BreakIterator.new :word, "th_TH"
30
30
  iterator.text = "รู้อะไรไม่สู้รู้วิชา รู้รักษาตัวรอดเป็นยอดดี"
31
31
 
32
- iterator.substrings.should == ["รู้", "อะไร", "ไม่สู้", "รู้", "วิชา", " ", "รู้", "รักษา", "ตัว", "รอด", "เป็น", "ยอดดี"]
32
+ expect(iterator.substrings).to eq(
33
+ ["รู้", "อะไร", "ไม่สู้", "รู้", "วิชา", " ", "รู้", "รักษา", "ตัว", "รอด", "เป็น", "ยอดดี"]
34
+ )
33
35
  end
34
36
 
35
37
  it "finds all word boundaries in a non-ASCII string" do
36
38
  iterator = BreakIterator.new :word, "th_TH"
37
39
  iterator.text = "การทดลอง"
38
- iterator.to_a.should == [0, 3, 8]
40
+ expect(iterator.to_a).to eq([0, 3, 8])
39
41
  end
40
42
 
41
43
  it "finds all sentence boundaries in an English string" do
42
44
  iterator = BreakIterator.new :sentence, "en_US"
43
45
  iterator.text = "This is a sentence. This is another sentence, with a comma in it."
44
- iterator.to_a.should == [0, 20, 65]
46
+ expect(iterator.to_a).to eq([0, 20, 65])
45
47
  end
46
48
 
47
49
  it "can navigate back and forward" do
48
50
  iterator = BreakIterator.new :word, "en_US"
49
51
  iterator.text = "Lorem ipsum dolor sit amet."
50
52
 
51
- iterator.first.should == 0
53
+ expect(iterator.first).to eq(0)
52
54
  iterator.next
53
- iterator.current.should == 5
54
- iterator.last.should == 27
55
+ expect(iterator.current).to eq(5)
56
+ expect(iterator.last).to eq(27)
55
57
  end
56
58
 
57
59
  it "fetches info about given offset" do
58
60
  iterator = BreakIterator.new :word, "en_US"
59
61
  iterator.text = "Lorem ipsum dolor sit amet."
60
62
 
61
- iterator.following(3).should == 5
62
- iterator.preceding(6).should == 5
63
+ expect(iterator.following(3)).to eq(5)
64
+ expect(iterator.preceding(6)).to eq(5)
63
65
 
64
- iterator.should be_boundary(5)
65
- iterator.should_not be_boundary(10)
66
+ expect(iterator).to be_boundary(5)
67
+ expect(iterator).to_not be_boundary(10)
66
68
  end
67
69
 
68
70
  it "returns an Enumerator if no block was given" do
69
71
  iterator = BreakIterator.new :word, "nb"
70
- expected = ICU.ruby19? ? Enumerator : Enumerable::Enumerator
71
72
 
72
- iterator.each.should be_kind_of(expected)
73
+ expect(iterator.each).to be_kind_of(Enumerator)
73
74
  end
74
75
 
75
76
  end # BreakIterator
@@ -1,29 +1,27 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require 'spec_helper'
4
-
5
3
  describe ICU::CharDet::Detector do
6
4
 
7
5
  let(:detector) { ICU::CharDet::Detector.new }
8
6
 
9
7
  it "should recognize UTF-8" do
10
8
  m = detector.detect("æåø")
11
- m.name.should == "UTF-8"
12
- m.language.should be_kind_of(String)
9
+ expect(m.name).to eq("UTF-8")
10
+ expect(m.language).to be_a(String)
13
11
  end
14
12
 
15
13
  it "has a list of detectable charsets" do
16
14
  cs = detector.detectable_charsets
17
- cs.should be_kind_of(Array)
18
- cs.should_not be_empty
15
+ expect(cs).to be_an(Array)
16
+ expect(cs).to_not be_empty
19
17
 
20
- cs.first.should be_kind_of(String)
18
+ expect(cs.first).to be_a(String)
21
19
  end
22
20
 
23
21
  it "should disable / enable the input filter" do
24
- detector.input_filter_enabled?.should be_false
22
+ expect(detector.input_filter_enabled?).to be_falsey
25
23
  detector.input_filter_enabled = true
26
- detector.input_filter_enabled?.should be_true
24
+ expect(detector.input_filter_enabled?).to be_truthy
27
25
  end
28
26
 
29
27
  it "should should set declared encoding" do
@@ -31,14 +29,14 @@ describe ICU::CharDet::Detector do
31
29
  end
32
30
 
33
31
  it "should detect several matching encodings" do
34
- detector.detect_all("foo bar").should be_instance_of(Array)
32
+ expect(detector.detect_all("foo bar")).to be_an(Array)
35
33
  end
36
34
 
37
35
  it "should support null bytes" do
38
36
  # Create a utf-16 string and then force it to binary (ascii) to mimic data from net/http
39
37
  string = "foo".encode("UTF-16").force_encoding("binary")
40
38
  m = detector.detect(string)
41
- m.name.should == "UTF-16BE"
42
- m.language.should be_kind_of(String)
39
+ expect(m.name).to eq("UTF-16BE")
40
+ expect(m.language).to be_a(String)
43
41
  end
44
42
  end
@@ -1,12 +1,10 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require 'spec_helper'
4
-
5
3
  module ICU
6
4
  module Collation
7
5
  describe "Collation" do
8
6
  it "should collate an array of strings" do
9
- Collation.collate("nb", %w[æ å ø]).should == %w[æ ø å]
7
+ expect(Collation.collate("nb", %w[æ å ø])).to eq(%w[æ ø å])
10
8
  end
11
9
  end
12
10
 
@@ -14,51 +12,50 @@ module ICU
14
12
  let(:collator) { Collator.new("nb") }
15
13
 
16
14
  it "should collate an array of strings" do
17
- collator.collate(%w[å ø æ]).should == %w[æ ø å]
15
+ expect(collator.collate(%w[å ø æ])).to eq(%w[æ ø å])
18
16
  end
19
17
 
20
18
  it "raises an error if argument does not respond to :sort" do
21
- lambda { collator.collate(1) }.should raise_error(ArgumentError)
19
+ expect { collator.collate(1) }.to raise_error(ArgumentError)
22
20
  end
23
21
 
24
22
  it "should return available locales" do
25
23
  locales = ICU::Collation.available_locales
26
- locales.should be_kind_of(Array)
27
- locales.should_not be_empty
28
- locales.should include("nb")
24
+ expect(locales).to be_an(Array)
25
+ expect(locales).to_not be_empty
26
+ expect(locales).to include("nb")
29
27
  end
30
28
 
31
29
  it "should return the locale of the collator" do
32
- l = collator.locale
33
- l.should == "nb"
30
+ expect(collator.locale).to eq('nb')
34
31
  end
35
32
 
36
33
  it "should compare two strings" do
37
- collator.compare("blåbærsyltetøy", "blah").should == 1
38
- collator.compare("blah", "blah").should == 0
39
- collator.compare("ba", "bl").should == -1
34
+ expect(collator.compare("blåbærsyltetøy", "blah")).to eq(1)
35
+ expect(collator.compare("blah", "blah")).to eq(0)
36
+ expect(collator.compare("ba", "bl")).to eq(-1)
40
37
  end
41
38
 
42
39
  it "should know if a string is greater than another" do
43
- collator.should be_greater("z", "a")
44
- collator.should_not be_greater("a", "z")
40
+ expect(collator).to be_greater("z", "a")
41
+ expect(collator).to_not be_greater("a", "z")
45
42
  end
46
43
 
47
44
  it "should know if a string is greater or equal to another" do
48
- collator.should be_greater_or_equal("z", "a")
49
- collator.should be_greater_or_equal("z", "z")
50
- collator.should_not be_greater_or_equal("a", "z")
45
+ expect(collator).to be_greater_or_equal("z", "a")
46
+ expect(collator).to be_greater_or_equal("z", "z")
47
+ expect(collator).to_not be_greater_or_equal("a", "z")
51
48
  end
52
49
 
53
50
  it "should know if a string is equal to another" do
54
- collator.should be_equal("a", "a")
55
- collator.should_not be_equal("a", "b")
51
+ expect(collator).to be_equal("a", "a")
52
+ expect(collator).to_not be_equal("a", "b")
56
53
  end
57
54
 
58
55
  it "should return rules" do
59
- collator.rules.should_not be_empty
56
+ expect(collator.rules).to_not be_empty
60
57
  # ö sorts before Ö
61
- collator.rules.include?('ö<<<Ö').should be_true
58
+ expect(collator.rules).to include('ö<<<Ö')
62
59
  end
63
60
 
64
61
  end