ffi-icu 0.1.9 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: c381746aed3839388946fda0552882604cbee60c
4
- data.tar.gz: 6edcf4bb0a7c7fef09c99c3cf6bf269566682736
2
+ SHA256:
3
+ metadata.gz: 2066f9d1a113f07fb761d2c97f742e6cad8dde11b26c0e4d1da2ed8a85567297
4
+ data.tar.gz: e6df31c42fc3e518d8d19a98dec0b5aeed37ae90da3fd7570f32ef58397d531d
5
5
  SHA512:
6
- metadata.gz: 7c35fdf15d4d05069c52e26e60ac8f6818775fd7c224ee8b401de582a6cc3bf29f25da78c93c8260e3398cf38349b284f417c63a10560cff479c396e999f5cd5
7
- data.tar.gz: 1b1c25f761dec3bf855a5c70072f4ef3676319f1d86f8434914e0368df466e2caac85c94531f733ef0c49fbdfec8532f0c618e051d907ec75147d86a1c326e2e
6
+ metadata.gz: bf39178c2893939c57a8e7810d407da635ceff9b4cc5ddbe60360df53a0bf7f35b17d8f4036cf001d85edf4fa83efd8f4ed1b91d6cdb21e54d52810f536bf7ce
7
+ data.tar.gz: ee8c887487e3c3a0c0511e7017753b4801da3fc6a73e4d476211452026fd0ae3d8ae0d4c35be9eb5ed8827df8b757bc985dc5f5eafc77a12c872ca37347e9924
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.travis.yml CHANGED
@@ -1,7 +1,20 @@
1
+ language: ruby
2
+ os: linux
3
+ dist: focal
4
+
5
+ arch:
6
+ - amd64
7
+ - arm64
8
+
1
9
  rvm:
2
- - 1.9.3
3
- - 2.0.0
4
- - 2.1.0
10
+ - 2.6
11
+ - 2.7
12
+ - 3.0
5
13
  - ruby-head
14
+
6
15
  before_script:
7
- - sudo apt-get install -y libicu48
16
+ - sudo apt-get install -y libicu-dev
17
+
18
+ jobs:
19
+ allow_failures:
20
+ - arch: arm64
data/README.md CHANGED
@@ -1,9 +1,7 @@
1
- ffi-icu
1
+ ffi-icu [![Build Status](https://travis-ci.org/erickguan/ffi-icu.svg?branch=master)](https://travis-ci.org/erickguan/ffi-icu)
2
2
  =======
3
3
 
4
- Simple FFI wrappers for things I need from ICU. For the full thing, check out [ICU4R](http://icu4r.rubyforge.org/) instead.
5
-
6
- [![Build Status](https://secure.travis-ci.org/jarib/ffi-icu.png)](http://travis-ci.org/jarib/ffi-icu)
4
+ Simple FFI wrappers for ICU. Check out the renovated [ICU gem](https://github.com/fantasticfears/icu4r) instead, which supports various encodings and is distributed with packaged source. FFI-ICU needs some love with the ICU gem's transcoding method.
7
5
 
8
6
  Gem
9
7
  ---
@@ -18,7 +16,7 @@ Dependencies
18
16
  ICU.
19
17
 
20
18
  If you get messages that the library or functions are not found, you can
21
- set some environment varibles to tell ffi-icu where to find it, e.g.:
19
+ set some environment variables to tell ffi-icu where to find it, e.g.:
22
20
 
23
21
  $ export FFI_ICU_LIB="icui18n.so"
24
22
  $ export FFI_ICU_VERSION_SUFFIX="_3_8"
@@ -49,7 +47,6 @@ or
49
47
  Why not just use rchardet?
50
48
 
51
49
  * speed
52
- * 1.9 support
53
50
 
54
51
  Locale Sensitive Collation
55
52
  --------------------------
@@ -112,14 +109,24 @@ Examples:
112
109
  f #=> "12.11.15 15:21"
113
110
 
114
111
  # reusable formatting objects
115
- formater = ICU::TimeFormatting.create(:locale => 'cs_CZ', :zone => 'Europe/Prague', :date => :long , :time => :none)
116
- formater.format(Time.now) #=> "25. února 2015"
112
+ formatter = ICU::TimeFormatting.create(:locale => 'cs_CZ', :zone => 'Europe/Prague', :date => :long, :time => :none)
113
+ formatter.format(Time.now) #=> "25. února 2015"
117
114
  ```
118
115
 
119
116
  ```ruby
120
117
  # reusable formatting objects
121
- formater = ICU::TimeFormatting.create(:locale => 'cs_CZ', :zone => 'Europe/Prague', :date => :long , :time => :none)
122
- formater.parse("25. února 2015") #=> Wed Feb 25 00:00:00 +0100 2015
118
+ formatter = ICU::TimeFormatting.create(:locale => 'cs_CZ', :zone => 'Europe/Prague', :date => :long, :time => :none)
119
+ formatter.parse("25. února 2015") #=> Wed Feb 25 00:00:00 +0100 2015
120
+ ```
121
+
122
+ For skeleton formatting, visit the [Unicode date field symbol table](https://unicode-org.github.io/icu/userguide/format_parse/datetime/#date-field-symbol-table) page to help find the pattern characters to use.
123
+
124
+ ```ruby
125
+ formatter = ICU::TimeFormatting.create(:locale => 'cs_CZ', :date => :pattern, :time => :pattern, :skeleton => 'MMMMY')
126
+ formatter.format(Time.now) #=> "únor 2015"
127
+
128
+ formatter = ICU::TimeFormatting.create(:locale => 'cs_CZ', :date => :pattern, :time => :pattern, :skeleton => 'Y')
129
+ formatter.format(Time.now) #=> "2015"
123
130
  ```
124
131
 
125
132
  Tested on:
@@ -132,15 +139,15 @@ Platforms:
132
139
 
133
140
  Rubies:
134
141
 
135
- * 1.9.3
136
- * 2.0.0
137
- * 2.1.0
138
- * ruby-head
142
+ - 2.5
143
+ - 2.6
144
+ - 2.7
145
+ - ruby-head
139
146
 
140
147
  TODO:
141
148
  =====
142
149
 
143
- * Any other useful part of ICU?
150
+ * Any other useful part of ICU?
144
151
  * Windows?!
145
152
 
146
153
  Note on Patches/Pull Requests
data/ffi-icu.gemspec CHANGED
@@ -6,7 +6,8 @@ Gem::Specification.new do |s|
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jari Bakken"]
9
- s.date = %q{2010-08-23}
9
+ s.date = %q{2019-10-15}
10
+ s.licenses = ['MIT']
10
11
  s.description = %q{Provides charset detection, locale sensitive collation and more. Depends on libicu.}
11
12
  s.email = %q{jari.bakken@gmail.com}
12
13
  s.extra_rdoc_files = ["LICENSE", "README.md"]
@@ -20,7 +21,6 @@ Gem::Specification.new do |s|
20
21
  s.summary = %q{Simple Ruby FFI wrappers for things I need from ICU.}
21
22
 
22
23
  s.add_runtime_dependency "ffi", "~> 1.0", ">= 1.0.9"
23
- s.add_development_dependency "rspec", ["~> 2.5.0"]
24
- s.add_development_dependency "rake", ["~> 0.9.2"]
24
+ s.add_development_dependency 'rspec', '~> 3.9'
25
+ s.add_development_dependency "rake", [">= 12.3.3"]
25
26
  end
26
-
data/lib/ffi-icu.rb CHANGED
@@ -18,15 +18,6 @@ module ICU
18
18
  os
19
19
  end
20
20
  end
21
-
22
- def self.ruby19?
23
- RUBY_VERSION >= '1.9'
24
- end
25
- end
26
-
27
- unless ICU.ruby19?
28
- require 'jcode'
29
- $KCODE = 'u'
30
21
  end
31
22
 
32
23
  require "ffi-icu/core_ext/string"
data/lib/ffi-icu/lib.rb CHANGED
@@ -39,7 +39,12 @@ module ICU
39
39
  [find_lib("libicui18n.#{FFI::Platform::LIBSUFFIX}.??"),
40
40
  find_lib("libicutu.#{FFI::Platform::LIBSUFFIX}.??")]
41
41
  when :osx
42
- [find_lib("libicucore.#{FFI::Platform::LIBSUFFIX}")]
42
+ # See https://developer.apple.com/documentation/macos-release-notes/macos-big-sur-11_0_1-release-notes (62986286)
43
+ if Gem::Version.new(`sw_vers -productVersion`) >= Gem::Version.new('11')
44
+ ["libicucore.#{FFI::Platform::LIBSUFFIX}"]
45
+ else
46
+ [find_lib("libicucore.#{FFI::Platform::LIBSUFFIX}")]
47
+ end
43
48
  when :linux
44
49
  [find_lib("libicui18n.#{FFI::Platform::LIBSUFFIX}.??"),
45
50
  find_lib("libicutu.#{FFI::Platform::LIBSUFFIX}.??")]
@@ -99,7 +104,7 @@ module ICU
99
104
  # Here are the possible suffixes
100
105
  suffixes = [""]
101
106
  if version
102
- suffixes << "_#{version}" << "_#{version[0].chr}_#{version[1].chr}"
107
+ suffixes << "_#{version}" << "_#{version[0].chr}_#{version[1].chr}" << "_#{version.split('.')[0]}"
103
108
  end
104
109
 
105
110
  # Try to find the u_errorName function using the possible suffixes
@@ -420,11 +425,12 @@ module ICU
420
425
  attach_function :unum_set_attribute, "unum_setAttribute#{suffix}", [:pointer, :number_format_attribute, :int32_t], :void
421
426
  # date
422
427
  enum :date_format_style, [
423
- :none, -1,
424
- :full, 0,
425
- :long, 1,
426
- :medium, 2,
427
- :short, 3,
428
+ :pattern, -2,
429
+ :none, -1,
430
+ :full, 0,
431
+ :long, 1,
432
+ :medium, 2,
433
+ :short, 3,
428
434
  ]
429
435
  attach_function :udat_open, "udat_open#{suffix}", [:date_format_style, :date_format_style, :string, :pointer, :int32_t, :pointer, :int32_t, :pointer ], :pointer
430
436
  attach_function :udat_close, "unum_close#{suffix}", [:pointer], :void
@@ -432,6 +438,9 @@ module ICU
432
438
  attach_function :udat_parse, "udat_parse#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :pointer], :double
433
439
  attach_function :udat_toPattern, "udat_toPattern#{suffix}", [:pointer, :bool , :pointer, :int32_t , :pointer], :int32_t
434
440
  attach_function :udat_applyPattern, "udat_applyPattern#{suffix}", [:pointer, :bool , :pointer, :int32_t ], :void
441
+ # skeleton pattern
442
+ attach_function :udatpg_open, "udatpg_open#{suffix}", [:string, :pointer], :pointer
443
+ attach_function :udatpg_getBestPattern, "udatpg_getBestPattern#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t, :pointer], :int32_t
435
444
  # tz
436
445
  attach_function :ucal_setDefaultTimeZone, "ucal_setDefaultTimeZone#{suffix}", [:pointer, :pointer], :int32_t
437
446
  attach_function :ucal_getDefaultTimeZone, "ucal_getDefaultTimeZone#{suffix}", [:pointer, :int32_t, :pointer], :int32_t
@@ -75,7 +75,7 @@ module ICU
75
75
  def display_country(locale = nil)
76
76
  locale = locale.to_s unless locale.nil?
77
77
 
78
- Lib::Util.read_uchar_buffer(64) do |buffer, status|
78
+ Lib::Util.read_uchar_buffer(256) do |buffer, status|
79
79
  Lib.uloc_getDisplayCountry(@id, locale, buffer, buffer.size, status)
80
80
  end
81
81
  end
@@ -83,7 +83,7 @@ module ICU
83
83
  def display_language(locale = nil)
84
84
  locale = locale.to_s unless locale.nil?
85
85
 
86
- Lib::Util.read_uchar_buffer(64) do |buffer, status|
86
+ Lib::Util.read_uchar_buffer(192) do |buffer, status|
87
87
  Lib.uloc_getDisplayLanguage(@id, locale, buffer, buffer.size, status)
88
88
  end
89
89
  end
@@ -91,7 +91,7 @@ module ICU
91
91
  def display_name(locale = nil)
92
92
  locale = locale.to_s unless locale.nil?
93
93
 
94
- Lib::Util.read_uchar_buffer(64) do |buffer, status|
94
+ Lib::Util.read_uchar_buffer(256) do |buffer, status|
95
95
  Lib.uloc_getDisplayName(@id, locale, buffer, buffer.size, status)
96
96
  end
97
97
  end
@@ -99,7 +99,7 @@ module ICU
99
99
  def display_script(locale = nil)
100
100
  locale = locale.to_s unless locale.nil?
101
101
 
102
- Lib::Util.read_uchar_buffer(64) do |buffer, status|
102
+ Lib::Util.read_uchar_buffer(128) do |buffer, status|
103
103
  Lib.uloc_getDisplayScript(@id, locale, buffer, buffer.size, status)
104
104
  end
105
105
  end
@@ -1,3 +1,4 @@
1
+ require 'date'
1
2
 
2
3
  module ICU
3
4
  module TimeFormatting
@@ -64,21 +65,31 @@ module ICU
64
65
 
65
66
  private
66
67
 
67
- def make_formatter(time_style, date_style, locale, time_zone_str)
68
- time_zone = nil
69
- d_len = 0
68
+ def make_formatter(time_style, date_style, locale, time_zone_str, skeleton)
69
+ time_zone = nil
70
+ tz_len = 0
71
+ pattern_len = -1
72
+ pattern_ptr = FFI::MemoryPointer.new(4)
73
+
70
74
  if time_zone_str
71
75
  time_zone = UCharPointer.from_string(time_zone_str)
72
- d_len = time_zone_str.size
76
+ tz_len = time_zone_str.size
73
77
  else
74
78
  Lib.check_error { | error|
75
79
  i_len = 150
76
80
  time_zone = UCharPointer.new(i_len)
77
- d_len = Lib.ucal_getDefaultTimeZone(time_zone, i_len, error)
81
+ tz_len = Lib.ucal_getDefaultTimeZone(time_zone, i_len, error)
78
82
  }
79
83
  end
80
84
 
81
- ptr = Lib.check_error { | error| Lib.udat_open(time_style, date_style, locale, time_zone, d_len, FFI::MemoryPointer.new(4), -1, error) }
85
+ if skeleton
86
+ date_style = :pattern
87
+ time_style = :pattern
88
+
89
+ pattern_len, pattern_ptr = skeleton_format(skeleton, locale)
90
+ end
91
+
92
+ ptr = Lib.check_error { | error| Lib.udat_open(time_style, date_style, locale, time_zone, tz_len, pattern_ptr, pattern_len, error) }
82
93
  FFI::AutoPointer.new(ptr, Lib.method(:udat_close))
83
94
  end
84
95
  end
@@ -90,7 +101,9 @@ module ICU
90
101
  locale = options[:locale] || 'C'
91
102
  tz_style = options[:tz_style]
92
103
  time_zone = options[:zone]
93
- @f = make_formatter(time_style, date_style, locale, time_zone)
104
+ skeleton = options[:skeleton]
105
+
106
+ @f = make_formatter(time_style, date_style, locale, time_zone, skeleton)
94
107
  if tz_style
95
108
  f0 = date_format(true)
96
109
  f1 = update_tz_format(f0, tz_style)
@@ -176,6 +189,31 @@ module ICU
176
189
  needed_length = Lib.udat_applyPattern(@f, localized, pattern, pattern_len)
177
190
  end
178
191
  end
192
+
193
+ def skeleton_format(pattern, locale)
194
+ pattern = UCharPointer.from_string(pattern)
195
+
196
+ needed_length = 0
197
+ pattern_ptr = UCharPointer.new(needed_length)
198
+
199
+ udatpg_ptr = Lib.check_error { |error| Lib.udatpg_open(locale, error) }
200
+ generator = FFI::AutoPointer.new(udatpg_ptr, Lib.method(:udat_close))
201
+
202
+ retried = false
203
+
204
+ begin
205
+ Lib.check_error do |error|
206
+ needed_length = Lib.udatpg_getBestPattern(generator, pattern, pattern.size, pattern_ptr, needed_length, error)
207
+ end
208
+
209
+ return needed_length, pattern_ptr
210
+ rescue BufferOverflowError
211
+ raise BufferOverflowError, "needed: #{needed_length}" if retried
212
+ pattern_ptr = pattern_ptr.resized_to needed_length
213
+ retried = true
214
+ retry
215
+ end
216
+ end
179
217
  end # DateTimeFormatter
180
218
  end # Formatting
181
219
  end # ICU
@@ -1,3 +1,3 @@
1
1
  module ICU
2
- VERSION = "0.1.9"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -1,75 +1,76 @@
1
1
  # encoding: utf-8
2
2
 
3
- require "spec_helper"
4
-
5
3
  module ICU
6
4
  describe BreakIterator do
7
5
 
8
6
  it "should return available locales" do
9
7
  locales = ICU::BreakIterator.available_locales
10
- locales.should be_kind_of(Array)
11
- locales.should_not be_empty
12
- locales.should include("en_US")
8
+ expect(locales).to be_an(Array)
9
+ expect(locales).to_not be_empty
10
+ expect(locales).to include("en_US")
13
11
  end
14
12
 
15
13
  it "finds all word boundaries in an English string" do
16
14
  iterator = BreakIterator.new :word, "en_US"
17
15
  iterator.text = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
18
- iterator.to_a.should == [0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
16
+ expect(iterator.to_a).to eq(
17
+ [0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
18
+ )
19
19
  end
20
20
 
21
21
  it "returns each substring" do
22
22
  iterator = BreakIterator.new :word, "en_US"
23
23
  iterator.text = "Lorem ipsum dolor sit amet."
24
24
 
25
- iterator.substrings.should == ["Lorem", " ", "ipsum", " ", "dolor", " ", "sit", " ", "amet", "."]
25
+ expect(iterator.substrings).to eq(["Lorem", " ", "ipsum", " ", "dolor", " ", "sit", " ", "amet", "."])
26
26
  end
27
27
 
28
28
  it "returns the substrings of a non-ASCII string" do
29
29
  iterator = BreakIterator.new :word, "th_TH"
30
30
  iterator.text = "รู้อะไรไม่สู้รู้วิชา รู้รักษาตัวรอดเป็นยอดดี"
31
31
 
32
- iterator.substrings.should == ["รู้", "อะไร", "ไม่สู้", "รู้", "วิชา", " ", "รู้", "รักษา", "ตัว", "รอด", "เป็น", "ยอดดี"]
32
+ expect(iterator.substrings).to eq(
33
+ ["รู้", "อะไร", "ไม่สู้", "รู้", "วิชา", " ", "รู้", "รักษา", "ตัว", "รอด", "เป็น", "ยอดดี"]
34
+ )
33
35
  end
34
36
 
35
37
  it "finds all word boundaries in a non-ASCII string" do
36
38
  iterator = BreakIterator.new :word, "th_TH"
37
39
  iterator.text = "การทดลอง"
38
- iterator.to_a.should == [0, 3, 8]
40
+ expect(iterator.to_a).to eq([0, 3, 8])
39
41
  end
40
42
 
41
43
  it "finds all sentence boundaries in an English string" do
42
44
  iterator = BreakIterator.new :sentence, "en_US"
43
45
  iterator.text = "This is a sentence. This is another sentence, with a comma in it."
44
- iterator.to_a.should == [0, 20, 65]
46
+ expect(iterator.to_a).to eq([0, 20, 65])
45
47
  end
46
48
 
47
49
  it "can navigate back and forward" do
48
50
  iterator = BreakIterator.new :word, "en_US"
49
51
  iterator.text = "Lorem ipsum dolor sit amet."
50
52
 
51
- iterator.first.should == 0
53
+ expect(iterator.first).to eq(0)
52
54
  iterator.next
53
- iterator.current.should == 5
54
- iterator.last.should == 27
55
+ expect(iterator.current).to eq(5)
56
+ expect(iterator.last).to eq(27)
55
57
  end
56
58
 
57
59
  it "fetches info about given offset" do
58
60
  iterator = BreakIterator.new :word, "en_US"
59
61
  iterator.text = "Lorem ipsum dolor sit amet."
60
62
 
61
- iterator.following(3).should == 5
62
- iterator.preceding(6).should == 5
63
+ expect(iterator.following(3)).to eq(5)
64
+ expect(iterator.preceding(6)).to eq(5)
63
65
 
64
- iterator.should be_boundary(5)
65
- iterator.should_not be_boundary(10)
66
+ expect(iterator).to be_boundary(5)
67
+ expect(iterator).to_not be_boundary(10)
66
68
  end
67
69
 
68
70
  it "returns an Enumerator if no block was given" do
69
71
  iterator = BreakIterator.new :word, "nb"
70
- expected = ICU.ruby19? ? Enumerator : Enumerable::Enumerator
71
72
 
72
- iterator.each.should be_kind_of(expected)
73
+ expect(iterator.each).to be_kind_of(Enumerator)
73
74
  end
74
75
 
75
76
  end # BreakIterator
data/spec/chardet_spec.rb CHANGED
@@ -1,29 +1,27 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require 'spec_helper'
4
-
5
3
  describe ICU::CharDet::Detector do
6
4
 
7
5
  let(:detector) { ICU::CharDet::Detector.new }
8
6
 
9
7
  it "should recognize UTF-8" do
10
8
  m = detector.detect("æåø")
11
- m.name.should == "UTF-8"
12
- m.language.should be_kind_of(String)
9
+ expect(m.name).to eq("UTF-8")
10
+ expect(m.language).to be_a(String)
13
11
  end
14
12
 
15
13
  it "has a list of detectable charsets" do
16
14
  cs = detector.detectable_charsets
17
- cs.should be_kind_of(Array)
18
- cs.should_not be_empty
15
+ expect(cs).to be_an(Array)
16
+ expect(cs).to_not be_empty
19
17
 
20
- cs.first.should be_kind_of(String)
18
+ expect(cs.first).to be_a(String)
21
19
  end
22
20
 
23
21
  it "should disable / enable the input filter" do
24
- detector.input_filter_enabled?.should be_false
22
+ expect(detector.input_filter_enabled?).to be_falsey
25
23
  detector.input_filter_enabled = true
26
- detector.input_filter_enabled?.should be_true
24
+ expect(detector.input_filter_enabled?).to be_truthy
27
25
  end
28
26
 
29
27
  it "should should set declared encoding" do
@@ -31,14 +29,14 @@ describe ICU::CharDet::Detector do
31
29
  end
32
30
 
33
31
  it "should detect several matching encodings" do
34
- detector.detect_all("foo bar").should be_instance_of(Array)
32
+ expect(detector.detect_all("foo bar")).to be_an(Array)
35
33
  end
36
34
 
37
35
  it "should support null bytes" do
38
36
  # Create a utf-16 string and then force it to binary (ascii) to mimic data from net/http
39
37
  string = "foo".encode("UTF-16").force_encoding("binary")
40
38
  m = detector.detect(string)
41
- m.name.should == "UTF-16BE"
42
- m.language.should be_kind_of(String)
39
+ expect(m.name).to eq("UTF-16BE")
40
+ expect(m.language).to be_a(String)
43
41
  end
44
42
  end