ffi-icu 0.1.9 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: c381746aed3839388946fda0552882604cbee60c
4
- data.tar.gz: 6edcf4bb0a7c7fef09c99c3cf6bf269566682736
2
+ SHA256:
3
+ metadata.gz: 2066f9d1a113f07fb761d2c97f742e6cad8dde11b26c0e4d1da2ed8a85567297
4
+ data.tar.gz: e6df31c42fc3e518d8d19a98dec0b5aeed37ae90da3fd7570f32ef58397d531d
5
5
  SHA512:
6
- metadata.gz: 7c35fdf15d4d05069c52e26e60ac8f6818775fd7c224ee8b401de582a6cc3bf29f25da78c93c8260e3398cf38349b284f417c63a10560cff479c396e999f5cd5
7
- data.tar.gz: 1b1c25f761dec3bf855a5c70072f4ef3676319f1d86f8434914e0368df466e2caac85c94531f733ef0c49fbdfec8532f0c618e051d907ec75147d86a1c326e2e
6
+ metadata.gz: bf39178c2893939c57a8e7810d407da635ceff9b4cc5ddbe60360df53a0bf7f35b17d8f4036cf001d85edf4fa83efd8f4ed1b91d6cdb21e54d52810f536bf7ce
7
+ data.tar.gz: ee8c887487e3c3a0c0511e7017753b4801da3fc6a73e4d476211452026fd0ae3d8ae0d4c35be9eb5ed8827df8b757bc985dc5f5eafc77a12c872ca37347e9924
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.travis.yml CHANGED
@@ -1,7 +1,20 @@
1
+ language: ruby
2
+ os: linux
3
+ dist: focal
4
+
5
+ arch:
6
+ - amd64
7
+ - arm64
8
+
1
9
  rvm:
2
- - 1.9.3
3
- - 2.0.0
4
- - 2.1.0
10
+ - 2.6
11
+ - 2.7
12
+ - 3.0
5
13
  - ruby-head
14
+
6
15
  before_script:
7
- - sudo apt-get install -y libicu48
16
+ - sudo apt-get install -y libicu-dev
17
+
18
+ jobs:
19
+ allow_failures:
20
+ - arch: arm64
data/README.md CHANGED
@@ -1,9 +1,7 @@
1
- ffi-icu
1
+ ffi-icu [![Build Status](https://travis-ci.org/erickguan/ffi-icu.svg?branch=master)](https://travis-ci.org/erickguan/ffi-icu)
2
2
  =======
3
3
 
4
- Simple FFI wrappers for things I need from ICU. For the full thing, check out [ICU4R](http://icu4r.rubyforge.org/) instead.
5
-
6
- [![Build Status](https://secure.travis-ci.org/jarib/ffi-icu.png)](http://travis-ci.org/jarib/ffi-icu)
4
+ Simple FFI wrappers for ICU. Check out the renovated [ICU gem](https://github.com/fantasticfears/icu4r) instead, which supports various encodings and is distributed with packaged source. FFI-ICU needs some love with the ICU gem's transcoding method.
7
5
 
8
6
  Gem
9
7
  ---
@@ -18,7 +16,7 @@ Dependencies
18
16
  ICU.
19
17
 
20
18
  If you get messages that the library or functions are not found, you can
21
- set some environment varibles to tell ffi-icu where to find it, e.g.:
19
+ set some environment variables to tell ffi-icu where to find it, e.g.:
22
20
 
23
21
  $ export FFI_ICU_LIB="icui18n.so"
24
22
  $ export FFI_ICU_VERSION_SUFFIX="_3_8"
@@ -49,7 +47,6 @@ or
49
47
  Why not just use rchardet?
50
48
 
51
49
  * speed
52
- * 1.9 support
53
50
 
54
51
  Locale Sensitive Collation
55
52
  --------------------------
@@ -112,14 +109,24 @@ Examples:
112
109
  f #=> "12.11.15 15:21"
113
110
 
114
111
  # reusable formatting objects
115
- formater = ICU::TimeFormatting.create(:locale => 'cs_CZ', :zone => 'Europe/Prague', :date => :long , :time => :none)
116
- formater.format(Time.now) #=> "25. února 2015"
112
+ formatter = ICU::TimeFormatting.create(:locale => 'cs_CZ', :zone => 'Europe/Prague', :date => :long, :time => :none)
113
+ formatter.format(Time.now) #=> "25. února 2015"
117
114
  ```
118
115
 
119
116
  ```ruby
120
117
  # reusable formatting objects
121
- formater = ICU::TimeFormatting.create(:locale => 'cs_CZ', :zone => 'Europe/Prague', :date => :long , :time => :none)
122
- formater.parse("25. února 2015") #=> Wed Feb 25 00:00:00 +0100 2015
118
+ formatter = ICU::TimeFormatting.create(:locale => 'cs_CZ', :zone => 'Europe/Prague', :date => :long, :time => :none)
119
+ formatter.parse("25. února 2015") #=> Wed Feb 25 00:00:00 +0100 2015
120
+ ```
121
+
122
+ For skeleton formatting, visit the [Unicode date field symbol table](https://unicode-org.github.io/icu/userguide/format_parse/datetime/#date-field-symbol-table) page to help find the pattern characters to use.
123
+
124
+ ```ruby
125
+ formatter = ICU::TimeFormatting.create(:locale => 'cs_CZ', :date => :pattern, :time => :pattern, :skeleton => 'MMMMY')
126
+ formatter.format(Time.now) #=> "únor 2015"
127
+
128
+ formatter = ICU::TimeFormatting.create(:locale => 'cs_CZ', :date => :pattern, :time => :pattern, :skeleton => 'Y')
129
+ formatter.format(Time.now) #=> "2015"
123
130
  ```
124
131
 
125
132
  Tested on:
@@ -132,15 +139,15 @@ Platforms:
132
139
 
133
140
  Rubies:
134
141
 
135
- * 1.9.3
136
- * 2.0.0
137
- * 2.1.0
138
- * ruby-head
142
+ - 2.5
143
+ - 2.6
144
+ - 2.7
145
+ - ruby-head
139
146
 
140
147
  TODO:
141
148
  =====
142
149
 
143
- * Any other useful part of ICU?
150
+ * Any other useful part of ICU?
144
151
  * Windows?!
145
152
 
146
153
  Note on Patches/Pull Requests
data/ffi-icu.gemspec CHANGED
@@ -6,7 +6,8 @@ Gem::Specification.new do |s|
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jari Bakken"]
9
- s.date = %q{2010-08-23}
9
+ s.date = %q{2019-10-15}
10
+ s.licenses = ['MIT']
10
11
  s.description = %q{Provides charset detection, locale sensitive collation and more. Depends on libicu.}
11
12
  s.email = %q{jari.bakken@gmail.com}
12
13
  s.extra_rdoc_files = ["LICENSE", "README.md"]
@@ -20,7 +21,6 @@ Gem::Specification.new do |s|
20
21
  s.summary = %q{Simple Ruby FFI wrappers for things I need from ICU.}
21
22
 
22
23
  s.add_runtime_dependency "ffi", "~> 1.0", ">= 1.0.9"
23
- s.add_development_dependency "rspec", ["~> 2.5.0"]
24
- s.add_development_dependency "rake", ["~> 0.9.2"]
24
+ s.add_development_dependency 'rspec', '~> 3.9'
25
+ s.add_development_dependency "rake", [">= 12.3.3"]
25
26
  end
26
-
data/lib/ffi-icu.rb CHANGED
@@ -18,15 +18,6 @@ module ICU
18
18
  os
19
19
  end
20
20
  end
21
-
22
- def self.ruby19?
23
- RUBY_VERSION >= '1.9'
24
- end
25
- end
26
-
27
- unless ICU.ruby19?
28
- require 'jcode'
29
- $KCODE = 'u'
30
21
  end
31
22
 
32
23
  require "ffi-icu/core_ext/string"
data/lib/ffi-icu/lib.rb CHANGED
@@ -39,7 +39,12 @@ module ICU
39
39
  [find_lib("libicui18n.#{FFI::Platform::LIBSUFFIX}.??"),
40
40
  find_lib("libicutu.#{FFI::Platform::LIBSUFFIX}.??")]
41
41
  when :osx
42
- [find_lib("libicucore.#{FFI::Platform::LIBSUFFIX}")]
42
+ # See https://developer.apple.com/documentation/macos-release-notes/macos-big-sur-11_0_1-release-notes (62986286)
43
+ if Gem::Version.new(`sw_vers -productVersion`) >= Gem::Version.new('11')
44
+ ["libicucore.#{FFI::Platform::LIBSUFFIX}"]
45
+ else
46
+ [find_lib("libicucore.#{FFI::Platform::LIBSUFFIX}")]
47
+ end
43
48
  when :linux
44
49
  [find_lib("libicui18n.#{FFI::Platform::LIBSUFFIX}.??"),
45
50
  find_lib("libicutu.#{FFI::Platform::LIBSUFFIX}.??")]
@@ -99,7 +104,7 @@ module ICU
99
104
  # Here are the possible suffixes
100
105
  suffixes = [""]
101
106
  if version
102
- suffixes << "_#{version}" << "_#{version[0].chr}_#{version[1].chr}"
107
+ suffixes << "_#{version}" << "_#{version[0].chr}_#{version[1].chr}" << "_#{version.split('.')[0]}"
103
108
  end
104
109
 
105
110
  # Try to find the u_errorName function using the possible suffixes
@@ -420,11 +425,12 @@ module ICU
420
425
  attach_function :unum_set_attribute, "unum_setAttribute#{suffix}", [:pointer, :number_format_attribute, :int32_t], :void
421
426
  # date
422
427
  enum :date_format_style, [
423
- :none, -1,
424
- :full, 0,
425
- :long, 1,
426
- :medium, 2,
427
- :short, 3,
428
+ :pattern, -2,
429
+ :none, -1,
430
+ :full, 0,
431
+ :long, 1,
432
+ :medium, 2,
433
+ :short, 3,
428
434
  ]
429
435
  attach_function :udat_open, "udat_open#{suffix}", [:date_format_style, :date_format_style, :string, :pointer, :int32_t, :pointer, :int32_t, :pointer ], :pointer
430
436
  attach_function :udat_close, "unum_close#{suffix}", [:pointer], :void
@@ -432,6 +438,9 @@ module ICU
432
438
  attach_function :udat_parse, "udat_parse#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :pointer], :double
433
439
  attach_function :udat_toPattern, "udat_toPattern#{suffix}", [:pointer, :bool , :pointer, :int32_t , :pointer], :int32_t
434
440
  attach_function :udat_applyPattern, "udat_applyPattern#{suffix}", [:pointer, :bool , :pointer, :int32_t ], :void
441
+ # skeleton pattern
442
+ attach_function :udatpg_open, "udatpg_open#{suffix}", [:string, :pointer], :pointer
443
+ attach_function :udatpg_getBestPattern, "udatpg_getBestPattern#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t, :pointer], :int32_t
435
444
  # tz
436
445
  attach_function :ucal_setDefaultTimeZone, "ucal_setDefaultTimeZone#{suffix}", [:pointer, :pointer], :int32_t
437
446
  attach_function :ucal_getDefaultTimeZone, "ucal_getDefaultTimeZone#{suffix}", [:pointer, :int32_t, :pointer], :int32_t
@@ -75,7 +75,7 @@ module ICU
75
75
  def display_country(locale = nil)
76
76
  locale = locale.to_s unless locale.nil?
77
77
 
78
- Lib::Util.read_uchar_buffer(64) do |buffer, status|
78
+ Lib::Util.read_uchar_buffer(256) do |buffer, status|
79
79
  Lib.uloc_getDisplayCountry(@id, locale, buffer, buffer.size, status)
80
80
  end
81
81
  end
@@ -83,7 +83,7 @@ module ICU
83
83
  def display_language(locale = nil)
84
84
  locale = locale.to_s unless locale.nil?
85
85
 
86
- Lib::Util.read_uchar_buffer(64) do |buffer, status|
86
+ Lib::Util.read_uchar_buffer(192) do |buffer, status|
87
87
  Lib.uloc_getDisplayLanguage(@id, locale, buffer, buffer.size, status)
88
88
  end
89
89
  end
@@ -91,7 +91,7 @@ module ICU
91
91
  def display_name(locale = nil)
92
92
  locale = locale.to_s unless locale.nil?
93
93
 
94
- Lib::Util.read_uchar_buffer(64) do |buffer, status|
94
+ Lib::Util.read_uchar_buffer(256) do |buffer, status|
95
95
  Lib.uloc_getDisplayName(@id, locale, buffer, buffer.size, status)
96
96
  end
97
97
  end
@@ -99,7 +99,7 @@ module ICU
99
99
  def display_script(locale = nil)
100
100
  locale = locale.to_s unless locale.nil?
101
101
 
102
- Lib::Util.read_uchar_buffer(64) do |buffer, status|
102
+ Lib::Util.read_uchar_buffer(128) do |buffer, status|
103
103
  Lib.uloc_getDisplayScript(@id, locale, buffer, buffer.size, status)
104
104
  end
105
105
  end
@@ -1,3 +1,4 @@
1
+ require 'date'
1
2
 
2
3
  module ICU
3
4
  module TimeFormatting
@@ -64,21 +65,31 @@ module ICU
64
65
 
65
66
  private
66
67
 
67
- def make_formatter(time_style, date_style, locale, time_zone_str)
68
- time_zone = nil
69
- d_len = 0
68
+ def make_formatter(time_style, date_style, locale, time_zone_str, skeleton)
69
+ time_zone = nil
70
+ tz_len = 0
71
+ pattern_len = -1
72
+ pattern_ptr = FFI::MemoryPointer.new(4)
73
+
70
74
  if time_zone_str
71
75
  time_zone = UCharPointer.from_string(time_zone_str)
72
- d_len = time_zone_str.size
76
+ tz_len = time_zone_str.size
73
77
  else
74
78
  Lib.check_error { | error|
75
79
  i_len = 150
76
80
  time_zone = UCharPointer.new(i_len)
77
- d_len = Lib.ucal_getDefaultTimeZone(time_zone, i_len, error)
81
+ tz_len = Lib.ucal_getDefaultTimeZone(time_zone, i_len, error)
78
82
  }
79
83
  end
80
84
 
81
- ptr = Lib.check_error { | error| Lib.udat_open(time_style, date_style, locale, time_zone, d_len, FFI::MemoryPointer.new(4), -1, error) }
85
+ if skeleton
86
+ date_style = :pattern
87
+ time_style = :pattern
88
+
89
+ pattern_len, pattern_ptr = skeleton_format(skeleton, locale)
90
+ end
91
+
92
+ ptr = Lib.check_error { | error| Lib.udat_open(time_style, date_style, locale, time_zone, tz_len, pattern_ptr, pattern_len, error) }
82
93
  FFI::AutoPointer.new(ptr, Lib.method(:udat_close))
83
94
  end
84
95
  end
@@ -90,7 +101,9 @@ module ICU
90
101
  locale = options[:locale] || 'C'
91
102
  tz_style = options[:tz_style]
92
103
  time_zone = options[:zone]
93
- @f = make_formatter(time_style, date_style, locale, time_zone)
104
+ skeleton = options[:skeleton]
105
+
106
+ @f = make_formatter(time_style, date_style, locale, time_zone, skeleton)
94
107
  if tz_style
95
108
  f0 = date_format(true)
96
109
  f1 = update_tz_format(f0, tz_style)
@@ -176,6 +189,31 @@ module ICU
176
189
  needed_length = Lib.udat_applyPattern(@f, localized, pattern, pattern_len)
177
190
  end
178
191
  end
192
+
193
+ def skeleton_format(pattern, locale)
194
+ pattern = UCharPointer.from_string(pattern)
195
+
196
+ needed_length = 0
197
+ pattern_ptr = UCharPointer.new(needed_length)
198
+
199
+ udatpg_ptr = Lib.check_error { |error| Lib.udatpg_open(locale, error) }
200
+ generator = FFI::AutoPointer.new(udatpg_ptr, Lib.method(:udat_close))
201
+
202
+ retried = false
203
+
204
+ begin
205
+ Lib.check_error do |error|
206
+ needed_length = Lib.udatpg_getBestPattern(generator, pattern, pattern.size, pattern_ptr, needed_length, error)
207
+ end
208
+
209
+ return needed_length, pattern_ptr
210
+ rescue BufferOverflowError
211
+ raise BufferOverflowError, "needed: #{needed_length}" if retried
212
+ pattern_ptr = pattern_ptr.resized_to needed_length
213
+ retried = true
214
+ retry
215
+ end
216
+ end
179
217
  end # DateTimeFormatter
180
218
  end # Formatting
181
219
  end # ICU
@@ -1,3 +1,3 @@
1
1
  module ICU
2
- VERSION = "0.1.9"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -1,75 +1,76 @@
1
1
  # encoding: utf-8
2
2
 
3
- require "spec_helper"
4
-
5
3
  module ICU
6
4
  describe BreakIterator do
7
5
 
8
6
  it "should return available locales" do
9
7
  locales = ICU::BreakIterator.available_locales
10
- locales.should be_kind_of(Array)
11
- locales.should_not be_empty
12
- locales.should include("en_US")
8
+ expect(locales).to be_an(Array)
9
+ expect(locales).to_not be_empty
10
+ expect(locales).to include("en_US")
13
11
  end
14
12
 
15
13
  it "finds all word boundaries in an English string" do
16
14
  iterator = BreakIterator.new :word, "en_US"
17
15
  iterator.text = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
18
- iterator.to_a.should == [0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
16
+ expect(iterator.to_a).to eq(
17
+ [0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
18
+ )
19
19
  end
20
20
 
21
21
  it "returns each substring" do
22
22
  iterator = BreakIterator.new :word, "en_US"
23
23
  iterator.text = "Lorem ipsum dolor sit amet."
24
24
 
25
- iterator.substrings.should == ["Lorem", " ", "ipsum", " ", "dolor", " ", "sit", " ", "amet", "."]
25
+ expect(iterator.substrings).to eq(["Lorem", " ", "ipsum", " ", "dolor", " ", "sit", " ", "amet", "."])
26
26
  end
27
27
 
28
28
  it "returns the substrings of a non-ASCII string" do
29
29
  iterator = BreakIterator.new :word, "th_TH"
30
30
  iterator.text = "รู้อะไรไม่สู้รู้วิชา รู้รักษาตัวรอดเป็นยอดดี"
31
31
 
32
- iterator.substrings.should == ["รู้", "อะไร", "ไม่สู้", "รู้", "วิชา", " ", "รู้", "รักษา", "ตัว", "รอด", "เป็น", "ยอดดี"]
32
+ expect(iterator.substrings).to eq(
33
+ ["รู้", "อะไร", "ไม่สู้", "รู้", "วิชา", " ", "รู้", "รักษา", "ตัว", "รอด", "เป็น", "ยอดดี"]
34
+ )
33
35
  end
34
36
 
35
37
  it "finds all word boundaries in a non-ASCII string" do
36
38
  iterator = BreakIterator.new :word, "th_TH"
37
39
  iterator.text = "การทดลอง"
38
- iterator.to_a.should == [0, 3, 8]
40
+ expect(iterator.to_a).to eq([0, 3, 8])
39
41
  end
40
42
 
41
43
  it "finds all sentence boundaries in an English string" do
42
44
  iterator = BreakIterator.new :sentence, "en_US"
43
45
  iterator.text = "This is a sentence. This is another sentence, with a comma in it."
44
- iterator.to_a.should == [0, 20, 65]
46
+ expect(iterator.to_a).to eq([0, 20, 65])
45
47
  end
46
48
 
47
49
  it "can navigate back and forward" do
48
50
  iterator = BreakIterator.new :word, "en_US"
49
51
  iterator.text = "Lorem ipsum dolor sit amet."
50
52
 
51
- iterator.first.should == 0
53
+ expect(iterator.first).to eq(0)
52
54
  iterator.next
53
- iterator.current.should == 5
54
- iterator.last.should == 27
55
+ expect(iterator.current).to eq(5)
56
+ expect(iterator.last).to eq(27)
55
57
  end
56
58
 
57
59
  it "fetches info about given offset" do
58
60
  iterator = BreakIterator.new :word, "en_US"
59
61
  iterator.text = "Lorem ipsum dolor sit amet."
60
62
 
61
- iterator.following(3).should == 5
62
- iterator.preceding(6).should == 5
63
+ expect(iterator.following(3)).to eq(5)
64
+ expect(iterator.preceding(6)).to eq(5)
63
65
 
64
- iterator.should be_boundary(5)
65
- iterator.should_not be_boundary(10)
66
+ expect(iterator).to be_boundary(5)
67
+ expect(iterator).to_not be_boundary(10)
66
68
  end
67
69
 
68
70
  it "returns an Enumerator if no block was given" do
69
71
  iterator = BreakIterator.new :word, "nb"
70
- expected = ICU.ruby19? ? Enumerator : Enumerable::Enumerator
71
72
 
72
- iterator.each.should be_kind_of(expected)
73
+ expect(iterator.each).to be_kind_of(Enumerator)
73
74
  end
74
75
 
75
76
  end # BreakIterator
data/spec/chardet_spec.rb CHANGED
@@ -1,29 +1,27 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require 'spec_helper'
4
-
5
3
  describe ICU::CharDet::Detector do
6
4
 
7
5
  let(:detector) { ICU::CharDet::Detector.new }
8
6
 
9
7
  it "should recognize UTF-8" do
10
8
  m = detector.detect("æåø")
11
- m.name.should == "UTF-8"
12
- m.language.should be_kind_of(String)
9
+ expect(m.name).to eq("UTF-8")
10
+ expect(m.language).to be_a(String)
13
11
  end
14
12
 
15
13
  it "has a list of detectable charsets" do
16
14
  cs = detector.detectable_charsets
17
- cs.should be_kind_of(Array)
18
- cs.should_not be_empty
15
+ expect(cs).to be_an(Array)
16
+ expect(cs).to_not be_empty
19
17
 
20
- cs.first.should be_kind_of(String)
18
+ expect(cs.first).to be_a(String)
21
19
  end
22
20
 
23
21
  it "should disable / enable the input filter" do
24
- detector.input_filter_enabled?.should be_false
22
+ expect(detector.input_filter_enabled?).to be_falsey
25
23
  detector.input_filter_enabled = true
26
- detector.input_filter_enabled?.should be_true
24
+ expect(detector.input_filter_enabled?).to be_truthy
27
25
  end
28
26
 
29
27
  it "should should set declared encoding" do
@@ -31,14 +29,14 @@ describe ICU::CharDet::Detector do
31
29
  end
32
30
 
33
31
  it "should detect several matching encodings" do
34
- detector.detect_all("foo bar").should be_instance_of(Array)
32
+ expect(detector.detect_all("foo bar")).to be_an(Array)
35
33
  end
36
34
 
37
35
  it "should support null bytes" do
38
36
  # Create a utf-16 string and then force it to binary (ascii) to mimic data from net/http
39
37
  string = "foo".encode("UTF-16").force_encoding("binary")
40
38
  m = detector.detect(string)
41
- m.name.should == "UTF-16BE"
42
- m.language.should be_kind_of(String)
39
+ expect(m.name).to eq("UTF-16BE")
40
+ expect(m.language).to be_a(String)
43
41
  end
44
42
  end