ffi-icu 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/Gemfile +10 -0
  3. data/LICENSE +1 -1
  4. data/README.md +21 -51
  5. data/Rakefile +4 -5
  6. data/ffi-icu.gemspec +34 -25
  7. data/lib/ffi-icu/break_iterator.rb +19 -18
  8. data/lib/ffi-icu/chardet.rb +12 -13
  9. data/lib/ffi-icu/collation.rb +62 -59
  10. data/lib/ffi-icu/duration_formatting.rb +293 -267
  11. data/lib/ffi-icu/lib/util.rb +10 -10
  12. data/lib/ffi-icu/lib.rb +273 -202
  13. data/lib/ffi-icu/locale.rb +14 -10
  14. data/lib/ffi-icu/normalization.rb +7 -7
  15. data/lib/ffi-icu/normalizer.rb +14 -8
  16. data/lib/ffi-icu/number_formatting.rb +41 -27
  17. data/lib/ffi-icu/time_formatting.rb +116 -93
  18. data/lib/ffi-icu/transliteration.rb +19 -19
  19. data/lib/ffi-icu/uchar.rb +14 -17
  20. data/lib/ffi-icu/version.rb +3 -1
  21. data/lib/ffi-icu.rb +16 -17
  22. metadata +35 -71
  23. data/.document +0 -5
  24. data/.gitignore +0 -23
  25. data/.rspec +0 -2
  26. data/.travis.yml +0 -28
  27. data/benchmark/detect.rb +0 -14
  28. data/benchmark/shared.rb +0 -17
  29. data/build_icu.sh +0 -53
  30. data/lib/ffi-icu/core_ext/string.rb +0 -9
  31. data/spec/break_iterator_spec.rb +0 -77
  32. data/spec/chardet_spec.rb +0 -42
  33. data/spec/collation_spec.rb +0 -84
  34. data/spec/duration_formatting_spec.rb +0 -143
  35. data/spec/lib/version_info_spec.rb +0 -20
  36. data/spec/lib_spec.rb +0 -63
  37. data/spec/locale_spec.rb +0 -280
  38. data/spec/normalization_spec.rb +0 -22
  39. data/spec/normalizer_spec.rb +0 -57
  40. data/spec/number_formatting_spec.rb +0 -79
  41. data/spec/spec_helper.rb +0 -13
  42. data/spec/time_spec.rb +0 -198
  43. data/spec/transliteration_spec.rb +0 -36
  44. data/spec/uchar_spec.rb +0 -34
  45. data/test.c +0 -56
data/lib/ffi-icu/transliteration.rb CHANGED
@@ -1,12 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module ICU
2
4
  module Transliteration
3
-
4
5
  class << self
5
6
  def transliterate(translit_id, str, rules = nil)
6
- t = Transliterator.new translit_id, rules
7
- t.transliterate str
7
+ t = Transliterator.new(translit_id, rules)
8
+ t.transliterate(str)
8
9
  end
9
- alias_method :translit, :transliterate
10
+ alias translit transliterate
10
11
 
11
12
  def available_ids
12
13
  enum_ptr = Lib.check_error do |error|
@@ -21,34 +22,34 @@ module ICU
21
22
  end
22
23
 
23
24
  class Transliterator
24
-
25
25
  def initialize(id, rules = nil, direction = :forward)
26
26
  rules_length = 0
27
27
 
28
28
  if rules
29
- rules_length = rules.jlength + 1
29
+ rules_length = rules.size + 1
30
30
  rules = UCharPointer.from_string(rules)
31
31
  end
32
32
 
33
33
  parse_error = Lib::UParseError.new
34
34
  begin
35
35
  Lib.check_error do |status|
36
- ptr = Lib.utrans_openU(UCharPointer.from_string(id), id.jlength, direction, rules, rules_length, @parse_error, status)
36
+ ptr = Lib.utrans_openU(UCharPointer.from_string(id), id.size, direction, rules, rules_length,
37
+ @parse_error, status)
37
38
  @tr = FFI::AutoPointer.new(ptr, Lib.method(:utrans_close))
38
39
  end
39
- rescue ICU::Error => ex
40
- raise ex, "#{ex.message} (#{parse_error})"
40
+ rescue ICU::Error => e
41
+ raise(e, "#{e.message} (#{parse_error})")
41
42
  end
42
43
  end
43
44
 
44
45
  def transliterate(from)
45
46
  # this is a bit unpleasant
46
47
 
47
- unicode_size = from.unpack("U*").size
48
+ unicode_size = from.unpack('U*').size
48
49
  capacity = unicode_size + 1
49
50
  buf = UCharPointer.from_string(from, capacity)
50
- limit = FFI::MemoryPointer.new :int32
51
- text_length = FFI::MemoryPointer.new :int32
51
+ limit = FFI::MemoryPointer.new(:int32)
52
+ text_length = FFI::MemoryPointer.new(:int32)
52
53
 
53
54
  retried = false
54
55
 
@@ -63,9 +64,9 @@ module ICU
63
64
  end
64
65
  rescue BufferOverflowError
65
66
  new_size = text_length.get_int32(0)
66
- $stderr.puts "BufferOverflowError, needs: #{new_size}" if $DEBUG
67
+ warn("BufferOverflowError, needs: #{new_size}") if $DEBUG
67
68
 
68
- raise BufferOverflowError, "needed #{new_size}" if retried
69
+ raise(BufferOverflowError, "needed #{new_size}") if retried
69
70
 
70
71
  capacity = new_size + 1
71
72
 
@@ -78,9 +79,8 @@ module ICU
78
79
  retry
79
80
  end
80
81
 
81
- buf.string text_length.get_int32(0)
82
+ buf.string(text_length.get_int32(0))
82
83
  end
83
-
84
- end # Transliterator
85
- end # Translit
86
- end # ICU
84
+ end
85
+ end
86
+ end
data/lib/ffi-icu/uchar.rb CHANGED
@@ -1,36 +1,35 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module ICU
2
4
  class UCharPointer < FFI::MemoryPointer
3
-
4
5
  UCHAR_TYPE = :uint16 # not sure how platform-dependent this is..
5
6
  TYPE_SIZE = FFI.type_size(UCHAR_TYPE)
6
7
 
7
8
  def self.from_string(str, capacity = nil)
8
- str = str.encode("UTF-8") if str.respond_to? :encode
9
- chars = str.unpack("U*")
9
+ str = str.encode('UTF-8') if str.respond_to?(:encode)
10
+ chars = str.unpack('U*')
10
11
 
11
12
  if capacity
12
- if capacity < chars.size
13
- raise ArgumentError, "capacity is too small for string of #{chars.size} UChars"
14
- end
13
+ raise(ArgumentError, "capacity is too small for string of #{chars.size} UChars") if capacity < chars.size
15
14
 
16
- ptr = new capacity
15
+ ptr = new(capacity)
17
16
  else
18
- ptr = new chars.size
17
+ ptr = new(chars.size)
19
18
  end
20
19
 
21
- ptr.write_array_of_uint16 chars
20
+ ptr.write_array_of_uint16(chars)
22
21
 
23
22
  ptr
24
23
  end
25
24
 
26
25
  def initialize(size)
27
- super UCHAR_TYPE, size
26
+ super(UCHAR_TYPE, size)
28
27
  end
29
28
 
30
29
  def resized_to(new_size)
31
- raise "new_size must be larger than current size" if new_size < size
30
+ raise('new_size must be larger than current size') if new_size < size
32
31
 
33
- resized = self.class.new new_size
32
+ resized = self.class.new(new_size)
34
33
  resized.put_bytes(0, get_bytes(0, size))
35
34
 
36
35
  resized
@@ -40,13 +39,11 @@ module ICU
40
39
  length ||= size / TYPE_SIZE
41
40
 
42
41
  wstring = read_array_of_uint16(length)
43
- wstring.pack("U*")
42
+ wstring.pack('U*')
44
43
  end
45
44
 
46
45
  def length_in_uchars
47
46
  size / type_size
48
47
  end
49
-
50
-
51
- end # UCharPointer
52
- end # ICU
48
+ end
49
+ end
data/lib/ffi-icu/version.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module ICU
2
- VERSION = "0.5.2"
4
+ VERSION = '0.6.0'
3
5
  end
data/lib/ffi-icu.rb CHANGED
@@ -1,9 +1,9 @@
1
- require "rbconfig"
2
- require "ffi"
1
+ require 'rbconfig'
2
+ require 'ffi'
3
3
 
4
4
  module ICU
5
5
  def self.platform
6
- os = RbConfig::CONFIG["host_os"]
6
+ os = RbConfig::CONFIG['host_os']
7
7
 
8
8
  case os
9
9
  when /darwin/
@@ -20,17 +20,16 @@ module ICU
20
20
  end
21
21
  end
22
22
 
23
- require "ffi-icu/core_ext/string"
24
- require "ffi-icu/lib"
25
- require "ffi-icu/lib/util"
26
- require "ffi-icu/uchar"
27
- require "ffi-icu/chardet"
28
- require "ffi-icu/collation"
29
- require "ffi-icu/locale"
30
- require "ffi-icu/transliteration"
31
- require "ffi-icu/normalization"
32
- require "ffi-icu/normalizer"
33
- require "ffi-icu/break_iterator"
34
- require "ffi-icu/number_formatting"
35
- require "ffi-icu/time_formatting"
36
- require "ffi-icu/duration_formatting"
23
+ require 'ffi-icu/lib'
24
+ require 'ffi-icu/lib/util'
25
+ require 'ffi-icu/uchar'
26
+ require 'ffi-icu/chardet'
27
+ require 'ffi-icu/collation'
28
+ require 'ffi-icu/locale'
29
+ require 'ffi-icu/transliteration'
30
+ require 'ffi-icu/normalization'
31
+ require 'ffi-icu/normalizer'
32
+ require 'ffi-icu/break_iterator'
33
+ require 'ffi-icu/number_formatting'
34
+ require 'ffi-icu/time_formatting'
35
+ require 'ffi-icu/duration_formatting'
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ffi-icu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
- - Jari Bakken
8
- autorequire:
7
+ - Erick Guan
8
+ - Damian Nelson
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-10-15 00:00:00.000000000 Z
11
+ date: 1980-01-02 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bigdecimal
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.1'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.1'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: ffi
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -31,59 +45,37 @@ dependencies:
31
45
  - !ruby/object:Gem::Version
32
46
  version: 1.0.9
33
47
  - !ruby/object:Gem::Dependency
34
- name: rspec
48
+ name: stringio
35
49
  requirement: !ruby/object:Gem::Requirement
36
50
  requirements:
37
51
  - - "~>"
38
52
  - !ruby/object:Gem::Version
39
- version: '3.9'
40
- type: :development
53
+ version: '3.0'
54
+ type: :runtime
41
55
  prerelease: false
42
56
  version_requirements: !ruby/object:Gem::Requirement
43
57
  requirements:
44
58
  - - "~>"
45
59
  - !ruby/object:Gem::Version
46
- version: '3.9'
47
- - !ruby/object:Gem::Dependency
48
- name: rake
49
- requirement: !ruby/object:Gem::Requirement
50
- requirements:
51
- - - ">="
52
- - !ruby/object:Gem::Version
53
- version: 12.3.3
54
- type: :development
55
- prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- requirements:
58
- - - ">="
59
- - !ruby/object:Gem::Version
60
- version: 12.3.3
61
- description: Provides charset detection, locale sensitive collation and more. Depends
62
- on libicu.
63
- email: jari.bakken@gmail.com
60
+ version: '3.0'
61
+ description: Provides charset detection, transiliteration, locale sensitive collation
62
+ and more. Depends on libicu. ICU operates on CLDR data.
63
+ email: erickguanst@gmail.com
64
64
  executables: []
65
65
  extensions: []
66
66
  extra_rdoc_files:
67
67
  - LICENSE
68
68
  - README.md
69
69
  files:
70
- - ".document"
71
- - ".gitignore"
72
- - ".rspec"
73
- - ".travis.yml"
74
70
  - Gemfile
75
71
  - LICENSE
76
72
  - README.md
77
73
  - Rakefile
78
- - benchmark/detect.rb
79
- - benchmark/shared.rb
80
- - build_icu.sh
81
74
  - ffi-icu.gemspec
82
75
  - lib/ffi-icu.rb
83
76
  - lib/ffi-icu/break_iterator.rb
84
77
  - lib/ffi-icu/chardet.rb
85
78
  - lib/ffi-icu/collation.rb
86
- - lib/ffi-icu/core_ext/string.rb
87
79
  - lib/ffi-icu/duration_formatting.rb
88
80
  - lib/ffi-icu/lib.rb
89
81
  - lib/ffi-icu/lib/util.rb
@@ -95,26 +87,13 @@ files:
95
87
  - lib/ffi-icu/transliteration.rb
96
88
  - lib/ffi-icu/uchar.rb
97
89
  - lib/ffi-icu/version.rb
98
- - spec/break_iterator_spec.rb
99
- - spec/chardet_spec.rb
100
- - spec/collation_spec.rb
101
- - spec/duration_formatting_spec.rb
102
- - spec/lib/version_info_spec.rb
103
- - spec/lib_spec.rb
104
- - spec/locale_spec.rb
105
- - spec/normalization_spec.rb
106
- - spec/normalizer_spec.rb
107
- - spec/number_formatting_spec.rb
108
- - spec/spec_helper.rb
109
- - spec/time_spec.rb
110
- - spec/transliteration_spec.rb
111
- - spec/uchar_spec.rb
112
- - test.c
113
- homepage: http://github.com/jarib/ffi-icu
90
+ homepage: https://github.com/erickguan/ffi-icu
114
91
  licenses:
115
92
  - MIT
116
- metadata: {}
117
- post_install_message:
93
+ metadata:
94
+ source_code_uri: https://github.com/erickguan/ffi-icu
95
+ changelog_uri: https://github.com/erickguan/ffi-icu/blob/master/CHANGELOG.md
96
+ rubygems_mfa_required: 'true'
118
97
  rdoc_options:
119
98
  - "--charset=UTF-8"
120
99
  require_paths:
@@ -123,29 +102,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
123
102
  requirements:
124
103
  - - ">="
125
104
  - !ruby/object:Gem::Version
126
- version: '0'
105
+ version: 3.2.0
127
106
  required_rubygems_version: !ruby/object:Gem::Requirement
128
107
  requirements:
129
108
  - - ">="
130
109
  - !ruby/object:Gem::Version
131
- version: '0'
110
+ version: 2.5.0
132
111
  requirements: []
133
- rubygems_version: 3.4.10
134
- signing_key:
112
+ rubygems_version: 4.0.10
135
113
  specification_version: 4
136
- summary: Simple Ruby FFI wrappers for things I need from ICU.
137
- test_files:
138
- - spec/break_iterator_spec.rb
139
- - spec/chardet_spec.rb
140
- - spec/collation_spec.rb
141
- - spec/duration_formatting_spec.rb
142
- - spec/lib/version_info_spec.rb
143
- - spec/lib_spec.rb
144
- - spec/locale_spec.rb
145
- - spec/normalization_spec.rb
146
- - spec/normalizer_spec.rb
147
- - spec/number_formatting_spec.rb
148
- - spec/spec_helper.rb
149
- - spec/time_spec.rb
150
- - spec/transliteration_spec.rb
151
- - spec/uchar_spec.rb
114
+ summary: Ruby FFI wrappers for International Components for Unicode (ICU).
115
+ test_files: []
data/.document DELETED
@@ -1,5 +0,0 @@
1
- README.rdoc
2
- lib/**/*.rb
3
- bin/*
4
- features/**/*.feature
5
- LICENSE
data/.gitignore DELETED
@@ -1,23 +0,0 @@
1
- ## MAC OS
2
- .DS_Store
3
-
4
- ## TEXTMATE
5
- *.tmproj
6
- tmtags
7
-
8
- ## EMACS
9
- *~
10
- \#*
11
- .\#*
12
-
13
- ## VIM
14
- *.swp
15
-
16
- ## PROJECT::GENERAL
17
- coverage
18
- rdoc
19
- pkg
20
-
21
- ## PROJECT::SPECIFIC
22
- *.rbc
23
- Gemfile.lock
data/.rspec DELETED
@@ -1,2 +0,0 @@
1
- --color
2
- --require spec_helper
data/.travis.yml DELETED
@@ -1,28 +0,0 @@
1
- language: ruby
2
- os: linux
3
- dist: focal
4
-
5
- arch:
6
- - amd64
7
- - arm64
8
-
9
- rvm:
10
- - 2.7
11
- - 3.0
12
- - 3.1
13
- - ruby-head
14
- - truffleruby
15
-
16
- before_script:
17
- - sudo apt install -y icu-devtools g++
18
- - sudo chmod +x build_icu.sh
19
- - sudo $PWD/build_icu.sh versions
20
- - sudo $PWD/build_icu.sh install 71.1
21
- - export LD_LIBRARY_PATH=/usr/local/lib
22
- - icuinfo
23
- - yes | gem update --system --force
24
- - gem install bundler
25
-
26
- jobs:
27
- allow_failures:
28
- - rvm: truffleruby
data/benchmark/detect.rb DELETED
@@ -1,14 +0,0 @@
1
- # encoding: utf-8
2
-
3
- require "benchmark"
4
-
5
- $LOAD_PATH.unshift "lib"
6
- require "ffi-icu"
7
- require "rchardet"
8
-
9
- TESTS = 1000
10
-
11
- Benchmark.bmbm do |results|
12
- results.report("rchardet:") { TESTS.times { CharDet.detect("æåø") } }
13
- results.report("ffi-icu:") { TESTS.times { ICU::CharDet.detect("æåø") } }
14
- end
data/benchmark/shared.rb DELETED
@@ -1,17 +0,0 @@
1
- # encoding: utf-8
2
-
3
- require "benchmark"
4
-
5
- $LOAD_PATH.unshift "lib"
6
- require "ffi-icu"
7
- require "rchardet"
8
-
9
- TESTS = 1000
10
-
11
- $rchardet = CharDet::UniversalDetector.new
12
- $icu = ICU::CharDet::Detector.new
13
-
14
- Benchmark.bmbm do |results|
15
- results.report("rchardet instance:") { TESTS.times { $rchardet.reset; $rchardet.feed("æåø"); $rchardet.result } }
16
- results.report("ffi-icu instance:") { TESTS.times { $icu.detect("æåø") } }
17
- end
data/build_icu.sh DELETED
@@ -1,53 +0,0 @@
1
- #!/usr/bin/env bash
2
-
3
- if [[ -x $(which icuinfo) ]]; then
4
- echo System ICU version: $(icuinfo | grep -o '"version">[^<]\+' | grep -o '[^"><]\+$')
5
- else
6
- echo 'System ICU not installed'
7
- fi
8
-
9
- if [[ "$1" == '' ]]; then
10
- echo ''
11
- echo 'Usage:'
12
- echo ''
13
- echo '1) bash icu-install.sh versions'
14
- echo ''
15
- echo '2) bash icu-install.sh install <version>'
16
- fi
17
-
18
- if [[ "$1" == 'versions' ]]; then
19
- echo ''
20
- echo 'Available ICU versions'
21
- wget -O - https://icu.unicode.org/download 2>/dev/null | grep -P -o '(?<=http://site.icu-project.org/download/)\d+#TOC-ICU4C-Download.+;&gt;\K[\d.]+'
22
- fi
23
-
24
- if [[ "$2" != "" && "$1" == 'install' ]]; then
25
- which g++ || sudo apt install -y g++
26
-
27
- ICU_VERSION=$2
28
- ICU_SRC_FILE="icu4c-$(echo $ICU_VERSION | sed -e 's/\./_/')-src.tgz"
29
- echo "Trying to install ICU version: $ICU_VERSION"
30
- if [[ ! -e "$ICU_SRC_FILE" ]]; then
31
- wget "https://github.com/unicode-org/icu/releases/download/release-$(echo $ICU_VERSION | sed -e 's/\./-/')/$ICU_SRC_FILE"
32
- fi
33
- if [[ ! -e "$ICU_SRC_FILE" ]]; then
34
- exit 1;
35
- fi
36
-
37
- ICU_SRC_FOLDER="icu-release-$(echo $ICU_VERSION | sed -e 's/\./-/')"
38
- tar zxvf "$ICU_SRC_FILE"
39
- which g++ || sudo apt install -y g++
40
-
41
- if [[ ! -e "/opt/icu$ICU_VERSION" ]]; then
42
- pushd icu/source
43
- sudo mkdir "/opt/icu$ICU_VERSION"
44
- ./configure --prefix="/opt/icu$ICU_VERSION" && make -j2 && sudo make install
45
- ls -alh /opt/icu$ICU_VERSION/lib/
46
- sudo cp -r /opt/icu$ICU_VERSION/lib/* /usr/local/lib
47
- popd
48
- else
49
- echo "ICU already installed at (/opt/icu$ICU_VERSION)"
50
- fi
51
-
52
- rm -f "$ICU_SRC_FILE"
53
- fi
data/lib/ffi-icu/core_ext/string.rb DELETED
@@ -1,9 +0,0 @@
1
- class String
2
- unless method_defined?(:bytesize)
3
- alias_method :bytesize, :length
4
- end
5
-
6
- unless method_defined?(:jlength)
7
- alias_method :jlength, :length
8
- end
9
- end
data/spec/break_iterator_spec.rb DELETED
@@ -1,77 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module ICU
4
- describe BreakIterator do
5
-
6
- it "should return available locales" do
7
- locales = ICU::BreakIterator.available_locales
8
- expect(locales).to be_an(Array)
9
- expect(locales).to_not be_empty
10
- expect(locales).to include("en_US")
11
- end
12
-
13
- it "finds all word boundaries in an English string" do
14
- iterator = BreakIterator.new :word, "en_US"
15
- iterator.text = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
16
- expect(iterator.to_a).to eq(
17
- [0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
18
- )
19
- end
20
-
21
- it "returns each substring" do
22
- iterator = BreakIterator.new :word, "en_US"
23
- iterator.text = "Lorem ipsum dolor sit amet."
24
-
25
- expect(iterator.substrings).to eq(["Lorem", " ", "ipsum", " ", "dolor", " ", "sit", " ", "amet", "."])
26
- end
27
-
28
- it "returns the substrings of a non-ASCII string" do
29
- iterator = BreakIterator.new :word, "th_TH"
30
- iterator.text = "รู้อะไรไม่สู้รู้วิชา รู้รักษาตัวรอดเป็นยอดดี"
31
-
32
- expect(iterator.substrings).to eq(
33
- ["รู้", "อะไร", "ไม่สู้", "รู้", "วิชา", " ", "รู้", "รักษา", "ตัว", "รอด", "เป็น", "ยอดดี"]
34
- )
35
- end
36
-
37
- it "finds all word boundaries in a non-ASCII string" do
38
- iterator = BreakIterator.new :word, "th_TH"
39
- iterator.text = "การทดลอง"
40
- expect(iterator.to_a).to eq([0, 3, 8])
41
- end
42
-
43
- it "finds all sentence boundaries in an English string" do
44
- iterator = BreakIterator.new :sentence, "en_US"
45
- iterator.text = "This is a sentence. This is another sentence, with a comma in it."
46
- expect(iterator.to_a).to eq([0, 20, 65])
47
- end
48
-
49
- it "can navigate back and forward" do
50
- iterator = BreakIterator.new :word, "en_US"
51
- iterator.text = "Lorem ipsum dolor sit amet."
52
-
53
- expect(iterator.first).to eq(0)
54
- iterator.next
55
- expect(iterator.current).to eq(5)
56
- expect(iterator.last).to eq(27)
57
- end
58
-
59
- it "fetches info about given offset" do
60
- iterator = BreakIterator.new :word, "en_US"
61
- iterator.text = "Lorem ipsum dolor sit amet."
62
-
63
- expect(iterator.following(3)).to eq(5)
64
- expect(iterator.preceding(6)).to eq(5)
65
-
66
- expect(iterator).to be_boundary(5)
67
- expect(iterator).to_not be_boundary(10)
68
- end
69
-
70
- it "returns an Enumerator if no block was given" do
71
- iterator = BreakIterator.new :word, "nb"
72
-
73
- expect(iterator.each).to be_kind_of(Enumerator)
74
- end
75
-
76
- end # BreakIterator
77
- end # ICU
data/spec/chardet_spec.rb DELETED
@@ -1,42 +0,0 @@
1
- # encoding: UTF-8
2
-
3
- describe ICU::CharDet::Detector do
4
-
5
- let(:detector) { ICU::CharDet::Detector.new }
6
-
7
- it "should recognize UTF-8" do
8
- m = detector.detect("æåø")
9
- expect(m.name).to eq("UTF-8")
10
- expect(m.language).to be_a(String)
11
- end
12
-
13
- it "has a list of detectable charsets" do
14
- cs = detector.detectable_charsets
15
- expect(cs).to be_an(Array)
16
- expect(cs).to_not be_empty
17
-
18
- expect(cs.first).to be_a(String)
19
- end
20
-
21
- it "should disable / enable the input filter" do
22
- expect(detector.input_filter_enabled?).to be_falsey
23
- detector.input_filter_enabled = true
24
- expect(detector.input_filter_enabled?).to be_truthy
25
- end
26
-
27
- it "should should set declared encoding" do
28
- detector.declared_encoding = "UTF-8"
29
- end
30
-
31
- it "should detect several matching encodings" do
32
- expect(detector.detect_all("foo bar")).to be_an(Array)
33
- end
34
-
35
- it "should support null bytes" do
36
- # Create a utf-16 string and then force it to binary (ascii) to mimic data from net/http
37
- string = "foo".encode("UTF-16").force_encoding("binary")
38
- m = detector.detect(string)
39
- expect(m.name).to eq("UTF-16BE")
40
- expect(m.language).to be_a(String)
41
- end
42
- end