ffi-icu 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +10 -0
- data/LICENSE +1 -1
- data/README.md +21 -51
- data/Rakefile +4 -5
- data/ffi-icu.gemspec +34 -25
- data/lib/ffi-icu/break_iterator.rb +19 -18
- data/lib/ffi-icu/chardet.rb +12 -13
- data/lib/ffi-icu/collation.rb +62 -59
- data/lib/ffi-icu/duration_formatting.rb +293 -267
- data/lib/ffi-icu/lib/util.rb +10 -10
- data/lib/ffi-icu/lib.rb +273 -202
- data/lib/ffi-icu/locale.rb +14 -10
- data/lib/ffi-icu/normalization.rb +7 -7
- data/lib/ffi-icu/normalizer.rb +14 -8
- data/lib/ffi-icu/number_formatting.rb +41 -27
- data/lib/ffi-icu/time_formatting.rb +116 -93
- data/lib/ffi-icu/transliteration.rb +19 -19
- data/lib/ffi-icu/uchar.rb +14 -17
- data/lib/ffi-icu/version.rb +3 -1
- data/lib/ffi-icu.rb +16 -17
- metadata +35 -71
- data/.document +0 -5
- data/.gitignore +0 -23
- data/.rspec +0 -2
- data/.travis.yml +0 -28
- data/benchmark/detect.rb +0 -14
- data/benchmark/shared.rb +0 -17
- data/build_icu.sh +0 -53
- data/lib/ffi-icu/core_ext/string.rb +0 -9
- data/spec/break_iterator_spec.rb +0 -77
- data/spec/chardet_spec.rb +0 -42
- data/spec/collation_spec.rb +0 -84
- data/spec/duration_formatting_spec.rb +0 -143
- data/spec/lib/version_info_spec.rb +0 -20
- data/spec/lib_spec.rb +0 -63
- data/spec/locale_spec.rb +0 -280
- data/spec/normalization_spec.rb +0 -22
- data/spec/normalizer_spec.rb +0 -57
- data/spec/number_formatting_spec.rb +0 -79
- data/spec/spec_helper.rb +0 -13
- data/spec/time_spec.rb +0 -198
- data/spec/transliteration_spec.rb +0 -36
- data/spec/uchar_spec.rb +0 -34
- data/test.c +0 -56
|
@@ -1,12 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module ICU
|
|
2
4
|
module Transliteration
|
|
3
|
-
|
|
4
5
|
class << self
|
|
5
6
|
def transliterate(translit_id, str, rules = nil)
|
|
6
|
-
t = Transliterator.new
|
|
7
|
-
t.transliterate
|
|
7
|
+
t = Transliterator.new(translit_id, rules)
|
|
8
|
+
t.transliterate(str)
|
|
8
9
|
end
|
|
9
|
-
|
|
10
|
+
alias translit transliterate
|
|
10
11
|
|
|
11
12
|
def available_ids
|
|
12
13
|
enum_ptr = Lib.check_error do |error|
|
|
@@ -21,34 +22,34 @@ module ICU
|
|
|
21
22
|
end
|
|
22
23
|
|
|
23
24
|
class Transliterator
|
|
24
|
-
|
|
25
25
|
def initialize(id, rules = nil, direction = :forward)
|
|
26
26
|
rules_length = 0
|
|
27
27
|
|
|
28
28
|
if rules
|
|
29
|
-
rules_length = rules.
|
|
29
|
+
rules_length = rules.size + 1
|
|
30
30
|
rules = UCharPointer.from_string(rules)
|
|
31
31
|
end
|
|
32
32
|
|
|
33
33
|
parse_error = Lib::UParseError.new
|
|
34
34
|
begin
|
|
35
35
|
Lib.check_error do |status|
|
|
36
|
-
ptr = Lib.utrans_openU(UCharPointer.from_string(id), id.
|
|
36
|
+
ptr = Lib.utrans_openU(UCharPointer.from_string(id), id.size, direction, rules, rules_length,
|
|
37
|
+
@parse_error, status)
|
|
37
38
|
@tr = FFI::AutoPointer.new(ptr, Lib.method(:utrans_close))
|
|
38
39
|
end
|
|
39
|
-
rescue ICU::Error =>
|
|
40
|
-
raise
|
|
40
|
+
rescue ICU::Error => e
|
|
41
|
+
raise(e, "#{e.message} (#{parse_error})")
|
|
41
42
|
end
|
|
42
43
|
end
|
|
43
44
|
|
|
44
45
|
def transliterate(from)
|
|
45
46
|
# this is a bit unpleasant
|
|
46
47
|
|
|
47
|
-
unicode_size = from.unpack(
|
|
48
|
+
unicode_size = from.unpack('U*').size
|
|
48
49
|
capacity = unicode_size + 1
|
|
49
50
|
buf = UCharPointer.from_string(from, capacity)
|
|
50
|
-
limit = FFI::MemoryPointer.new
|
|
51
|
-
text_length = FFI::MemoryPointer.new
|
|
51
|
+
limit = FFI::MemoryPointer.new(:int32)
|
|
52
|
+
text_length = FFI::MemoryPointer.new(:int32)
|
|
52
53
|
|
|
53
54
|
retried = false
|
|
54
55
|
|
|
@@ -63,9 +64,9 @@ module ICU
|
|
|
63
64
|
end
|
|
64
65
|
rescue BufferOverflowError
|
|
65
66
|
new_size = text_length.get_int32(0)
|
|
66
|
-
|
|
67
|
+
warn("BufferOverflowError, needs: #{new_size}") if $DEBUG
|
|
67
68
|
|
|
68
|
-
raise
|
|
69
|
+
raise(BufferOverflowError, "needed #{new_size}") if retried
|
|
69
70
|
|
|
70
71
|
capacity = new_size + 1
|
|
71
72
|
|
|
@@ -78,9 +79,8 @@ module ICU
|
|
|
78
79
|
retry
|
|
79
80
|
end
|
|
80
81
|
|
|
81
|
-
buf.string
|
|
82
|
+
buf.string(text_length.get_int32(0))
|
|
82
83
|
end
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
end # ICU
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
end
|
data/lib/ffi-icu/uchar.rb
CHANGED
|
@@ -1,36 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module ICU
|
|
2
4
|
class UCharPointer < FFI::MemoryPointer
|
|
3
|
-
|
|
4
5
|
UCHAR_TYPE = :uint16 # not sure how platform-dependent this is..
|
|
5
6
|
TYPE_SIZE = FFI.type_size(UCHAR_TYPE)
|
|
6
7
|
|
|
7
8
|
def self.from_string(str, capacity = nil)
|
|
8
|
-
str = str.encode(
|
|
9
|
-
chars = str.unpack(
|
|
9
|
+
str = str.encode('UTF-8') if str.respond_to?(:encode)
|
|
10
|
+
chars = str.unpack('U*')
|
|
10
11
|
|
|
11
12
|
if capacity
|
|
12
|
-
if capacity < chars.size
|
|
13
|
-
raise ArgumentError, "capacity is too small for string of #{chars.size} UChars"
|
|
14
|
-
end
|
|
13
|
+
raise(ArgumentError, "capacity is too small for string of #{chars.size} UChars") if capacity < chars.size
|
|
15
14
|
|
|
16
|
-
ptr = new
|
|
15
|
+
ptr = new(capacity)
|
|
17
16
|
else
|
|
18
|
-
ptr = new
|
|
17
|
+
ptr = new(chars.size)
|
|
19
18
|
end
|
|
20
19
|
|
|
21
|
-
ptr.write_array_of_uint16
|
|
20
|
+
ptr.write_array_of_uint16(chars)
|
|
22
21
|
|
|
23
22
|
ptr
|
|
24
23
|
end
|
|
25
24
|
|
|
26
25
|
def initialize(size)
|
|
27
|
-
super
|
|
26
|
+
super(UCHAR_TYPE, size)
|
|
28
27
|
end
|
|
29
28
|
|
|
30
29
|
def resized_to(new_size)
|
|
31
|
-
raise
|
|
30
|
+
raise('new_size must be larger than current size') if new_size < size
|
|
32
31
|
|
|
33
|
-
resized = self.class.new
|
|
32
|
+
resized = self.class.new(new_size)
|
|
34
33
|
resized.put_bytes(0, get_bytes(0, size))
|
|
35
34
|
|
|
36
35
|
resized
|
|
@@ -40,13 +39,11 @@ module ICU
|
|
|
40
39
|
length ||= size / TYPE_SIZE
|
|
41
40
|
|
|
42
41
|
wstring = read_array_of_uint16(length)
|
|
43
|
-
wstring.pack(
|
|
42
|
+
wstring.pack('U*')
|
|
44
43
|
end
|
|
45
44
|
|
|
46
45
|
def length_in_uchars
|
|
47
46
|
size / type_size
|
|
48
47
|
end
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
end # UCharPointer
|
|
52
|
-
end # ICU
|
|
48
|
+
end
|
|
49
|
+
end
|
data/lib/ffi-icu/version.rb
CHANGED
data/lib/ffi-icu.rb
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
require
|
|
2
|
-
require
|
|
1
|
+
require 'rbconfig'
|
|
2
|
+
require 'ffi'
|
|
3
3
|
|
|
4
4
|
module ICU
|
|
5
5
|
def self.platform
|
|
6
|
-
os = RbConfig::CONFIG[
|
|
6
|
+
os = RbConfig::CONFIG['host_os']
|
|
7
7
|
|
|
8
8
|
case os
|
|
9
9
|
when /darwin/
|
|
@@ -20,17 +20,16 @@ module ICU
|
|
|
20
20
|
end
|
|
21
21
|
end
|
|
22
22
|
|
|
23
|
-
require
|
|
24
|
-
require
|
|
25
|
-
require
|
|
26
|
-
require
|
|
27
|
-
require
|
|
28
|
-
require
|
|
29
|
-
require
|
|
30
|
-
require
|
|
31
|
-
require
|
|
32
|
-
require
|
|
33
|
-
require
|
|
34
|
-
require
|
|
35
|
-
require
|
|
36
|
-
require "ffi-icu/duration_formatting"
|
|
23
|
+
require 'ffi-icu/lib'
|
|
24
|
+
require 'ffi-icu/lib/util'
|
|
25
|
+
require 'ffi-icu/uchar'
|
|
26
|
+
require 'ffi-icu/chardet'
|
|
27
|
+
require 'ffi-icu/collation'
|
|
28
|
+
require 'ffi-icu/locale'
|
|
29
|
+
require 'ffi-icu/transliteration'
|
|
30
|
+
require 'ffi-icu/normalization'
|
|
31
|
+
require 'ffi-icu/normalizer'
|
|
32
|
+
require 'ffi-icu/break_iterator'
|
|
33
|
+
require 'ffi-icu/number_formatting'
|
|
34
|
+
require 'ffi-icu/time_formatting'
|
|
35
|
+
require 'ffi-icu/duration_formatting'
|
metadata
CHANGED
|
@@ -1,15 +1,29 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ffi-icu
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
|
-
-
|
|
8
|
-
|
|
7
|
+
- Erick Guan
|
|
8
|
+
- Damian Nelson
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: bigdecimal
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '3.1'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '3.1'
|
|
13
27
|
- !ruby/object:Gem::Dependency
|
|
14
28
|
name: ffi
|
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -31,59 +45,37 @@ dependencies:
|
|
|
31
45
|
- !ruby/object:Gem::Version
|
|
32
46
|
version: 1.0.9
|
|
33
47
|
- !ruby/object:Gem::Dependency
|
|
34
|
-
name:
|
|
48
|
+
name: stringio
|
|
35
49
|
requirement: !ruby/object:Gem::Requirement
|
|
36
50
|
requirements:
|
|
37
51
|
- - "~>"
|
|
38
52
|
- !ruby/object:Gem::Version
|
|
39
|
-
version: '3.
|
|
40
|
-
type: :
|
|
53
|
+
version: '3.0'
|
|
54
|
+
type: :runtime
|
|
41
55
|
prerelease: false
|
|
42
56
|
version_requirements: !ruby/object:Gem::Requirement
|
|
43
57
|
requirements:
|
|
44
58
|
- - "~>"
|
|
45
59
|
- !ruby/object:Gem::Version
|
|
46
|
-
version: '3.
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
requirements:
|
|
51
|
-
- - ">="
|
|
52
|
-
- !ruby/object:Gem::Version
|
|
53
|
-
version: 12.3.3
|
|
54
|
-
type: :development
|
|
55
|
-
prerelease: false
|
|
56
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
57
|
-
requirements:
|
|
58
|
-
- - ">="
|
|
59
|
-
- !ruby/object:Gem::Version
|
|
60
|
-
version: 12.3.3
|
|
61
|
-
description: Provides charset detection, locale sensitive collation and more. Depends
|
|
62
|
-
on libicu.
|
|
63
|
-
email: jari.bakken@gmail.com
|
|
60
|
+
version: '3.0'
|
|
61
|
+
description: Provides charset detection, transiliteration, locale sensitive collation
|
|
62
|
+
and more. Depends on libicu. ICU operates on CLDR data.
|
|
63
|
+
email: erickguanst@gmail.com
|
|
64
64
|
executables: []
|
|
65
65
|
extensions: []
|
|
66
66
|
extra_rdoc_files:
|
|
67
67
|
- LICENSE
|
|
68
68
|
- README.md
|
|
69
69
|
files:
|
|
70
|
-
- ".document"
|
|
71
|
-
- ".gitignore"
|
|
72
|
-
- ".rspec"
|
|
73
|
-
- ".travis.yml"
|
|
74
70
|
- Gemfile
|
|
75
71
|
- LICENSE
|
|
76
72
|
- README.md
|
|
77
73
|
- Rakefile
|
|
78
|
-
- benchmark/detect.rb
|
|
79
|
-
- benchmark/shared.rb
|
|
80
|
-
- build_icu.sh
|
|
81
74
|
- ffi-icu.gemspec
|
|
82
75
|
- lib/ffi-icu.rb
|
|
83
76
|
- lib/ffi-icu/break_iterator.rb
|
|
84
77
|
- lib/ffi-icu/chardet.rb
|
|
85
78
|
- lib/ffi-icu/collation.rb
|
|
86
|
-
- lib/ffi-icu/core_ext/string.rb
|
|
87
79
|
- lib/ffi-icu/duration_formatting.rb
|
|
88
80
|
- lib/ffi-icu/lib.rb
|
|
89
81
|
- lib/ffi-icu/lib/util.rb
|
|
@@ -95,26 +87,13 @@ files:
|
|
|
95
87
|
- lib/ffi-icu/transliteration.rb
|
|
96
88
|
- lib/ffi-icu/uchar.rb
|
|
97
89
|
- lib/ffi-icu/version.rb
|
|
98
|
-
|
|
99
|
-
- spec/chardet_spec.rb
|
|
100
|
-
- spec/collation_spec.rb
|
|
101
|
-
- spec/duration_formatting_spec.rb
|
|
102
|
-
- spec/lib/version_info_spec.rb
|
|
103
|
-
- spec/lib_spec.rb
|
|
104
|
-
- spec/locale_spec.rb
|
|
105
|
-
- spec/normalization_spec.rb
|
|
106
|
-
- spec/normalizer_spec.rb
|
|
107
|
-
- spec/number_formatting_spec.rb
|
|
108
|
-
- spec/spec_helper.rb
|
|
109
|
-
- spec/time_spec.rb
|
|
110
|
-
- spec/transliteration_spec.rb
|
|
111
|
-
- spec/uchar_spec.rb
|
|
112
|
-
- test.c
|
|
113
|
-
homepage: http://github.com/jarib/ffi-icu
|
|
90
|
+
homepage: https://github.com/erickguan/ffi-icu
|
|
114
91
|
licenses:
|
|
115
92
|
- MIT
|
|
116
|
-
metadata:
|
|
117
|
-
|
|
93
|
+
metadata:
|
|
94
|
+
source_code_uri: https://github.com/erickguan/ffi-icu
|
|
95
|
+
changelog_uri: https://github.com/erickguan/ffi-icu/blob/master/CHANGELOG.md
|
|
96
|
+
rubygems_mfa_required: 'true'
|
|
118
97
|
rdoc_options:
|
|
119
98
|
- "--charset=UTF-8"
|
|
120
99
|
require_paths:
|
|
@@ -123,29 +102,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
123
102
|
requirements:
|
|
124
103
|
- - ">="
|
|
125
104
|
- !ruby/object:Gem::Version
|
|
126
|
-
version:
|
|
105
|
+
version: 3.2.0
|
|
127
106
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
128
107
|
requirements:
|
|
129
108
|
- - ">="
|
|
130
109
|
- !ruby/object:Gem::Version
|
|
131
|
-
version:
|
|
110
|
+
version: 2.5.0
|
|
132
111
|
requirements: []
|
|
133
|
-
rubygems_version:
|
|
134
|
-
signing_key:
|
|
112
|
+
rubygems_version: 4.0.10
|
|
135
113
|
specification_version: 4
|
|
136
|
-
summary:
|
|
137
|
-
test_files:
|
|
138
|
-
- spec/break_iterator_spec.rb
|
|
139
|
-
- spec/chardet_spec.rb
|
|
140
|
-
- spec/collation_spec.rb
|
|
141
|
-
- spec/duration_formatting_spec.rb
|
|
142
|
-
- spec/lib/version_info_spec.rb
|
|
143
|
-
- spec/lib_spec.rb
|
|
144
|
-
- spec/locale_spec.rb
|
|
145
|
-
- spec/normalization_spec.rb
|
|
146
|
-
- spec/normalizer_spec.rb
|
|
147
|
-
- spec/number_formatting_spec.rb
|
|
148
|
-
- spec/spec_helper.rb
|
|
149
|
-
- spec/time_spec.rb
|
|
150
|
-
- spec/transliteration_spec.rb
|
|
151
|
-
- spec/uchar_spec.rb
|
|
114
|
+
summary: Ruby FFI wrappers for International Components for Unicode (ICU).
|
|
115
|
+
test_files: []
|
data/.document
DELETED
data/.gitignore
DELETED
data/.rspec
DELETED
data/.travis.yml
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
language: ruby
|
|
2
|
-
os: linux
|
|
3
|
-
dist: focal
|
|
4
|
-
|
|
5
|
-
arch:
|
|
6
|
-
- amd64
|
|
7
|
-
- arm64
|
|
8
|
-
|
|
9
|
-
rvm:
|
|
10
|
-
- 2.7
|
|
11
|
-
- 3.0
|
|
12
|
-
- 3.1
|
|
13
|
-
- ruby-head
|
|
14
|
-
- truffleruby
|
|
15
|
-
|
|
16
|
-
before_script:
|
|
17
|
-
- sudo apt install -y icu-devtools g++
|
|
18
|
-
- sudo chmod +x build_icu.sh
|
|
19
|
-
- sudo $PWD/build_icu.sh versions
|
|
20
|
-
- sudo $PWD/build_icu.sh install 71.1
|
|
21
|
-
- export LD_LIBRARY_PATH=/usr/local/lib
|
|
22
|
-
- icuinfo
|
|
23
|
-
- yes | gem update --system --force
|
|
24
|
-
- gem install bundler
|
|
25
|
-
|
|
26
|
-
jobs:
|
|
27
|
-
allow_failures:
|
|
28
|
-
- rvm: truffleruby
|
data/benchmark/detect.rb
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
|
|
3
|
-
require "benchmark"
|
|
4
|
-
|
|
5
|
-
$LOAD_PATH.unshift "lib"
|
|
6
|
-
require "ffi-icu"
|
|
7
|
-
require "rchardet"
|
|
8
|
-
|
|
9
|
-
TESTS = 1000
|
|
10
|
-
|
|
11
|
-
Benchmark.bmbm do |results|
|
|
12
|
-
results.report("rchardet:") { TESTS.times { CharDet.detect("æåø") } }
|
|
13
|
-
results.report("ffi-icu:") { TESTS.times { ICU::CharDet.detect("æåø") } }
|
|
14
|
-
end
|
data/benchmark/shared.rb
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
|
|
3
|
-
require "benchmark"
|
|
4
|
-
|
|
5
|
-
$LOAD_PATH.unshift "lib"
|
|
6
|
-
require "ffi-icu"
|
|
7
|
-
require "rchardet"
|
|
8
|
-
|
|
9
|
-
TESTS = 1000
|
|
10
|
-
|
|
11
|
-
$rchardet = CharDet::UniversalDetector.new
|
|
12
|
-
$icu = ICU::CharDet::Detector.new
|
|
13
|
-
|
|
14
|
-
Benchmark.bmbm do |results|
|
|
15
|
-
results.report("rchardet instance:") { TESTS.times { $rchardet.reset; $rchardet.feed("æåø"); $rchardet.result } }
|
|
16
|
-
results.report("ffi-icu instance:") { TESTS.times { $icu.detect("æåø") } }
|
|
17
|
-
end
|
data/build_icu.sh
DELETED
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bash
|
|
2
|
-
|
|
3
|
-
if [[ -x $(which icuinfo) ]]; then
|
|
4
|
-
echo System ICU version: $(icuinfo | grep -o '"version">[^<]\+' | grep -o '[^"><]\+$')
|
|
5
|
-
else
|
|
6
|
-
echo 'System ICU not installed'
|
|
7
|
-
fi
|
|
8
|
-
|
|
9
|
-
if [[ "$1" == '' ]]; then
|
|
10
|
-
echo ''
|
|
11
|
-
echo 'Usage:'
|
|
12
|
-
echo ''
|
|
13
|
-
echo '1) bash icu-install.sh versions'
|
|
14
|
-
echo ''
|
|
15
|
-
echo '2) bash icu-install.sh install <version>'
|
|
16
|
-
fi
|
|
17
|
-
|
|
18
|
-
if [[ "$1" == 'versions' ]]; then
|
|
19
|
-
echo ''
|
|
20
|
-
echo 'Available ICU versions'
|
|
21
|
-
wget -O - https://icu.unicode.org/download 2>/dev/null | grep -P -o '(?<=http://site.icu-project.org/download/)\d+#TOC-ICU4C-Download.+;>\K[\d.]+'
|
|
22
|
-
fi
|
|
23
|
-
|
|
24
|
-
if [[ "$2" != "" && "$1" == 'install' ]]; then
|
|
25
|
-
which g++ || sudo apt install -y g++
|
|
26
|
-
|
|
27
|
-
ICU_VERSION=$2
|
|
28
|
-
ICU_SRC_FILE="icu4c-$(echo $ICU_VERSION | sed -e 's/\./_/')-src.tgz"
|
|
29
|
-
echo "Trying to install ICU version: $ICU_VERSION"
|
|
30
|
-
if [[ ! -e "$ICU_SRC_FILE" ]]; then
|
|
31
|
-
wget "https://github.com/unicode-org/icu/releases/download/release-$(echo $ICU_VERSION | sed -e 's/\./-/')/$ICU_SRC_FILE"
|
|
32
|
-
fi
|
|
33
|
-
if [[ ! -e "$ICU_SRC_FILE" ]]; then
|
|
34
|
-
exit 1;
|
|
35
|
-
fi
|
|
36
|
-
|
|
37
|
-
ICU_SRC_FOLDER="icu-release-$(echo $ICU_VERSION | sed -e 's/\./-/')"
|
|
38
|
-
tar zxvf "$ICU_SRC_FILE"
|
|
39
|
-
which g++ || sudo apt install -y g++
|
|
40
|
-
|
|
41
|
-
if [[ ! -e "/opt/icu$ICU_VERSION" ]]; then
|
|
42
|
-
pushd icu/source
|
|
43
|
-
sudo mkdir "/opt/icu$ICU_VERSION"
|
|
44
|
-
./configure --prefix="/opt/icu$ICU_VERSION" && make -j2 && sudo make install
|
|
45
|
-
ls -alh /opt/icu$ICU_VERSION/lib/
|
|
46
|
-
sudo cp -r /opt/icu$ICU_VERSION/lib/* /usr/local/lib
|
|
47
|
-
popd
|
|
48
|
-
else
|
|
49
|
-
echo "ICU already installed at (/opt/icu$ICU_VERSION)"
|
|
50
|
-
fi
|
|
51
|
-
|
|
52
|
-
rm -f "$ICU_SRC_FILE"
|
|
53
|
-
fi
|
data/spec/break_iterator_spec.rb
DELETED
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
|
|
3
|
-
module ICU
|
|
4
|
-
describe BreakIterator do
|
|
5
|
-
|
|
6
|
-
it "should return available locales" do
|
|
7
|
-
locales = ICU::BreakIterator.available_locales
|
|
8
|
-
expect(locales).to be_an(Array)
|
|
9
|
-
expect(locales).to_not be_empty
|
|
10
|
-
expect(locales).to include("en_US")
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
it "finds all word boundaries in an English string" do
|
|
14
|
-
iterator = BreakIterator.new :word, "en_US"
|
|
15
|
-
iterator.text = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
|
|
16
|
-
expect(iterator.to_a).to eq(
|
|
17
|
-
[0, 5, 6, 11, 12, 17, 18, 21, 22, 26, 27, 28, 39, 40, 51, 52, 56, 57, 58, 61, 62, 64, 65, 72, 73, 79, 80, 90, 91, 93, 94, 100, 101, 103, 104, 110, 111, 116, 117, 123, 124]
|
|
18
|
-
)
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
it "returns each substring" do
|
|
22
|
-
iterator = BreakIterator.new :word, "en_US"
|
|
23
|
-
iterator.text = "Lorem ipsum dolor sit amet."
|
|
24
|
-
|
|
25
|
-
expect(iterator.substrings).to eq(["Lorem", " ", "ipsum", " ", "dolor", " ", "sit", " ", "amet", "."])
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
it "returns the substrings of a non-ASCII string" do
|
|
29
|
-
iterator = BreakIterator.new :word, "th_TH"
|
|
30
|
-
iterator.text = "รู้อะไรไม่สู้รู้วิชา รู้รักษาตัวรอดเป็นยอดดี"
|
|
31
|
-
|
|
32
|
-
expect(iterator.substrings).to eq(
|
|
33
|
-
["รู้", "อะไร", "ไม่สู้", "รู้", "วิชา", " ", "รู้", "รักษา", "ตัว", "รอด", "เป็น", "ยอดดี"]
|
|
34
|
-
)
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
it "finds all word boundaries in a non-ASCII string" do
|
|
38
|
-
iterator = BreakIterator.new :word, "th_TH"
|
|
39
|
-
iterator.text = "การทดลอง"
|
|
40
|
-
expect(iterator.to_a).to eq([0, 3, 8])
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
it "finds all sentence boundaries in an English string" do
|
|
44
|
-
iterator = BreakIterator.new :sentence, "en_US"
|
|
45
|
-
iterator.text = "This is a sentence. This is another sentence, with a comma in it."
|
|
46
|
-
expect(iterator.to_a).to eq([0, 20, 65])
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
it "can navigate back and forward" do
|
|
50
|
-
iterator = BreakIterator.new :word, "en_US"
|
|
51
|
-
iterator.text = "Lorem ipsum dolor sit amet."
|
|
52
|
-
|
|
53
|
-
expect(iterator.first).to eq(0)
|
|
54
|
-
iterator.next
|
|
55
|
-
expect(iterator.current).to eq(5)
|
|
56
|
-
expect(iterator.last).to eq(27)
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
it "fetches info about given offset" do
|
|
60
|
-
iterator = BreakIterator.new :word, "en_US"
|
|
61
|
-
iterator.text = "Lorem ipsum dolor sit amet."
|
|
62
|
-
|
|
63
|
-
expect(iterator.following(3)).to eq(5)
|
|
64
|
-
expect(iterator.preceding(6)).to eq(5)
|
|
65
|
-
|
|
66
|
-
expect(iterator).to be_boundary(5)
|
|
67
|
-
expect(iterator).to_not be_boundary(10)
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
it "returns an Enumerator if no block was given" do
|
|
71
|
-
iterator = BreakIterator.new :word, "nb"
|
|
72
|
-
|
|
73
|
-
expect(iterator.each).to be_kind_of(Enumerator)
|
|
74
|
-
end
|
|
75
|
-
|
|
76
|
-
end # BreakIterator
|
|
77
|
-
end # ICU
|
data/spec/chardet_spec.rb
DELETED
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
# encoding: UTF-8
|
|
2
|
-
|
|
3
|
-
describe ICU::CharDet::Detector do
|
|
4
|
-
|
|
5
|
-
let(:detector) { ICU::CharDet::Detector.new }
|
|
6
|
-
|
|
7
|
-
it "should recognize UTF-8" do
|
|
8
|
-
m = detector.detect("æåø")
|
|
9
|
-
expect(m.name).to eq("UTF-8")
|
|
10
|
-
expect(m.language).to be_a(String)
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
it "has a list of detectable charsets" do
|
|
14
|
-
cs = detector.detectable_charsets
|
|
15
|
-
expect(cs).to be_an(Array)
|
|
16
|
-
expect(cs).to_not be_empty
|
|
17
|
-
|
|
18
|
-
expect(cs.first).to be_a(String)
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
it "should disable / enable the input filter" do
|
|
22
|
-
expect(detector.input_filter_enabled?).to be_falsey
|
|
23
|
-
detector.input_filter_enabled = true
|
|
24
|
-
expect(detector.input_filter_enabled?).to be_truthy
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
it "should should set declared encoding" do
|
|
28
|
-
detector.declared_encoding = "UTF-8"
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
it "should detect several matching encodings" do
|
|
32
|
-
expect(detector.detect_all("foo bar")).to be_an(Array)
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
it "should support null bytes" do
|
|
36
|
-
# Create a utf-16 string and then force it to binary (ascii) to mimic data from net/http
|
|
37
|
-
string = "foo".encode("UTF-16").force_encoding("binary")
|
|
38
|
-
m = detector.detect(string)
|
|
39
|
-
expect(m.name).to eq("UTF-16BE")
|
|
40
|
-
expect(m.language).to be_a(String)
|
|
41
|
-
end
|
|
42
|
-
end
|