ffi-icu 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -6
- data/lib/ffi-icu.rb +1 -1
- data/lib/ffi-icu/lib.rb +11 -4
- data/lib/ffi-icu/normalizer.rb +47 -0
- data/lib/ffi-icu/version.rb +1 -1
- data/spec/normalizer_spec.rb +59 -0
- metadata +4 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dcb52a7fa85970538582cc014850664a50a9725a
|
4
|
+
data.tar.gz: e226b65e6436f613207145582c1c6b0ccd9d2cf0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 62af7b275d1e40bc2d39ce81792f6bff5dc38b4473fcbaf2c507ce8420dd93ce9a2ade21819281288cd893350e3afd930a065d7b6d9d0359e75fedad86a97325
|
7
|
+
data.tar.gz: b0e89ecc1ca69ddbdb4d6a4ae5818f763b69006609c642aecd484fed96c864bd0db3a0c48cba2e8e56bdae616dc1ff32e2995c126178334a0d6d9b923d09c8c6
|
data/README.md
CHANGED
@@ -127,19 +127,20 @@ Tested on:
|
|
127
127
|
|
128
128
|
Platforms:
|
129
129
|
|
130
|
-
* OS X 10.6
|
131
|
-
*
|
130
|
+
* OS X 10.6 - 10.10
|
131
|
+
* Travis' Linux
|
132
132
|
|
133
133
|
Rubies:
|
134
134
|
|
135
|
-
*
|
136
|
-
*
|
135
|
+
* 1.9.3
|
136
|
+
* 2.0.0
|
137
|
+
* 2.1.0
|
138
|
+
* ruby-head
|
137
139
|
|
138
140
|
TODO:
|
139
141
|
=====
|
140
142
|
|
141
|
-
*
|
142
|
-
- date formatting
|
143
|
+
* Any other useful part of ICU?
|
143
144
|
* Windows?!
|
144
145
|
|
145
146
|
Note on Patches/Pull Requests
|
data/lib/ffi-icu.rb
CHANGED
@@ -38,7 +38,7 @@ require "ffi-icu/collation"
|
|
38
38
|
require "ffi-icu/locale"
|
39
39
|
require "ffi-icu/transliteration"
|
40
40
|
require "ffi-icu/normalization"
|
41
|
+
require "ffi-icu/normalizer"
|
41
42
|
require "ffi-icu/break_iterator"
|
42
43
|
require "ffi-icu/number_formatting"
|
43
44
|
require "ffi-icu/time_formatting"
|
44
|
-
|
data/lib/ffi-icu/lib.rb
CHANGED
@@ -339,6 +339,13 @@ module ICU
|
|
339
339
|
|
340
340
|
attach_function :unorm_normalize, "unorm_normalize#{suffix}", [:pointer, :int32_t, :normalization_mode, :int32_t, :pointer, :int32_t, :pointer], :int32_t
|
341
341
|
|
342
|
+
# http://icu-project.org/apiref/icu4c/unorm2_8h.html
|
343
|
+
|
344
|
+
enum :normalization2_mode, [ :compose, :decompose, :fcd, :compose_contiguous ]
|
345
|
+
attach_function :unorm2_getInstance, "unorm2_getInstance#{suffix}", [:pointer, :pointer, :normalization2_mode, :pointer], :pointer
|
346
|
+
attach_function :unorm2_normalize, "unorm2_normalize#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t, :pointer], :int32_t
|
347
|
+
attach_function :unorm2_isNormalized, "unorm2_isNormalized#{suffix}", [:pointer, :pointer, :int32_t, :pointer], :bool
|
348
|
+
|
342
349
|
#
|
343
350
|
# Text Boundary Analysis
|
344
351
|
#
|
@@ -392,10 +399,10 @@ module ICU
|
|
392
399
|
:ignore
|
393
400
|
]
|
394
401
|
enum :number_format_attribute, [
|
395
|
-
:parse_int_only, :grouping_used, :decimal_always_show, :max_integer_digits,
|
396
|
-
:min_integer_digits, :integer_digits, :max_fraction_digits, :min_fraction_digits,
|
397
|
-
:fraction_digits, :multiplier, :grouping_size, :rounding_mode,
|
398
|
-
:rounding_increment, :format_width, :padding_position, :secondary_grouping_size,
|
402
|
+
:parse_int_only, :grouping_used, :decimal_always_show, :max_integer_digits,
|
403
|
+
:min_integer_digits, :integer_digits, :max_fraction_digits, :min_fraction_digits,
|
404
|
+
:fraction_digits, :multiplier, :grouping_size, :rounding_mode,
|
405
|
+
:rounding_increment, :format_width, :padding_position, :secondary_grouping_size,
|
399
406
|
:significant_digits_used, :min_significant_digits, :max_significant_digits, :lenient_parse
|
400
407
|
]
|
401
408
|
attach_function :unum_open, "unum_open#{suffix}", [:number_format_style, :pointer, :int32_t, :string, :pointer, :pointer ], :pointer
|
data/lib/ffi-icu/normalizer.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
module ICU
|
2
|
+
class Normalizer
|
3
|
+
# support for newer ICU normalization API
|
4
|
+
|
5
|
+
def initialize(package_name = nil, name = 'nfc', mode = :decompose)
|
6
|
+
Lib.check_error do |error|
|
7
|
+
@instance = Lib.unorm2_getInstance(package_name, name, mode, error)
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
def normalize(input)
|
12
|
+
input_length = input.jlength
|
13
|
+
in_ptr = UCharPointer.from_string(input)
|
14
|
+
needed_length = capacity = 0
|
15
|
+
out_ptr = UCharPointer.new(needed_length)
|
16
|
+
|
17
|
+
retried = false
|
18
|
+
begin
|
19
|
+
Lib.check_error do |error|
|
20
|
+
needed_length = Lib.unorm2_normalize(@instance, in_ptr, input_length, out_ptr, capacity, error)
|
21
|
+
end
|
22
|
+
rescue BufferOverflowError
|
23
|
+
raise BufferOverflowError, "needed: #{needed_length}" if retried
|
24
|
+
|
25
|
+
capacity = needed_length
|
26
|
+
out_ptr = out_ptr.resized_to needed_length
|
27
|
+
|
28
|
+
retried = true
|
29
|
+
retry
|
30
|
+
end
|
31
|
+
|
32
|
+
out_ptr.string
|
33
|
+
end
|
34
|
+
|
35
|
+
def is_normailzed?(input)
|
36
|
+
input_length = input.jlength
|
37
|
+
in_ptr = UCharPointer.from_string(input)
|
38
|
+
|
39
|
+
Lib.check_error do |error|
|
40
|
+
result = Lib.unorm2_isNormalized(@instance, in_ptr, input_length, error)
|
41
|
+
end
|
42
|
+
|
43
|
+
result
|
44
|
+
end
|
45
|
+
|
46
|
+
end # Normalizer
|
47
|
+
end # ICU
|
data/lib/ffi-icu/version.rb
CHANGED
data/spec/normalizer_spec.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
module ICU
|
6
|
+
describe Normalizer do
|
7
|
+
describe 'NFD: nfc decompose' do
|
8
|
+
let(:normalizer) { ICU::Normalizer.new(nil, 'nfc', :decompose) }
|
9
|
+
|
10
|
+
it "should normalize a string" do
|
11
|
+
normalizer.normalize("Å").unpack("U*").should == [65, 778]
|
12
|
+
normalizer.normalize("ô").unpack("U*").should == [111, 770]
|
13
|
+
normalizer.normalize("a").unpack("U*").should == [97]
|
14
|
+
normalizer.normalize("中文").unpack("U*").should == [20013, 25991]
|
15
|
+
normalizer.normalize("Äffin").unpack("U*").should == [65, 776, 102, 102, 105, 110]
|
16
|
+
normalizer.normalize("Äffin").unpack("U*").should == [65, 776, 64259, 110]
|
17
|
+
normalizer.normalize("Henry IV").unpack("U*").should == [72, 101, 110, 114, 121, 32, 73, 86]
|
18
|
+
normalizer.normalize("Henry Ⅳ").unpack("U*").should == [72, 101, 110, 114, 121, 32, 8547]
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
describe 'NFC: nfc compose' do
|
23
|
+
let(:normalizer) { ICU::Normalizer.new(nil, 'nfc', :compose) }
|
24
|
+
|
25
|
+
it "should normalize a string" do
|
26
|
+
normalizer.normalize("Å").unpack("U*").should == [197]
|
27
|
+
normalizer.normalize("ô").unpack("U*").should == [244]
|
28
|
+
normalizer.normalize("a").unpack("U*").should == [97]
|
29
|
+
normalizer.normalize("中文").unpack("U*").should == [20013, 25991]
|
30
|
+
normalizer.normalize("Äffin").unpack("U*").should == [196, 102, 102, 105, 110]
|
31
|
+
normalizer.normalize("Äffin").unpack("U*").should == [196, 64259, 110]
|
32
|
+
normalizer.normalize("Henry IV").unpack("U*").should == [72, 101, 110, 114, 121, 32, 73, 86]
|
33
|
+
normalizer.normalize("Henry Ⅳ").unpack("U*").should == [72, 101, 110, 114, 121, 32, 8547]
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
describe 'NFKD: nfkc decompose' do
|
38
|
+
let(:normalizer) { ICU::Normalizer.new(nil, 'nfkc', :decompose) }
|
39
|
+
|
40
|
+
it "should normalize a string" do
|
41
|
+
normalizer.normalize("Äffin").unpack("U*").should == [65, 776, 102, 102, 105, 110]
|
42
|
+
normalizer.normalize("Äffin").unpack("U*").should == [65, 776, 102, 102, 105, 110]
|
43
|
+
normalizer.normalize("Henry IV").unpack("U*").should == [72, 101, 110, 114, 121, 32, 73, 86]
|
44
|
+
normalizer.normalize("Henry Ⅳ").unpack("U*").should == [72, 101, 110, 114, 121, 32, 73, 86]
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
describe 'NFKC: nfkc compose' do
|
49
|
+
let(:normalizer) { ICU::Normalizer.new(nil, 'nfkc', :compose) }
|
50
|
+
|
51
|
+
it "should normalize a string" do
|
52
|
+
normalizer.normalize("Äffin").unpack("U*").should == [196, 102, 102, 105, 110]
|
53
|
+
normalizer.normalize("Äffin").unpack("U*").should == [196, 102, 102, 105, 110]
|
54
|
+
normalizer.normalize("Henry IV").unpack("U*").should == [72, 101, 110, 114, 121, 32, 73, 86]
|
55
|
+
normalizer.normalize("Henry Ⅳ").unpack("U*").should == [72, 101, 110, 114, 121, 32, 73, 86]
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end # Normalizer
|
59
|
+
end # ICU
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ffi-icu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jari Bakken
|
@@ -86,6 +86,7 @@ files:
|
|
86
86
|
- lib/ffi-icu/lib/util.rb
|
87
87
|
- lib/ffi-icu/locale.rb
|
88
88
|
- lib/ffi-icu/normalization.rb
|
89
|
+
- lib/ffi-icu/normalizer.rb
|
89
90
|
- lib/ffi-icu/number_formatting.rb
|
90
91
|
- lib/ffi-icu/time_formatting.rb
|
91
92
|
- lib/ffi-icu/transliteration.rb
|
@@ -98,6 +99,7 @@ files:
|
|
98
99
|
- spec/lib_spec.rb
|
99
100
|
- spec/locale_spec.rb
|
100
101
|
- spec/normalization_spec.rb
|
102
|
+
- spec/normalizer_spec.rb
|
101
103
|
- spec/number_formatting_spec.rb
|
102
104
|
- spec/spec.opts
|
103
105
|
- spec/spec_helper.rb
|
@@ -137,6 +139,7 @@ test_files:
|
|
137
139
|
- spec/lib_spec.rb
|
138
140
|
- spec/locale_spec.rb
|
139
141
|
- spec/normalization_spec.rb
|
142
|
+
- spec/normalizer_spec.rb
|
140
143
|
- spec/number_formatting_spec.rb
|
141
144
|
- spec/spec.opts
|
142
145
|
- spec/spec_helper.rb
|