ffi-icu 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +7 -6
- data/lib/ffi-icu.rb +1 -1
- data/lib/ffi-icu/lib.rb +11 -4
- data/lib/ffi-icu/normalizer.rb +47 -0
- data/lib/ffi-icu/version.rb +1 -1
- data/spec/normalizer_spec.rb +59 -0
- metadata +4 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dcb52a7fa85970538582cc014850664a50a9725a
|
4
|
+
data.tar.gz: e226b65e6436f613207145582c1c6b0ccd9d2cf0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 62af7b275d1e40bc2d39ce81792f6bff5dc38b4473fcbaf2c507ce8420dd93ce9a2ade21819281288cd893350e3afd930a065d7b6d9d0359e75fedad86a97325
|
7
|
+
data.tar.gz: b0e89ecc1ca69ddbdb4d6a4ae5818f763b69006609c642aecd484fed96c864bd0db3a0c48cba2e8e56bdae616dc1ff32e2995c126178334a0d6d9b923d09c8c6
|
data/README.md
CHANGED
@@ -127,19 +127,20 @@ Tested on:
|
|
127
127
|
|
128
128
|
Platforms:
|
129
129
|
|
130
|
-
* OS X 10.6
|
131
|
-
*
|
130
|
+
* OS X 10.6 - 10.10
|
131
|
+
* Travis' Linux
|
132
132
|
|
133
133
|
Rubies:
|
134
134
|
|
135
|
-
*
|
136
|
-
*
|
135
|
+
* 1.9.3
|
136
|
+
* 2.0.0
|
137
|
+
* 2.1.0
|
138
|
+
* ruby-head
|
137
139
|
|
138
140
|
TODO:
|
139
141
|
=====
|
140
142
|
|
141
|
-
*
|
142
|
-
- date formatting
|
143
|
+
* Any other useful part of ICU?
|
143
144
|
* Windows?!
|
144
145
|
|
145
146
|
Note on Patches/Pull Requests
|
data/lib/ffi-icu.rb
CHANGED
@@ -38,7 +38,7 @@ require "ffi-icu/collation"
|
|
38
38
|
require "ffi-icu/locale"
|
39
39
|
require "ffi-icu/transliteration"
|
40
40
|
require "ffi-icu/normalization"
|
41
|
+
require "ffi-icu/normalizer"
|
41
42
|
require "ffi-icu/break_iterator"
|
42
43
|
require "ffi-icu/number_formatting"
|
43
44
|
require "ffi-icu/time_formatting"
|
44
|
-
|
data/lib/ffi-icu/lib.rb
CHANGED
@@ -339,6 +339,13 @@ module ICU
|
|
339
339
|
|
340
340
|
attach_function :unorm_normalize, "unorm_normalize#{suffix}", [:pointer, :int32_t, :normalization_mode, :int32_t, :pointer, :int32_t, :pointer], :int32_t
|
341
341
|
|
342
|
+
# http://icu-project.org/apiref/icu4c/unorm2_8h.html
|
343
|
+
|
344
|
+
enum :normalization2_mode, [ :compose, :decompose, :fcd, :compose_contiguous ]
|
345
|
+
attach_function :unorm2_getInstance, "unorm2_getInstance#{suffix}", [:pointer, :pointer, :normalization2_mode, :pointer], :pointer
|
346
|
+
attach_function :unorm2_normalize, "unorm2_normalize#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t, :pointer], :int32_t
|
347
|
+
attach_function :unorm2_isNormalized, "unorm2_isNormalized#{suffix}", [:pointer, :pointer, :int32_t, :pointer], :bool
|
348
|
+
|
342
349
|
#
|
343
350
|
# Text Boundary Analysis
|
344
351
|
#
|
@@ -392,10 +399,10 @@ module ICU
|
|
392
399
|
:ignore
|
393
400
|
]
|
394
401
|
enum :number_format_attribute, [
|
395
|
-
:parse_int_only, :grouping_used, :decimal_always_show, :max_integer_digits,
|
396
|
-
:min_integer_digits, :integer_digits, :max_fraction_digits, :min_fraction_digits,
|
397
|
-
:fraction_digits, :multiplier, :grouping_size, :rounding_mode,
|
398
|
-
:rounding_increment, :format_width, :padding_position, :secondary_grouping_size,
|
402
|
+
:parse_int_only, :grouping_used, :decimal_always_show, :max_integer_digits,
|
403
|
+
:min_integer_digits, :integer_digits, :max_fraction_digits, :min_fraction_digits,
|
404
|
+
:fraction_digits, :multiplier, :grouping_size, :rounding_mode,
|
405
|
+
:rounding_increment, :format_width, :padding_position, :secondary_grouping_size,
|
399
406
|
:significant_digits_used, :min_significant_digits, :max_significant_digits, :lenient_parse
|
400
407
|
]
|
401
408
|
attach_function :unum_open, "unum_open#{suffix}", [:number_format_style, :pointer, :int32_t, :string, :pointer, :pointer ], :pointer
|
data/lib/ffi-icu/normalizer.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
module ICU
|
2
|
+
class Normalizer
|
3
|
+
# support for newer ICU normalization API
|
4
|
+
|
5
|
+
def initialize(package_name = nil, name = 'nfc', mode = :decompose)
|
6
|
+
Lib.check_error do |error|
|
7
|
+
@instance = Lib.unorm2_getInstance(package_name, name, mode, error)
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
def normalize(input)
|
12
|
+
input_length = input.jlength
|
13
|
+
in_ptr = UCharPointer.from_string(input)
|
14
|
+
needed_length = capacity = 0
|
15
|
+
out_ptr = UCharPointer.new(needed_length)
|
16
|
+
|
17
|
+
retried = false
|
18
|
+
begin
|
19
|
+
Lib.check_error do |error|
|
20
|
+
needed_length = Lib.unorm2_normalize(@instance, in_ptr, input_length, out_ptr, capacity, error)
|
21
|
+
end
|
22
|
+
rescue BufferOverflowError
|
23
|
+
raise BufferOverflowError, "needed: #{needed_length}" if retried
|
24
|
+
|
25
|
+
capacity = needed_length
|
26
|
+
out_ptr = out_ptr.resized_to needed_length
|
27
|
+
|
28
|
+
retried = true
|
29
|
+
retry
|
30
|
+
end
|
31
|
+
|
32
|
+
out_ptr.string
|
33
|
+
end
|
34
|
+
|
35
|
+
def is_normailzed?(input)
|
36
|
+
input_length = input.jlength
|
37
|
+
in_ptr = UCharPointer.from_string(input)
|
38
|
+
|
39
|
+
Lib.check_error do |error|
|
40
|
+
result = Lib.unorm2_isNormalized(@instance, in_ptr, input_length, error)
|
41
|
+
end
|
42
|
+
|
43
|
+
result
|
44
|
+
end
|
45
|
+
|
46
|
+
end # Normalizer
|
47
|
+
end # ICU
|
data/lib/ffi-icu/version.rb
CHANGED
data/spec/normalizer_spec.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
module ICU
|
6
|
+
describe Normalizer do
|
7
|
+
describe 'NFD: nfc decompose' do
|
8
|
+
let(:normalizer) { ICU::Normalizer.new(nil, 'nfc', :decompose) }
|
9
|
+
|
10
|
+
it "should normalize a string" do
|
11
|
+
normalizer.normalize("Å").unpack("U*").should == [65, 778]
|
12
|
+
normalizer.normalize("ô").unpack("U*").should == [111, 770]
|
13
|
+
normalizer.normalize("a").unpack("U*").should == [97]
|
14
|
+
normalizer.normalize("中文").unpack("U*").should == [20013, 25991]
|
15
|
+
normalizer.normalize("Äffin").unpack("U*").should == [65, 776, 102, 102, 105, 110]
|
16
|
+
normalizer.normalize("Äffin").unpack("U*").should == [65, 776, 64259, 110]
|
17
|
+
normalizer.normalize("Henry IV").unpack("U*").should == [72, 101, 110, 114, 121, 32, 73, 86]
|
18
|
+
normalizer.normalize("Henry Ⅳ").unpack("U*").should == [72, 101, 110, 114, 121, 32, 8547]
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
describe 'NFC: nfc compose' do
|
23
|
+
let(:normalizer) { ICU::Normalizer.new(nil, 'nfc', :compose) }
|
24
|
+
|
25
|
+
it "should normalize a string" do
|
26
|
+
normalizer.normalize("Å").unpack("U*").should == [197]
|
27
|
+
normalizer.normalize("ô").unpack("U*").should == [244]
|
28
|
+
normalizer.normalize("a").unpack("U*").should == [97]
|
29
|
+
normalizer.normalize("中文").unpack("U*").should == [20013, 25991]
|
30
|
+
normalizer.normalize("Äffin").unpack("U*").should == [196, 102, 102, 105, 110]
|
31
|
+
normalizer.normalize("Äffin").unpack("U*").should == [196, 64259, 110]
|
32
|
+
normalizer.normalize("Henry IV").unpack("U*").should == [72, 101, 110, 114, 121, 32, 73, 86]
|
33
|
+
normalizer.normalize("Henry Ⅳ").unpack("U*").should == [72, 101, 110, 114, 121, 32, 8547]
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
describe 'NFKD: nfkc decompose' do
|
38
|
+
let(:normalizer) { ICU::Normalizer.new(nil, 'nfkc', :decompose) }
|
39
|
+
|
40
|
+
it "should normalize a string" do
|
41
|
+
normalizer.normalize("Äffin").unpack("U*").should == [65, 776, 102, 102, 105, 110]
|
42
|
+
normalizer.normalize("Äffin").unpack("U*").should == [65, 776, 102, 102, 105, 110]
|
43
|
+
normalizer.normalize("Henry IV").unpack("U*").should == [72, 101, 110, 114, 121, 32, 73, 86]
|
44
|
+
normalizer.normalize("Henry Ⅳ").unpack("U*").should == [72, 101, 110, 114, 121, 32, 73, 86]
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
describe 'NFKC: nfkc compose' do
|
49
|
+
let(:normalizer) { ICU::Normalizer.new(nil, 'nfkc', :compose) }
|
50
|
+
|
51
|
+
it "should normalize a string" do
|
52
|
+
normalizer.normalize("Äffin").unpack("U*").should == [196, 102, 102, 105, 110]
|
53
|
+
normalizer.normalize("Äffin").unpack("U*").should == [196, 102, 102, 105, 110]
|
54
|
+
normalizer.normalize("Henry IV").unpack("U*").should == [72, 101, 110, 114, 121, 32, 73, 86]
|
55
|
+
normalizer.normalize("Henry Ⅳ").unpack("U*").should == [72, 101, 110, 114, 121, 32, 73, 86]
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end # Normalizer
|
59
|
+
end # ICU
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ffi-icu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jari Bakken
|
@@ -86,6 +86,7 @@ files:
|
|
86
86
|
- lib/ffi-icu/lib/util.rb
|
87
87
|
- lib/ffi-icu/locale.rb
|
88
88
|
- lib/ffi-icu/normalization.rb
|
89
|
+
- lib/ffi-icu/normalizer.rb
|
89
90
|
- lib/ffi-icu/number_formatting.rb
|
90
91
|
- lib/ffi-icu/time_formatting.rb
|
91
92
|
- lib/ffi-icu/transliteration.rb
|
@@ -98,6 +99,7 @@ files:
|
|
98
99
|
- spec/lib_spec.rb
|
99
100
|
- spec/locale_spec.rb
|
100
101
|
- spec/normalization_spec.rb
|
102
|
+
- spec/normalizer_spec.rb
|
101
103
|
- spec/number_formatting_spec.rb
|
102
104
|
- spec/spec.opts
|
103
105
|
- spec/spec_helper.rb
|
@@ -137,6 +139,7 @@ test_files:
|
|
137
139
|
- spec/lib_spec.rb
|
138
140
|
- spec/locale_spec.rb
|
139
141
|
- spec/normalization_spec.rb
|
142
|
+
- spec/normalizer_spec.rb
|
140
143
|
- spec/number_formatting_spec.rb
|
141
144
|
- spec/spec.opts
|
142
145
|
- spec/spec_helper.rb
|