ffi-icu 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/README.rdoc +17 -3
- data/VERSION +1 -1
- data/lib/ffi-icu.rb +10 -0
- data/lib/ffi-icu/chardet.rb +1 -1
- data/lib/ffi-icu/collation.rb +9 -2
- data/lib/ffi-icu/lib.rb +85 -14
- data/lib/ffi-icu/normalization.rb +29 -0
- data/lib/ffi-icu/transliteration.rb +79 -0
- data/lib/ffi-icu/uchar.rb +29 -6
- data/spec/collation_spec.rb +3 -2
- data/spec/normalization_spec.rb +24 -0
- data/spec/transliteration_spec.rb +33 -0
- data/test.c +56 -0
- metadata +20 -4
data/.gitignore
CHANGED
data/README.rdoc
CHANGED
@@ -1,10 +1,19 @@
|
|
1
1
|
= ffi-icu
|
2
2
|
|
3
|
-
Simple FFI wrappers for things I need from ICU.
|
3
|
+
Simple FFI wrappers for things I need from ICU. For the full thing, check out ICU4R instead.
|
4
|
+
|
5
|
+
= Gem
|
6
|
+
|
7
|
+
* http://rubygems.org/gems/ffi-icu
|
8
|
+
|
9
|
+
gem install ffi-icu
|
4
10
|
|
5
11
|
= Dependencies
|
6
12
|
|
7
|
-
ICU
|
13
|
+
ICU. If you get messages that the library or functions are not found, you can
|
14
|
+
set some environment variables to tell ffi-icu where to find it, i.e.:
|
15
|
+
|
16
|
+
FFI_ICU_LIB="icui18n.so" FFI_ICU_VERSION_SUFFIX="_3_8" ruby -r ffi-icu
|
8
17
|
|
9
18
|
= Features
|
10
19
|
|
@@ -54,7 +63,12 @@ Rubies:
|
|
54
63
|
* MRI 1.9.1
|
55
64
|
* MRI 1.8.7
|
56
65
|
|
57
|
-
|
66
|
+
= TODO:
|
67
|
+
|
68
|
+
* Useful ICU stuff:
|
69
|
+
- number formatting (decimal points, thousand separators, currency)
|
70
|
+
- date formatting
|
71
|
+
* Windows?!
|
58
72
|
|
59
73
|
== Note on Patches/Pull Requests
|
60
74
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.1
|
1
|
+
0.0.2
|
data/lib/ffi-icu.rb
CHANGED
@@ -14,10 +14,20 @@ module ICU
|
|
14
14
|
os
|
15
15
|
end
|
16
16
|
end
|
17
|
+
|
18
|
+
def self.ruby19?
|
19
|
+
RUBY_VERSION >= '1.9'
|
20
|
+
end
|
17
21
|
end
|
18
22
|
|
19
23
|
require "ffi-icu/lib"
|
20
24
|
require "ffi-icu/uchar"
|
21
25
|
require "ffi-icu/chardet"
|
22
26
|
require "ffi-icu/collation"
|
27
|
+
require "ffi-icu/transliteration"
|
28
|
+
require "ffi-icu/normalization"
|
23
29
|
|
30
|
+
unless ICU.ruby19?
|
31
|
+
require 'jcode'
|
32
|
+
$KCODE = 'u'
|
33
|
+
end
|
data/lib/ffi-icu/chardet.rb
CHANGED
@@ -44,7 +44,7 @@ module ICU
|
|
44
44
|
def detect_all(str)
|
45
45
|
set_text(str)
|
46
46
|
|
47
|
-
matches_found_ptr = FFI::MemoryPointer.new :
|
47
|
+
matches_found_ptr = FFI::MemoryPointer.new :int32_t
|
48
48
|
array_ptr = Lib.check_error do |status|
|
49
49
|
Lib.ucsdet_detectAll(@detector, matches_found_ptr, status)
|
50
50
|
end
|
data/lib/ffi-icu/collation.rb
CHANGED
@@ -59,8 +59,15 @@ module ICU
|
|
59
59
|
UCharPointer.from_string(b), b.length)
|
60
60
|
end
|
61
61
|
|
62
|
-
|
63
|
-
|
62
|
+
def equal?(*args)
|
63
|
+
return super() if args.empty?
|
64
|
+
|
65
|
+
if args.size != 2
|
66
|
+
raise ArgumentError, "wrong number of arguments (#{args.size} for 2)"
|
67
|
+
end
|
68
|
+
|
69
|
+
a, b = args
|
70
|
+
|
64
71
|
Lib.ucol_equal(@c, UCharPointer.from_string(a), a.length,
|
65
72
|
UCharPointer.from_string(b), b.length)
|
66
73
|
end
|
data/lib/ffi-icu/lib.rb
CHANGED
@@ -2,6 +2,9 @@ module ICU
|
|
2
2
|
class Error < StandardError
|
3
3
|
end
|
4
4
|
|
5
|
+
class BufferOverflowError < StandardError
|
6
|
+
end
|
7
|
+
|
5
8
|
module Lib
|
6
9
|
extend FFI::Library
|
7
10
|
|
@@ -14,6 +17,21 @@ module ICU
|
|
14
17
|
def self.find_icu
|
15
18
|
suffix = ''
|
16
19
|
|
20
|
+
# let the user tell us where the lib is
|
21
|
+
if ENV['FFI_ICU_LIB']
|
22
|
+
libs = ENV['FFI_ICU_LIB'].split(",")
|
23
|
+
ffi_lib *libs
|
24
|
+
|
25
|
+
if ENV['FFI_ICU_VERSION_SUFFIX']
|
26
|
+
return ENV['FFI_ICU_VERSION_SUFFIX']
|
27
|
+
elsif num = libs.first[/\d+$/]
|
28
|
+
return num.split(//).join("_")
|
29
|
+
else
|
30
|
+
return suffix
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# ok, try to find it
|
17
35
|
case ICU.platform
|
18
36
|
when :osx
|
19
37
|
ffi_lib "icucore"
|
@@ -40,7 +58,12 @@ module ICU
|
|
40
58
|
error_code = ptr.read_int
|
41
59
|
|
42
60
|
if error_code > 0
|
43
|
-
|
61
|
+
name = Lib.u_errorName error_code
|
62
|
+
if name == "U_BUFFER_OVERFLOW_ERROR"
|
63
|
+
raise BufferOverflowError
|
64
|
+
else
|
65
|
+
raise Error, name
|
66
|
+
end
|
44
67
|
elsif error_code < 0
|
45
68
|
warn "ffi-icu: #{Lib.u_errorName error_code}"
|
46
69
|
end
|
@@ -49,12 +72,14 @@ module ICU
|
|
49
72
|
end
|
50
73
|
|
51
74
|
def self.enum_ptr_to_array(enum_ptr)
|
52
|
-
length =
|
53
|
-
|
75
|
+
length = check_error do |status|
|
76
|
+
uenum_count(enum_ptr, status)
|
54
77
|
end
|
55
78
|
|
79
|
+
len = FFI::MemoryPointer.new(:int)
|
80
|
+
|
56
81
|
(0...length).map do |idx|
|
57
|
-
|
82
|
+
check_error { |st| uenum_next(enum_ptr, len, st) }
|
58
83
|
end
|
59
84
|
end
|
60
85
|
|
@@ -71,7 +96,8 @@ module ICU
|
|
71
96
|
attach_function :uenum_count, "uenum_count#{suffix}", [:pointer, :pointer], :int
|
72
97
|
attach_function :uenum_close, "uenum_close#{suffix}", [:pointer], :void
|
73
98
|
attach_function :uenum_next, "uenum_next#{suffix}", [:pointer, :pointer, :pointer], :string
|
74
|
-
|
99
|
+
attach_function :u_charsToUChars, "u_charsToUChars#{suffix}", [:string, :pointer, :int32_t], :void
|
100
|
+
attach_function :u_UCharsToChars, "u_UCharsToChars#{suffix}", [:pointer, :string, :int32_t], :void
|
75
101
|
|
76
102
|
# CharDet
|
77
103
|
#
|
@@ -80,12 +106,12 @@ module ICU
|
|
80
106
|
|
81
107
|
attach_function :ucsdet_open, "ucsdet_open#{suffix}", [:pointer], :pointer
|
82
108
|
attach_function :ucsdet_close, "ucsdet_close#{suffix}", [:pointer], :void
|
83
|
-
attach_function :ucsdet_setText, "ucsdet_setText#{suffix}", [:pointer, :string, :
|
84
|
-
attach_function :ucsdet_setDeclaredEncoding, "ucsdet_setDeclaredEncoding#{suffix}", [:pointer, :string, :
|
109
|
+
attach_function :ucsdet_setText, "ucsdet_setText#{suffix}", [:pointer, :string, :int32_t, :pointer], :void
|
110
|
+
attach_function :ucsdet_setDeclaredEncoding, "ucsdet_setDeclaredEncoding#{suffix}", [:pointer, :string, :int32_t, :pointer], :void
|
85
111
|
attach_function :ucsdet_detect, "ucsdet_detect#{suffix}", [:pointer, :pointer], :pointer
|
86
112
|
attach_function :ucsdet_detectAll, "ucsdet_detectAll#{suffix}", [:pointer, :pointer, :pointer], :pointer
|
87
113
|
attach_function :ucsdet_getName, "ucsdet_getName#{suffix}", [:pointer, :pointer], :string
|
88
|
-
attach_function :ucsdet_getConfidence, "ucsdet_getConfidence#{suffix}", [:pointer, :pointer], :
|
114
|
+
attach_function :ucsdet_getConfidence, "ucsdet_getConfidence#{suffix}", [:pointer, :pointer], :int32_t
|
89
115
|
attach_function :ucsdet_getLanguage, "ucsdet_getLanguage#{suffix}", [:pointer, :pointer], :string
|
90
116
|
attach_function :ucsdet_getAllDetectableCharsets, "ucsdet_getAllDetectableCharsets#{suffix}", [:pointer, :pointer], :pointer
|
91
117
|
attach_function :ucsdet_isInputFilterEnabled, "ucsdet_isInputFilterEnabled#{suffix}", [:pointer], :bool
|
@@ -98,15 +124,60 @@ module ICU
|
|
98
124
|
|
99
125
|
attach_function :ucol_open, "ucol_open#{suffix}", [:string, :pointer], :pointer
|
100
126
|
attach_function :ucol_close, "ucol_close#{suffix}", [:pointer], :void
|
101
|
-
attach_function :ucol_strcoll, "ucol_strcoll#{suffix}", [:pointer, :pointer, :
|
127
|
+
attach_function :ucol_strcoll, "ucol_strcoll#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t], :int
|
102
128
|
attach_function :ucol_getKeywords, "ucol_getKeywords#{suffix}", [:pointer], :pointer
|
103
129
|
attach_function :ucol_getKeywordValues, "ucol_getKeywordValues#{suffix}", [:string, :pointer], :pointer
|
104
|
-
attach_function :ucol_getAvailable, "ucol_getAvailable#{suffix}", [:
|
105
|
-
attach_function :ucol_countAvailable, "ucol_countAvailable#{suffix}", [], :
|
130
|
+
attach_function :ucol_getAvailable, "ucol_getAvailable#{suffix}", [:int32_t], :string
|
131
|
+
attach_function :ucol_countAvailable, "ucol_countAvailable#{suffix}", [], :int32_t
|
106
132
|
attach_function :ucol_getLocale, "ucol_getLocale#{suffix}", [:pointer, :int, :pointer], :string
|
107
|
-
attach_function :ucol_greater, "ucol_greater#{suffix}", [:pointer, :pointer, :
|
108
|
-
attach_function :ucol_greaterOrEqual, "ucol_greaterOrEqual#{suffix}", [:pointer, :pointer, :
|
109
|
-
attach_function :ucol_equal, "ucol_equal#{suffix}", [:pointer, :pointer, :
|
133
|
+
attach_function :ucol_greater, "ucol_greater#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t], :bool
|
134
|
+
attach_function :ucol_greaterOrEqual, "ucol_greaterOrEqual#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t], :bool
|
135
|
+
attach_function :ucol_equal, "ucol_equal#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t], :bool
|
136
|
+
|
137
|
+
# Transliteration
|
138
|
+
#
|
139
|
+
# http://icu-project.org/apiref/icu4c/utrans_8h.html
|
140
|
+
#
|
141
|
+
|
142
|
+
class UParseError < FFI::Struct
|
143
|
+
layout :line, :int32_t,
|
144
|
+
:offset, :int32_t,
|
145
|
+
:pre_context, :pointer,
|
146
|
+
:post_context, :pointer
|
147
|
+
|
148
|
+
|
149
|
+
end
|
150
|
+
|
151
|
+
class UTransPosition < FFI::Struct
|
152
|
+
layout :context_start, :int32_t,
|
153
|
+
:context_limit, :int32_t,
|
154
|
+
:start, :int32_t,
|
155
|
+
:end, :int32_t
|
156
|
+
|
157
|
+
end
|
158
|
+
|
159
|
+
enum :trans_direction, [:forward, :reverse]
|
160
|
+
|
161
|
+
attach_function :utrans_openIDs, "utrans_openIDs#{suffix}", [:pointer], :pointer
|
162
|
+
attach_function :utrans_openU, "utrans_openU#{suffix}", [:pointer, :int32_t, :trans_direction, :pointer, :int32_t, :pointer, :pointer], :pointer
|
163
|
+
attach_function :utrans_open, "utrans_open#{suffix}", [:string, :trans_direction, :pointer, :int32_t, :pointer, :pointer], :pointer
|
164
|
+
attach_function :utrans_close, "utrans_close#{suffix}", [:pointer], :void
|
165
|
+
attach_function :utrans_transUChars, "utrans_transUChars#{suffix}", [:pointer, :pointer, :pointer, :int32_t, :int32_t, :pointer, :pointer], :void
|
166
|
+
|
167
|
+
# Normalization
|
168
|
+
#
|
169
|
+
# http://icu-project.org/apiref/icu4c/unorm_8h.html
|
170
|
+
#
|
171
|
+
|
172
|
+
enum :normalization_mode, [ :none, 1,
|
173
|
+
:nfd, 2,
|
174
|
+
:nfkd, 3,
|
175
|
+
:nfc, 4,
|
176
|
+
:default, 4,
|
177
|
+
:nfkc, 5,
|
178
|
+
:fcd, 6
|
179
|
+
]
|
110
180
|
|
181
|
+
attach_function :unorm_normalize, "unorm_normalize#{suffix}", [:pointer, :int32_t, :normalization_mode, :int32_t, :pointer, :int32_t, :pointer], :int32_t
|
111
182
|
end # Lib
|
112
183
|
end # ICU
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module ICU
|
2
|
+
module Normalization
|
3
|
+
|
4
|
+
def self.normalize(input, mode = :default)
|
5
|
+
input_length = ICU.ruby19? ? input.length : input.jlength
|
6
|
+
needed_length = 0
|
7
|
+
result_length = 0
|
8
|
+
|
9
|
+
retried = false
|
10
|
+
ptr = nil
|
11
|
+
|
12
|
+
begin
|
13
|
+
Lib.check_error do |error|
|
14
|
+
needed_length = Lib.unorm_normalize(UCharPointer.from_string(input), input_length, mode, 0, ptr, result_length, error)
|
15
|
+
end
|
16
|
+
rescue BufferOverflowError
|
17
|
+
raise if retried
|
18
|
+
ptr = UCharPointer.from_string("\0" * needed_length)
|
19
|
+
result_length = needed_length + 1
|
20
|
+
|
21
|
+
retried = true
|
22
|
+
retry
|
23
|
+
end
|
24
|
+
|
25
|
+
ptr.string if ptr
|
26
|
+
end
|
27
|
+
|
28
|
+
end # Normalization
|
29
|
+
end # ICU
|
@@ -0,0 +1,79 @@
|
|
1
|
+
module ICU
|
2
|
+
module Transliteration
|
3
|
+
|
4
|
+
class << self
|
5
|
+
def transliterate(translit_id, str)
|
6
|
+
t = Transliterator.new translit_id
|
7
|
+
res = t.transliterate str
|
8
|
+
t.close
|
9
|
+
|
10
|
+
res
|
11
|
+
end
|
12
|
+
alias_method :translit, :transliterate
|
13
|
+
|
14
|
+
def available_ids
|
15
|
+
enum_ptr = Lib.check_error do |error|
|
16
|
+
Lib.utrans_openIDs(error)
|
17
|
+
end
|
18
|
+
|
19
|
+
result = Lib.enum_ptr_to_array(enum_ptr)
|
20
|
+
Lib.uenum_close(enum_ptr)
|
21
|
+
|
22
|
+
result
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class Transliterator
|
27
|
+
|
28
|
+
def initialize(id, direction = :forward)
|
29
|
+
@parse_error = Lib::UParseError.new
|
30
|
+
Lib.check_error do |status|
|
31
|
+
# couldn't get utrans_openU to work properly, so using deprecated utrans_open for now
|
32
|
+
@tr = Lib.utrans_open(id, direction, nil, 0, @parse_error, status)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def transliterate(from)
|
37
|
+
# this is a bit unpleasant
|
38
|
+
|
39
|
+
unicode_size = from.unpack("U*").size
|
40
|
+
capacity = from.bytesize + 1
|
41
|
+
buf = UCharPointer.from_string(from)
|
42
|
+
limit = FFI::MemoryPointer.new :int32
|
43
|
+
text_length = FFI::MemoryPointer.new :int32
|
44
|
+
|
45
|
+
retried = false
|
46
|
+
|
47
|
+
begin
|
48
|
+
# resets to original size on retry
|
49
|
+
[limit, text_length].each do |ptr|
|
50
|
+
ptr.put_int32(0, unicode_size)
|
51
|
+
end
|
52
|
+
|
53
|
+
Lib.check_error do |error|
|
54
|
+
Lib.utrans_transUChars(@tr, buf, text_length, capacity, 0, limit, error)
|
55
|
+
end
|
56
|
+
rescue BufferOverflowError
|
57
|
+
new_size = text_length.get_int32(0)
|
58
|
+
$stderr.puts "BufferOverflowError, needs: #{new_size}" if $DEBUG
|
59
|
+
|
60
|
+
raise BufferOverflowError, "needed #{new_size}" if retried
|
61
|
+
|
62
|
+
capacity = new_size + 1
|
63
|
+
buf = buf.resized_to capacity
|
64
|
+
retried = true
|
65
|
+
|
66
|
+
retry
|
67
|
+
end
|
68
|
+
|
69
|
+
buf.string text_length.get_int32(0)
|
70
|
+
end
|
71
|
+
|
72
|
+
def close
|
73
|
+
Lib.utrans_close @tr
|
74
|
+
end
|
75
|
+
|
76
|
+
end # Transliterator
|
77
|
+
end # Translit
|
78
|
+
end # ICU
|
79
|
+
|
data/lib/ffi-icu/uchar.rb
CHANGED
@@ -1,11 +1,34 @@
|
|
1
1
|
module ICU
|
2
|
-
|
3
2
|
class UCharPointer < FFI::MemoryPointer
|
3
|
+
|
4
|
+
UCHAR_TYPE = :uint16 # not sure how platform-dependent this is..
|
5
|
+
TYPE_SIZE = FFI.type_size(UCHAR_TYPE)
|
6
|
+
|
4
7
|
def self.from_string(str)
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
+
str = str.encode("UTF-8") if str.respond_to? :encode
|
9
|
+
bytes = str.unpack("U*")
|
10
|
+
|
11
|
+
ptr = new UCHAR_TYPE, bytes.size
|
12
|
+
ptr.put_array_of_uint16 0, bytes
|
13
|
+
|
14
|
+
ptr
|
15
|
+
end
|
16
|
+
|
17
|
+
def resized_to(new_size)
|
18
|
+
raise "new_size must be larger than current size" if new_size < size
|
19
|
+
resized = self.class.new UCHAR_TYPE, new_size
|
20
|
+
resized.put_bytes(0, get_bytes(0, size))
|
21
|
+
|
22
|
+
resized
|
23
|
+
end
|
24
|
+
|
25
|
+
def string(length = nil)
|
26
|
+
length ||= size / TYPE_SIZE
|
27
|
+
|
28
|
+
wstring = get_array_of_uint16(0, length)
|
29
|
+
wstring.pack("U*")
|
8
30
|
end
|
9
|
-
end
|
10
31
|
|
11
|
-
|
32
|
+
|
33
|
+
end # UCharPointer
|
34
|
+
end # ICU
|
data/spec/collation_spec.rb
CHANGED
@@ -6,7 +6,7 @@ module ICU
|
|
6
6
|
module Collation
|
7
7
|
describe Collator do
|
8
8
|
|
9
|
-
before { @c = Collator.new("
|
9
|
+
before { @c = Collator.new("nb") }
|
10
10
|
after { @c.close }
|
11
11
|
|
12
12
|
it "should collate an array of strings" do
|
@@ -43,7 +43,8 @@ module ICU
|
|
43
43
|
end
|
44
44
|
|
45
45
|
it "should know if a string is equal to another" do
|
46
|
-
@c.should
|
46
|
+
@c.should be_equal("a", "a")
|
47
|
+
@c.should_not be_equal("a", "b")
|
47
48
|
end
|
48
49
|
|
49
50
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
module ICU
|
6
|
+
module Normalization
|
7
|
+
# http://bugs.icu-project.org/trac/browser/icu/trunk/source/test/cintltst/cnormtst.c
|
8
|
+
|
9
|
+
describe "Normalization" do
|
10
|
+
|
11
|
+
it "should normalize a string - decomposed" do
|
12
|
+
ICU::Normalization.normalize("Å", :nfd).unpack("U*").should == [65, 778]
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should normalize a string - composed" do
|
16
|
+
ICU::Normalization.normalize("Å", :nfc).unpack("U*").should == [197]
|
17
|
+
end
|
18
|
+
|
19
|
+
# TODO: add more normalization tests
|
20
|
+
|
21
|
+
|
22
|
+
end
|
23
|
+
end # Normalization
|
24
|
+
end # ICU
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
module ICU
|
6
|
+
describe Transliteration::Transliterator do
|
7
|
+
|
8
|
+
def transliterator(*args)
|
9
|
+
@t = Transliteration::Transliterator.new(*args)
|
10
|
+
end
|
11
|
+
|
12
|
+
after { @t.close if @t }
|
13
|
+
|
14
|
+
[
|
15
|
+
{ :id => "Any-Hex", :input => "abcde", :output => "\\u0061\\u0062\\u0063\\u0064\\u0065" },
|
16
|
+
{ :id => "Lower", :input => "ABC", :output => "abc" },
|
17
|
+
].each do |test|
|
18
|
+
|
19
|
+
it "should transliterate #{test[:id]}" do
|
20
|
+
transliterator(test[:id]).transliterate(test[:input]).should == test[:output]
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
end # Transliterator
|
25
|
+
|
26
|
+
describe Transliteration do
|
27
|
+
it "should provide a list of available ids" do
|
28
|
+
ids = ICU::Transliteration.available_ids
|
29
|
+
ids.should be_kind_of(Array)
|
30
|
+
ids.should_not be_empty
|
31
|
+
end
|
32
|
+
end # Transliteration
|
33
|
+
end # ICU
|
data/test.c
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
#include <unicode/unorm.h>
|
2
|
+
#include <unicode/ustdio.h>
|
3
|
+
#include <stdio.h>
|
4
|
+
#include <stdarg.h>
|
5
|
+
#include <iconv.h>
|
6
|
+
#include <string.h>
|
7
|
+
#include <stdlib.h>
|
8
|
+
|
9
|
+
void print_uchars(UChar *str) {
|
10
|
+
UFILE *out = u_finit(stdout, NULL, NULL);
|
11
|
+
u_fprintf(out, "uchars: %S\n", str);
|
12
|
+
u_fclose(out);
|
13
|
+
}
|
14
|
+
|
15
|
+
void print_error(UErrorCode status) {
|
16
|
+
printf("err: %s (%d)\n", u_errorName(status), status);
|
17
|
+
}
|
18
|
+
|
19
|
+
int main (int argc, char const *argv[])
|
20
|
+
{
|
21
|
+
|
22
|
+
UTransliterator* trans = NULL;
|
23
|
+
UErrorCode status = U_ZERO_ERROR;
|
24
|
+
|
25
|
+
trans = utrans_open("Any-Hex", UTRANS_FORWARD, NULL, 0, NULL, &status);
|
26
|
+
if(U_FAILURE(status)) {
|
27
|
+
print_error(status);
|
28
|
+
exit(1);
|
29
|
+
}
|
30
|
+
|
31
|
+
UChar from[256];
|
32
|
+
UChar buf[6];
|
33
|
+
|
34
|
+
int32_t text_length, limit;
|
35
|
+
|
36
|
+
u_uastrcpy(from, "abcde");
|
37
|
+
u_strcpy(buf, from);
|
38
|
+
|
39
|
+
limit = text_length = u_strlen(buf);
|
40
|
+
printf("limit: %d\n", limit);
|
41
|
+
printf("text_length: %d\n", limit);
|
42
|
+
|
43
|
+
utrans_transUChars(trans, buf, &text_length, 256, 0, &limit, &status);
|
44
|
+
|
45
|
+
printf("uchar ptr length after: %d\n", u_strlen(buf));
|
46
|
+
printf("text_length after: %d\n", text_length);
|
47
|
+
|
48
|
+
if(U_FAILURE(status)) {
|
49
|
+
print_error(status);
|
50
|
+
exit(1);
|
51
|
+
}
|
52
|
+
|
53
|
+
print_uchars(buf);
|
54
|
+
|
55
|
+
return 0;
|
56
|
+
}
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ffi-icu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
7
|
- 0
|
7
8
|
- 0
|
8
|
-
- 1
|
9
|
-
version: 0.0.1
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- Jari Bakken
|
@@ -14,16 +15,18 @@ autorequire:
|
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date: 2010-05-
|
18
|
+
date: 2010-05-29 00:00:00 +02:00
|
18
19
|
default_executable:
|
19
20
|
dependencies:
|
20
21
|
- !ruby/object:Gem::Dependency
|
21
22
|
name: ffi
|
22
23
|
prerelease: false
|
23
24
|
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
24
26
|
requirements:
|
25
27
|
- - ">="
|
26
28
|
- !ruby/object:Gem::Version
|
29
|
+
hash: 1
|
27
30
|
segments:
|
28
31
|
- 0
|
29
32
|
- 6
|
@@ -35,9 +38,11 @@ dependencies:
|
|
35
38
|
name: rspec
|
36
39
|
prerelease: false
|
37
40
|
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
38
42
|
requirements:
|
39
43
|
- - ">="
|
40
44
|
- !ruby/object:Gem::Version
|
45
|
+
hash: 27
|
41
46
|
segments:
|
42
47
|
- 1
|
43
48
|
- 3
|
@@ -67,11 +72,16 @@ files:
|
|
67
72
|
- lib/ffi-icu/chardet.rb
|
68
73
|
- lib/ffi-icu/collation.rb
|
69
74
|
- lib/ffi-icu/lib.rb
|
75
|
+
- lib/ffi-icu/normalization.rb
|
76
|
+
- lib/ffi-icu/transliteration.rb
|
70
77
|
- lib/ffi-icu/uchar.rb
|
71
78
|
- spec/chardet_spec.rb
|
72
79
|
- spec/collation_spec.rb
|
80
|
+
- spec/normalization_spec.rb
|
73
81
|
- spec/spec.opts
|
74
82
|
- spec/spec_helper.rb
|
83
|
+
- spec/transliteration_spec.rb
|
84
|
+
- test.c
|
75
85
|
has_rdoc: true
|
76
86
|
homepage: http://github.com/jarib/ffi-icu
|
77
87
|
licenses: []
|
@@ -82,27 +92,33 @@ rdoc_options:
|
|
82
92
|
require_paths:
|
83
93
|
- lib
|
84
94
|
required_ruby_version: !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
85
96
|
requirements:
|
86
97
|
- - ">="
|
87
98
|
- !ruby/object:Gem::Version
|
99
|
+
hash: 3
|
88
100
|
segments:
|
89
101
|
- 0
|
90
102
|
version: "0"
|
91
103
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
|
+
none: false
|
92
105
|
requirements:
|
93
106
|
- - ">="
|
94
107
|
- !ruby/object:Gem::Version
|
108
|
+
hash: 3
|
95
109
|
segments:
|
96
110
|
- 0
|
97
111
|
version: "0"
|
98
112
|
requirements: []
|
99
113
|
|
100
114
|
rubyforge_project:
|
101
|
-
rubygems_version: 1.3.
|
115
|
+
rubygems_version: 1.3.7
|
102
116
|
signing_key:
|
103
117
|
specification_version: 3
|
104
118
|
summary: Simple FFI wrappers for things I need from ICU.
|
105
119
|
test_files:
|
106
120
|
- spec/chardet_spec.rb
|
121
|
+
- spec/normalization_spec.rb
|
122
|
+
- spec/transliteration_spec.rb
|
107
123
|
- spec/spec_helper.rb
|
108
124
|
- spec/collation_spec.rb
|