ffi-icu 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/README.rdoc +17 -3
- data/VERSION +1 -1
- data/lib/ffi-icu.rb +10 -0
- data/lib/ffi-icu/chardet.rb +1 -1
- data/lib/ffi-icu/collation.rb +9 -2
- data/lib/ffi-icu/lib.rb +85 -14
- data/lib/ffi-icu/normalization.rb +29 -0
- data/lib/ffi-icu/transliteration.rb +79 -0
- data/lib/ffi-icu/uchar.rb +29 -6
- data/spec/collation_spec.rb +3 -2
- data/spec/normalization_spec.rb +24 -0
- data/spec/transliteration_spec.rb +33 -0
- data/test.c +56 -0
- metadata +20 -4
data/.gitignore
CHANGED
data/README.rdoc
CHANGED
@@ -1,10 +1,19 @@
|
|
1
1
|
= ffi-icu
|
2
2
|
|
3
|
-
Simple FFI wrappers for things I need from ICU.
|
3
|
+
Simple FFI wrappers for things I need from ICU. For the full thing, check out ICU4R instead.
|
4
|
+
|
5
|
+
= Gem
|
6
|
+
|
7
|
+
* http://rubygems.org/gems/ffi-icu
|
8
|
+
|
9
|
+
gem install ffi-icu
|
4
10
|
|
5
11
|
= Dependencies
|
6
12
|
|
7
|
-
ICU
|
13
|
+
ICU. If you get messages that the library or functions are not found, you can
|
14
|
+
set some environment variables to tell ffi-icu where to find it, e.g.:
|
15
|
+
|
16
|
+
FFI_ICU_LIB="icui18n.so" FFI_ICU_VERSION_SUFFIX="_3_8" ruby -r ffi-icu
|
8
17
|
|
9
18
|
= Features
|
10
19
|
|
@@ -54,7 +63,12 @@ Rubies:
|
|
54
63
|
* MRI 1.9.1
|
55
64
|
* MRI 1.8.7
|
56
65
|
|
57
|
-
|
66
|
+
= TODO:
|
67
|
+
|
68
|
+
* Useful ICU stuff:
|
69
|
+
- number formatting (decimal points, thousand separators, currency)
|
70
|
+
- date formatting
|
71
|
+
* Windows?!
|
58
72
|
|
59
73
|
== Note on Patches/Pull Requests
|
60
74
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.2
|
data/lib/ffi-icu.rb
CHANGED
@@ -14,10 +14,20 @@ module ICU
|
|
14
14
|
os
|
15
15
|
end
|
16
16
|
end
|
17
|
+
|
18
|
+
def self.ruby19?
|
19
|
+
RUBY_VERSION >= '1.9'
|
20
|
+
end
|
17
21
|
end
|
18
22
|
|
19
23
|
require "ffi-icu/lib"
|
20
24
|
require "ffi-icu/uchar"
|
21
25
|
require "ffi-icu/chardet"
|
22
26
|
require "ffi-icu/collation"
|
27
|
+
require "ffi-icu/transliteration"
|
28
|
+
require "ffi-icu/normalization"
|
23
29
|
|
30
|
+
unless ICU.ruby19?
|
31
|
+
require 'jcode'
|
32
|
+
$KCODE = 'u'
|
33
|
+
end
|
data/lib/ffi-icu/chardet.rb
CHANGED
@@ -44,7 +44,7 @@ module ICU
|
|
44
44
|
def detect_all(str)
|
45
45
|
set_text(str)
|
46
46
|
|
47
|
-
matches_found_ptr = FFI::MemoryPointer.new :
|
47
|
+
matches_found_ptr = FFI::MemoryPointer.new :int32_t
|
48
48
|
array_ptr = Lib.check_error do |status|
|
49
49
|
Lib.ucsdet_detectAll(@detector, matches_found_ptr, status)
|
50
50
|
end
|
data/lib/ffi-icu/collation.rb
CHANGED
@@ -59,8 +59,15 @@ module ICU
|
|
59
59
|
UCharPointer.from_string(b), b.length)
|
60
60
|
end
|
61
61
|
|
62
|
-
|
63
|
-
|
62
|
+
def equal?(*args)
|
63
|
+
return super() if args.empty?
|
64
|
+
|
65
|
+
if args.size != 2
|
66
|
+
raise ArgumentError, "wrong number of arguments (#{args.size} for 2)"
|
67
|
+
end
|
68
|
+
|
69
|
+
a, b = args
|
70
|
+
|
64
71
|
Lib.ucol_equal(@c, UCharPointer.from_string(a), a.length,
|
65
72
|
UCharPointer.from_string(b), b.length)
|
66
73
|
end
|
data/lib/ffi-icu/lib.rb
CHANGED
@@ -2,6 +2,9 @@ module ICU
|
|
2
2
|
class Error < StandardError
|
3
3
|
end
|
4
4
|
|
5
|
+
class BufferOverflowError < StandardError
|
6
|
+
end
|
7
|
+
|
5
8
|
module Lib
|
6
9
|
extend FFI::Library
|
7
10
|
|
@@ -14,6 +17,21 @@ module ICU
|
|
14
17
|
def self.find_icu
|
15
18
|
suffix = ''
|
16
19
|
|
20
|
+
# let the user tell us where the lib is
|
21
|
+
if ENV['FFI_ICU_LIB']
|
22
|
+
libs = ENV['FFI_ICU_LIB'].split(",")
|
23
|
+
ffi_lib *libs
|
24
|
+
|
25
|
+
if ENV['FFI_ICU_VERSION_SUFFIX']
|
26
|
+
return ENV['FFI_ICU_VERSION_SUFFIX']
|
27
|
+
elsif num = libs.first[/\d+$/]
|
28
|
+
return num.split(//).join("_")
|
29
|
+
else
|
30
|
+
return suffix
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# ok, try to find it
|
17
35
|
case ICU.platform
|
18
36
|
when :osx
|
19
37
|
ffi_lib "icucore"
|
@@ -40,7 +58,12 @@ module ICU
|
|
40
58
|
error_code = ptr.read_int
|
41
59
|
|
42
60
|
if error_code > 0
|
43
|
-
|
61
|
+
name = Lib.u_errorName error_code
|
62
|
+
if name == "U_BUFFER_OVERFLOW_ERROR"
|
63
|
+
raise BufferOverflowError
|
64
|
+
else
|
65
|
+
raise Error, name
|
66
|
+
end
|
44
67
|
elsif error_code < 0
|
45
68
|
warn "ffi-icu: #{Lib.u_errorName error_code}"
|
46
69
|
end
|
@@ -49,12 +72,14 @@ module ICU
|
|
49
72
|
end
|
50
73
|
|
51
74
|
def self.enum_ptr_to_array(enum_ptr)
|
52
|
-
length =
|
53
|
-
|
75
|
+
length = check_error do |status|
|
76
|
+
uenum_count(enum_ptr, status)
|
54
77
|
end
|
55
78
|
|
79
|
+
len = FFI::MemoryPointer.new(:int)
|
80
|
+
|
56
81
|
(0...length).map do |idx|
|
57
|
-
|
82
|
+
check_error { |st| uenum_next(enum_ptr, len, st) }
|
58
83
|
end
|
59
84
|
end
|
60
85
|
|
@@ -71,7 +96,8 @@ module ICU
|
|
71
96
|
attach_function :uenum_count, "uenum_count#{suffix}", [:pointer, :pointer], :int
|
72
97
|
attach_function :uenum_close, "uenum_close#{suffix}", [:pointer], :void
|
73
98
|
attach_function :uenum_next, "uenum_next#{suffix}", [:pointer, :pointer, :pointer], :string
|
74
|
-
|
99
|
+
attach_function :u_charsToUChars, "u_charsToUChars#{suffix}", [:string, :pointer, :int32_t], :void
|
100
|
+
attach_function :u_UCharsToChars, "u_UCharsToChars#{suffix}", [:pointer, :string, :int32_t], :void
|
75
101
|
|
76
102
|
# CharDet
|
77
103
|
#
|
@@ -80,12 +106,12 @@ module ICU
|
|
80
106
|
|
81
107
|
attach_function :ucsdet_open, "ucsdet_open#{suffix}", [:pointer], :pointer
|
82
108
|
attach_function :ucsdet_close, "ucsdet_close#{suffix}", [:pointer], :void
|
83
|
-
attach_function :ucsdet_setText, "ucsdet_setText#{suffix}", [:pointer, :string, :
|
84
|
-
attach_function :ucsdet_setDeclaredEncoding, "ucsdet_setDeclaredEncoding#{suffix}", [:pointer, :string, :
|
109
|
+
attach_function :ucsdet_setText, "ucsdet_setText#{suffix}", [:pointer, :string, :int32_t, :pointer], :void
|
110
|
+
attach_function :ucsdet_setDeclaredEncoding, "ucsdet_setDeclaredEncoding#{suffix}", [:pointer, :string, :int32_t, :pointer], :void
|
85
111
|
attach_function :ucsdet_detect, "ucsdet_detect#{suffix}", [:pointer, :pointer], :pointer
|
86
112
|
attach_function :ucsdet_detectAll, "ucsdet_detectAll#{suffix}", [:pointer, :pointer, :pointer], :pointer
|
87
113
|
attach_function :ucsdet_getName, "ucsdet_getName#{suffix}", [:pointer, :pointer], :string
|
88
|
-
attach_function :ucsdet_getConfidence, "ucsdet_getConfidence#{suffix}", [:pointer, :pointer], :
|
114
|
+
attach_function :ucsdet_getConfidence, "ucsdet_getConfidence#{suffix}", [:pointer, :pointer], :int32_t
|
89
115
|
attach_function :ucsdet_getLanguage, "ucsdet_getLanguage#{suffix}", [:pointer, :pointer], :string
|
90
116
|
attach_function :ucsdet_getAllDetectableCharsets, "ucsdet_getAllDetectableCharsets#{suffix}", [:pointer, :pointer], :pointer
|
91
117
|
attach_function :ucsdet_isInputFilterEnabled, "ucsdet_isInputFilterEnabled#{suffix}", [:pointer], :bool
|
@@ -98,15 +124,60 @@ module ICU
|
|
98
124
|
|
99
125
|
attach_function :ucol_open, "ucol_open#{suffix}", [:string, :pointer], :pointer
|
100
126
|
attach_function :ucol_close, "ucol_close#{suffix}", [:pointer], :void
|
101
|
-
attach_function :ucol_strcoll, "ucol_strcoll#{suffix}", [:pointer, :pointer, :
|
127
|
+
attach_function :ucol_strcoll, "ucol_strcoll#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t], :int
|
102
128
|
attach_function :ucol_getKeywords, "ucol_getKeywords#{suffix}", [:pointer], :pointer
|
103
129
|
attach_function :ucol_getKeywordValues, "ucol_getKeywordValues#{suffix}", [:string, :pointer], :pointer
|
104
|
-
attach_function :ucol_getAvailable, "ucol_getAvailable#{suffix}", [:
|
105
|
-
attach_function :ucol_countAvailable, "ucol_countAvailable#{suffix}", [], :
|
130
|
+
attach_function :ucol_getAvailable, "ucol_getAvailable#{suffix}", [:int32_t], :string
|
131
|
+
attach_function :ucol_countAvailable, "ucol_countAvailable#{suffix}", [], :int32_t
|
106
132
|
attach_function :ucol_getLocale, "ucol_getLocale#{suffix}", [:pointer, :int, :pointer], :string
|
107
|
-
attach_function :ucol_greater, "ucol_greater#{suffix}", [:pointer, :pointer, :
|
108
|
-
attach_function :ucol_greaterOrEqual, "ucol_greaterOrEqual#{suffix}", [:pointer, :pointer, :
|
109
|
-
attach_function :ucol_equal, "ucol_equal#{suffix}", [:pointer, :pointer, :
|
133
|
+
attach_function :ucol_greater, "ucol_greater#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t], :bool
|
134
|
+
attach_function :ucol_greaterOrEqual, "ucol_greaterOrEqual#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t], :bool
|
135
|
+
attach_function :ucol_equal, "ucol_equal#{suffix}", [:pointer, :pointer, :int32_t, :pointer, :int32_t], :bool
|
136
|
+
|
137
|
+
# Transliteration
|
138
|
+
#
|
139
|
+
# http://icu-project.org/apiref/icu4c/utrans_8h.html
|
140
|
+
#
|
141
|
+
|
142
|
+
class UParseError < FFI::Struct
|
143
|
+
layout :line, :int32_t,
|
144
|
+
:offset, :int32_t,
|
145
|
+
:pre_context, :pointer,
|
146
|
+
:post_context, :pointer
|
147
|
+
|
148
|
+
|
149
|
+
end
|
150
|
+
|
151
|
+
class UTransPosition < FFI::Struct
|
152
|
+
layout :context_start, :int32_t,
|
153
|
+
:context_limit, :int32_t,
|
154
|
+
:start, :int32_t,
|
155
|
+
:end, :int32_t
|
156
|
+
|
157
|
+
end
|
158
|
+
|
159
|
+
enum :trans_direction, [:forward, :reverse]
|
160
|
+
|
161
|
+
attach_function :utrans_openIDs, "utrans_openIDs#{suffix}", [:pointer], :pointer
|
162
|
+
attach_function :utrans_openU, "utrans_openU#{suffix}", [:pointer, :int32_t, :trans_direction, :pointer, :int32_t, :pointer, :pointer], :pointer
|
163
|
+
attach_function :utrans_open, "utrans_open#{suffix}", [:string, :trans_direction, :pointer, :int32_t, :pointer, :pointer], :pointer
|
164
|
+
attach_function :utrans_close, "utrans_close#{suffix}", [:pointer], :void
|
165
|
+
attach_function :utrans_transUChars, "utrans_transUChars#{suffix}", [:pointer, :pointer, :pointer, :int32_t, :int32_t, :pointer, :pointer], :void
|
166
|
+
|
167
|
+
# Normalization
|
168
|
+
#
|
169
|
+
# http://icu-project.org/apiref/icu4c/unorm_8h.html
|
170
|
+
#
|
171
|
+
|
172
|
+
enum :normalization_mode, [ :none, 1,
|
173
|
+
:nfd, 2,
|
174
|
+
:nfkd, 3,
|
175
|
+
:nfc, 4,
|
176
|
+
:default, 4,
|
177
|
+
:nfkc, 5,
|
178
|
+
:fcd, 6
|
179
|
+
]
|
110
180
|
|
181
|
+
attach_function :unorm_normalize, "unorm_normalize#{suffix}", [:pointer, :int32_t, :normalization_mode, :int32_t, :pointer, :int32_t, :pointer], :int32_t
|
111
182
|
end # Lib
|
112
183
|
end # ICU
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module ICU
|
2
|
+
module Normalization
|
3
|
+
|
4
|
+
def self.normalize(input, mode = :default)
|
5
|
+
input_length = ICU.ruby19? ? input.length : input.jlength
|
6
|
+
needed_length = 0
|
7
|
+
result_length = 0
|
8
|
+
|
9
|
+
retried = false
|
10
|
+
ptr = nil
|
11
|
+
|
12
|
+
begin
|
13
|
+
Lib.check_error do |error|
|
14
|
+
needed_length = Lib.unorm_normalize(UCharPointer.from_string(input), input_length, mode, 0, ptr, result_length, error)
|
15
|
+
end
|
16
|
+
rescue BufferOverflowError
|
17
|
+
raise if retried
|
18
|
+
ptr = UCharPointer.from_string("\0" * needed_length)
|
19
|
+
result_length = needed_length + 1
|
20
|
+
|
21
|
+
retried = true
|
22
|
+
retry
|
23
|
+
end
|
24
|
+
|
25
|
+
ptr.string if ptr
|
26
|
+
end
|
27
|
+
|
28
|
+
end # Normalization
|
29
|
+
end # ICU
|
@@ -0,0 +1,79 @@
|
|
1
|
+
module ICU
|
2
|
+
module Transliteration
|
3
|
+
|
4
|
+
class << self
|
5
|
+
def transliterate(translit_id, str)
|
6
|
+
t = Transliterator.new translit_id
|
7
|
+
res = t.transliterate str
|
8
|
+
t.close
|
9
|
+
|
10
|
+
res
|
11
|
+
end
|
12
|
+
alias_method :translit, :transliterate
|
13
|
+
|
14
|
+
def available_ids
|
15
|
+
enum_ptr = Lib.check_error do |error|
|
16
|
+
Lib.utrans_openIDs(error)
|
17
|
+
end
|
18
|
+
|
19
|
+
result = Lib.enum_ptr_to_array(enum_ptr)
|
20
|
+
Lib.uenum_close(enum_ptr)
|
21
|
+
|
22
|
+
result
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class Transliterator
|
27
|
+
|
28
|
+
def initialize(id, direction = :forward)
|
29
|
+
@parse_error = Lib::UParseError.new
|
30
|
+
Lib.check_error do |status|
|
31
|
+
# couldn't get utrans_openU to work properly, so using deprecated utrans_open for now
|
32
|
+
@tr = Lib.utrans_open(id, direction, nil, 0, @parse_error, status)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def transliterate(from)
|
37
|
+
# this is a bit unpleasant
|
38
|
+
|
39
|
+
unicode_size = from.unpack("U*").size
|
40
|
+
capacity = from.bytesize + 1
|
41
|
+
buf = UCharPointer.from_string(from)
|
42
|
+
limit = FFI::MemoryPointer.new :int32
|
43
|
+
text_length = FFI::MemoryPointer.new :int32
|
44
|
+
|
45
|
+
retried = false
|
46
|
+
|
47
|
+
begin
|
48
|
+
# resets to original size on retry
|
49
|
+
[limit, text_length].each do |ptr|
|
50
|
+
ptr.put_int32(0, unicode_size)
|
51
|
+
end
|
52
|
+
|
53
|
+
Lib.check_error do |error|
|
54
|
+
Lib.utrans_transUChars(@tr, buf, text_length, capacity, 0, limit, error)
|
55
|
+
end
|
56
|
+
rescue BufferOverflowError
|
57
|
+
new_size = text_length.get_int32(0)
|
58
|
+
$stderr.puts "BufferOverflowError, needs: #{new_size}" if $DEBUG
|
59
|
+
|
60
|
+
raise BufferOverflowError, "needed #{new_size}" if retried
|
61
|
+
|
62
|
+
capacity = new_size + 1
|
63
|
+
buf = buf.resized_to capacity
|
64
|
+
retried = true
|
65
|
+
|
66
|
+
retry
|
67
|
+
end
|
68
|
+
|
69
|
+
buf.string text_length.get_int32(0)
|
70
|
+
end
|
71
|
+
|
72
|
+
def close
|
73
|
+
Lib.utrans_close @tr
|
74
|
+
end
|
75
|
+
|
76
|
+
end # Transliterator
|
77
|
+
end # Transliteration
|
78
|
+
end # ICU
|
79
|
+
|
data/lib/ffi-icu/uchar.rb
CHANGED
@@ -1,11 +1,34 @@
|
|
1
1
|
module ICU
|
2
|
-
|
3
2
|
class UCharPointer < FFI::MemoryPointer
|
3
|
+
|
4
|
+
UCHAR_TYPE = :uint16 # not sure how platform-dependent this is..
|
5
|
+
TYPE_SIZE = FFI.type_size(UCHAR_TYPE)
|
6
|
+
|
4
7
|
def self.from_string(str)
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
+
str = str.encode("UTF-8") if str.respond_to? :encode
|
9
|
+
bytes = str.unpack("U*")
|
10
|
+
|
11
|
+
ptr = new UCHAR_TYPE, bytes.size
|
12
|
+
ptr.put_array_of_uint16 0, bytes
|
13
|
+
|
14
|
+
ptr
|
15
|
+
end
|
16
|
+
|
17
|
+
def resized_to(new_size)
|
18
|
+
raise "new_size must be larger than current size" if new_size < size
|
19
|
+
resized = self.class.new UCHAR_TYPE, new_size
|
20
|
+
resized.put_bytes(0, get_bytes(0, size))
|
21
|
+
|
22
|
+
resized
|
23
|
+
end
|
24
|
+
|
25
|
+
def string(length = nil)
|
26
|
+
length ||= size / TYPE_SIZE
|
27
|
+
|
28
|
+
wstring = get_array_of_uint16(0, length)
|
29
|
+
wstring.pack("U*")
|
8
30
|
end
|
9
|
-
end
|
10
31
|
|
11
|
-
|
32
|
+
|
33
|
+
end # UCharPointer
|
34
|
+
end # ICU
|
data/spec/collation_spec.rb
CHANGED
@@ -6,7 +6,7 @@ module ICU
|
|
6
6
|
module Collation
|
7
7
|
describe Collator do
|
8
8
|
|
9
|
-
before { @c = Collator.new("
|
9
|
+
before { @c = Collator.new("nb") }
|
10
10
|
after { @c.close }
|
11
11
|
|
12
12
|
it "should collate an array of strings" do
|
@@ -43,7 +43,8 @@ module ICU
|
|
43
43
|
end
|
44
44
|
|
45
45
|
it "should know if a string is equal to another" do
|
46
|
-
@c.should
|
46
|
+
@c.should be_equal("a", "a")
|
47
|
+
@c.should_not be_equal("a", "b")
|
47
48
|
end
|
48
49
|
|
49
50
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
module ICU
|
6
|
+
module Normalization
|
7
|
+
# http://bugs.icu-project.org/trac/browser/icu/trunk/source/test/cintltst/cnormtst.c
|
8
|
+
|
9
|
+
describe "Normalization" do
|
10
|
+
|
11
|
+
it "should normalize a string - decomposed" do
|
12
|
+
ICU::Normalization.normalize("Å", :nfd).unpack("U*").should == [65, 778]
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should normalize a string - composed" do
|
16
|
+
ICU::Normalization.normalize("Å", :nfc).unpack("U*").should == [197]
|
17
|
+
end
|
18
|
+
|
19
|
+
# TODO: add more normalization tests
|
20
|
+
|
21
|
+
|
22
|
+
end
|
23
|
+
end # Normalization
|
24
|
+
end # ICU
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
module ICU
|
6
|
+
describe Transliteration::Transliterator do
|
7
|
+
|
8
|
+
def transliterator(*args)
|
9
|
+
@t = Transliteration::Transliterator.new(*args)
|
10
|
+
end
|
11
|
+
|
12
|
+
after { @t.close if @t }
|
13
|
+
|
14
|
+
[
|
15
|
+
{ :id => "Any-Hex", :input => "abcde", :output => "\\u0061\\u0062\\u0063\\u0064\\u0065" },
|
16
|
+
{ :id => "Lower", :input => "ABC", :output => "abc" },
|
17
|
+
].each do |test|
|
18
|
+
|
19
|
+
it "should transliterate #{test[:id]}" do
|
20
|
+
transliterator(test[:id]).transliterate(test[:input]).should == test[:output]
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
end # Transliterator
|
25
|
+
|
26
|
+
describe Transliteration do
|
27
|
+
it "should provide a list of available ids" do
|
28
|
+
ids = ICU::Transliteration.available_ids
|
29
|
+
ids.should be_kind_of(Array)
|
30
|
+
ids.should_not be_empty
|
31
|
+
end
|
32
|
+
end # Transliteration
|
33
|
+
end # ICU
|
data/test.c
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
#include <unicode/unorm.h>
|
2
|
+
#include <unicode/ustdio.h>
|
3
|
+
#include <stdio.h>
|
4
|
+
#include <stdarg.h>
|
5
|
+
#include <iconv.h>
|
6
|
+
#include <string.h>
|
7
|
+
#include <stdlib.h>
|
8
|
+
|
9
|
+
void print_uchars(UChar *str) {
|
10
|
+
UFILE *out = u_finit(stdout, NULL, NULL);
|
11
|
+
u_fprintf(out, "uchars: %S\n", str);
|
12
|
+
u_fclose(out);
|
13
|
+
}
|
14
|
+
|
15
|
+
void print_error(UErrorCode status) {
|
16
|
+
printf("err: %s (%d)\n", u_errorName(status), status);
|
17
|
+
}
|
18
|
+
|
19
|
+
int main (int argc, char const *argv[])
|
20
|
+
{
|
21
|
+
|
22
|
+
UTransliterator* trans = NULL;
|
23
|
+
UErrorCode status = U_ZERO_ERROR;
|
24
|
+
|
25
|
+
trans = utrans_open("Any-Hex", UTRANS_FORWARD, NULL, 0, NULL, &status);
|
26
|
+
if(U_FAILURE(status)) {
|
27
|
+
print_error(status);
|
28
|
+
exit(1);
|
29
|
+
}
|
30
|
+
|
31
|
+
UChar from[256];
|
32
|
+
UChar buf[6];
|
33
|
+
|
34
|
+
int32_t text_length, limit;
|
35
|
+
|
36
|
+
u_uastrcpy(from, "abcde");
|
37
|
+
u_strcpy(buf, from);
|
38
|
+
|
39
|
+
limit = text_length = u_strlen(buf);
|
40
|
+
printf("limit: %d\n", limit);
|
41
|
+
printf("text_length: %d\n", limit);
|
42
|
+
|
43
|
+
utrans_transUChars(trans, buf, &text_length, 256, 0, &limit, &status);
|
44
|
+
|
45
|
+
printf("uchar ptr length after: %d\n", u_strlen(buf));
|
46
|
+
printf("text_length after: %d\n", text_length);
|
47
|
+
|
48
|
+
if(U_FAILURE(status)) {
|
49
|
+
print_error(status);
|
50
|
+
exit(1);
|
51
|
+
}
|
52
|
+
|
53
|
+
print_uchars(buf);
|
54
|
+
|
55
|
+
return 0;
|
56
|
+
}
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ffi-icu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
7
|
- 0
|
7
8
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- Jari Bakken
|
@@ -14,16 +15,18 @@ autorequire:
|
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date: 2010-05-
|
18
|
+
date: 2010-05-29 00:00:00 +02:00
|
18
19
|
default_executable:
|
19
20
|
dependencies:
|
20
21
|
- !ruby/object:Gem::Dependency
|
21
22
|
name: ffi
|
22
23
|
prerelease: false
|
23
24
|
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
24
26
|
requirements:
|
25
27
|
- - ">="
|
26
28
|
- !ruby/object:Gem::Version
|
29
|
+
hash: 1
|
27
30
|
segments:
|
28
31
|
- 0
|
29
32
|
- 6
|
@@ -35,9 +38,11 @@ dependencies:
|
|
35
38
|
name: rspec
|
36
39
|
prerelease: false
|
37
40
|
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
38
42
|
requirements:
|
39
43
|
- - ">="
|
40
44
|
- !ruby/object:Gem::Version
|
45
|
+
hash: 27
|
41
46
|
segments:
|
42
47
|
- 1
|
43
48
|
- 3
|
@@ -67,11 +72,16 @@ files:
|
|
67
72
|
- lib/ffi-icu/chardet.rb
|
68
73
|
- lib/ffi-icu/collation.rb
|
69
74
|
- lib/ffi-icu/lib.rb
|
75
|
+
- lib/ffi-icu/normalization.rb
|
76
|
+
- lib/ffi-icu/transliteration.rb
|
70
77
|
- lib/ffi-icu/uchar.rb
|
71
78
|
- spec/chardet_spec.rb
|
72
79
|
- spec/collation_spec.rb
|
80
|
+
- spec/normalization_spec.rb
|
73
81
|
- spec/spec.opts
|
74
82
|
- spec/spec_helper.rb
|
83
|
+
- spec/transliteration_spec.rb
|
84
|
+
- test.c
|
75
85
|
has_rdoc: true
|
76
86
|
homepage: http://github.com/jarib/ffi-icu
|
77
87
|
licenses: []
|
@@ -82,27 +92,33 @@ rdoc_options:
|
|
82
92
|
require_paths:
|
83
93
|
- lib
|
84
94
|
required_ruby_version: !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
85
96
|
requirements:
|
86
97
|
- - ">="
|
87
98
|
- !ruby/object:Gem::Version
|
99
|
+
hash: 3
|
88
100
|
segments:
|
89
101
|
- 0
|
90
102
|
version: "0"
|
91
103
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
|
+
none: false
|
92
105
|
requirements:
|
93
106
|
- - ">="
|
94
107
|
- !ruby/object:Gem::Version
|
108
|
+
hash: 3
|
95
109
|
segments:
|
96
110
|
- 0
|
97
111
|
version: "0"
|
98
112
|
requirements: []
|
99
113
|
|
100
114
|
rubyforge_project:
|
101
|
-
rubygems_version: 1.3.
|
115
|
+
rubygems_version: 1.3.7
|
102
116
|
signing_key:
|
103
117
|
specification_version: 3
|
104
118
|
summary: Simple FFI wrappers for things I need from ICU.
|
105
119
|
test_files:
|
106
120
|
- spec/chardet_spec.rb
|
121
|
+
- spec/normalization_spec.rb
|
122
|
+
- spec/transliteration_spec.rb
|
107
123
|
- spec/spec_helper.rb
|
108
124
|
- spec/collation_spec.rb
|