icu4r_19 1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +87 -0
- data/MIT-LICENSE +20 -0
- data/README +156 -0
- data/Rakefile +32 -0
- data/calendar.c +636 -0
- data/collator.c +233 -0
- data/converter.c +322 -0
- data/docs/FORMATTING +131 -0
- data/docs/UNICODE_REGEXPS +204 -0
- data/extconf.rb +17 -0
- data/fmt.cpp +156 -0
- data/icu4r.c +18 -0
- data/icu_common.h +45 -0
- data/lib/dummy +0 -0
- data/samples/demo_each.rb +23 -0
- data/samples/demo_locales.rb +16 -0
- data/samples/demo_regexp.rb +11 -0
- data/samples/resbundle/appmsg/root.res +0 -0
- data/samples/resbundle/appmsg/ru.res +0 -0
- data/samples/resbundle/demo_bundle.rb +4 -0
- data/samples/resbundle/mkres.sh +4 -0
- data/samples/resbundle/root.txt +10 -0
- data/samples/resbundle/ru.txt +4 -0
- data/test/test_calendar.rb +123 -0
- data/test/test_collator.rb +33 -0
- data/test/test_converter.rb +72 -0
- data/test/test_ustring.rb +508 -0
- data/tools/doc.sh +2 -0
- data/tools/km.rb +425 -0
- data/ubundle.c +223 -0
- data/ucore_ext.c +168 -0
- data/uregex.c +697 -0
- data/uregex.h +27 -0
- data/ustring.c +3039 -0
- metadata +164 -0
data/collator.c
ADDED
@@ -0,0 +1,233 @@
|
|
1
|
+
#include "icu_common.h"
|
2
|
+
extern VALUE rb_cUString;
|
3
|
+
extern VALUE rb_cUCollator;
|
4
|
+
extern int icu_collator_cmp (UCollator * collator, VALUE str1, VALUE str2) ;
|
5
|
+
|
6
|
+
/**
|
7
|
+
* Document-class: UCollator
|
8
|
+
*
|
9
|
+
 * The UCollator API performs locale-sensitive string comparison. You use this service to build searching and
|
10
|
+
* sorting routines for natural language text.
|
11
|
+
*
|
12
|
+
* Attributes that collation service understands:
|
13
|
+
*
|
14
|
+
* UCOL_FRENCH_COLLATION Attribute for direction of secondary weights - used in French. UCOL_ON, UCOL_OFF
|
15
|
+
*
|
16
|
+
* UCOL_ALTERNATE_HANDLING Attribute for handling variable elements. UCOL_NON_IGNORABLE (default), UCOL_SHIFTED
|
17
|
+
*
|
18
|
+
* UCOL_CASE_FIRST Controls the ordering of upper and lower case letters.
|
19
|
+
* UCOL_OFF (default), UCOL_UPPER_FIRST, UCOL_LOWER_FIRST
|
20
|
+
*
|
21
|
+
* UCOL_CASE_LEVEL Controls whether an extra case level (positioned before the third level) is
|
22
|
+
* generated or not. UCOL_OFF (default), UCOL_ON
|
23
|
+
*
|
24
|
+
* UCOL_NORMALIZATION_MODE Controls whether the normalization check and necessary normalizations are performed.
|
25
|
+
* When set to UCOL_ON, an incremental check is performed to see whether the input data
|
26
|
+
* is in the FCD form. If the data is not in the FCD form, incremental NFD normalization
|
27
|
+
* is performed.
|
28
|
+
*
|
29
|
+
* UCOL_DECOMPOSITION_MODE An alias for UCOL_NORMALIZATION_MODE attribute
|
30
|
+
*
|
31
|
+
* UCOL_STRENGTH The strength attribute.
|
32
|
+
* Can be either UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL.
|
33
|
+
* The usual strength for most locales (except Japanese) is tertiary.
|
34
|
+
*
|
35
|
+
* UCOL_HIRAGANA_QUATERNARY_MODE when turned on, this attribute positions Hiragana before all non-ignorables on
|
36
|
+
 *                                 quaternary level. This is a sneaky way to produce JIS sort order.
|
37
|
+
* UCOL_NUMERIC_COLLATION when turned on, this attribute generates a collation key for the numeric value of
|
38
|
+
* substrings of digits. This is a way to get '100' to sort AFTER '2'.
|
39
|
+
*
|
40
|
+
* Attribute values:
|
41
|
+
*
|
42
|
+
* UCOL_DEFAULT accepted by most attributes
|
43
|
+
* UCOL_PRIMARY Primary collation strength
|
44
|
+
* UCOL_SECONDARY Secondary collation strength
|
45
|
+
* UCOL_TERTIARY Tertiary collation strength
|
46
|
+
* UCOL_DEFAULT_STRENGTH Default collation strength
|
47
|
+
* UCOL_QUATERNARY Quaternary collation strength
|
48
|
+
* UCOL_IDENTICAL Identical collation strength
|
49
|
+
* UCOL_OFF Turn the feature off - works for
|
50
|
+
* UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL,
|
51
|
+
* UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE
|
52
|
+
*
|
53
|
+
* UCOL_ON Turn the feature on - works for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL,
|
54
|
+
* UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE
|
55
|
+
*
|
56
|
+
* UCOL_SHIFTED Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted
|
57
|
+
* UCOL_NON_IGNORABLE Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable
|
58
|
+
* UCOL_LOWER_FIRST Valid for UCOL_CASE_FIRST - lower case sorts before upper case
|
59
|
+
* UCOL_UPPER_FIRST upper case sorts before lower case
|
60
|
+
**/
|
61
|
+
|
62
|
+
#define UCOLLATOR(obj) ((UCollator *)DATA_PTR(obj))
|
63
|
+
|
64
|
+
/* Free function registered for wrapped UCollator objects: closes the
 * ICU collator handle it is given. */
void icu4r_col_free(UCollator *collator)
{
    ucol_close(collator);
}
|
68
|
+
/* Allocator for UCollator: wraps a NULL data pointer (set later by
 * "initialize") with no mark function and icu4r_col_free as the free
 * function. */
static VALUE icu4r_col_alloc(VALUE klass)
{
    VALUE wrapped = Data_Wrap_Struct(klass, 0, icu4r_col_free, 0);

    return wrapped;
}
|
72
|
+
/**
|
73
|
+
* call-seq:
|
74
|
+
* col = UCollator.new(locale = nil)
|
75
|
+
*
|
76
|
+
* Open a UCollator for comparing strings for the given locale containing the required collation rules.
|
77
|
+
* Special values for locales can be passed in - if +nil+ is passed for the locale, the default locale
|
78
|
+
* collation rules will be used. If empty string ("") or "root" are passed, UCA rules will be used.
|
79
|
+
*/
|
80
|
+
VALUE icu4r_col_init(int argc, VALUE * argv, VALUE self)
{
    UCollator  *col;
    UCollator  *previous;
    UErrorCode  status = U_ZERO_ERROR;
    VALUE       loc    = Qnil;
    const char *locale = NULL;   /* NULL is what gets passed when no locale is given */

    rb_scan_args(argc, argv, "01", &loc);

    /* An omitted argument and an explicit nil both select the default
     * locale; only a real argument has to be a String. */
    if (!NIL_P(loc)) {
        Check_Type(loc, T_STRING);
        /* StringValueCStr yields a NUL-terminated C string and raises
         * if the Ruby string contains an embedded NUL byte. */
        locale = StringValueCStr(loc);
    }

    col = ucol_open(locale, &status);
    ICU_RAISE(status);

    /* If initialize is called again on the same object, close the
     * collator that was installed earlier instead of leaking it. */
    previous = UCOLLATOR(self);
    DATA_PTR(self) = col;
    if (previous != NULL) {
        ucol_close(previous);
    }
    return self;
}
|
96
|
+
|
97
|
+
/**
|
98
|
+
* call-seq:
|
99
|
+
* collator.strength
|
100
|
+
*
|
101
|
+
* Get the collation strength used in a UCollator. The strength influences how strings are compared.
|
102
|
+
**/
|
103
|
+
VALUE icu4r_col_get_strength(VALUE self)
{
    UCollator *collator = UCOLLATOR(self);

    /* Convert the strength value reported by ICU to a Ruby Integer. */
    return INT2NUM(ucol_getStrength(collator));
}
|
107
|
+
|
108
|
+
/**
|
109
|
+
* call-seq:
|
110
|
+
* collator.strength = new_strength
|
111
|
+
*
|
112
|
+
* Sets the collation strength used in a UCollator. The strength influences how strings are compared.
|
113
|
+
**/
|
114
|
+
VALUE icu4r_col_set_strength(VALUE self, VALUE obj)
{
    int strength;

    /* Only Fixnum arguments are accepted. */
    Check_Type(obj, T_FIXNUM);
    strength = FIX2INT(obj);

    ucol_setStrength(UCOLLATOR(self), strength);
    return Qnil;
}
|
120
|
+
|
121
|
+
/**
|
122
|
+
* call-seq:
|
123
|
+
* collator.get_attr(attribute)
|
124
|
+
* collator[attribute]
|
125
|
+
*
|
126
|
+
 * Universal attribute getter. See above for valid attributes and their values
|
127
|
+
**/
|
128
|
+
VALUE icu4r_col_get_attr(VALUE self, VALUE obj)
{
    UErrorCode         status = U_ZERO_ERROR;
    UColAttributeValue current;
    int                attribute;

    /* The attribute selector must be a Fixnum. */
    Check_Type(obj, T_FIXNUM);
    attribute = FIX2INT(obj);

    current = ucol_getAttribute(UCOLLATOR(self), attribute, &status);
    ICU_RAISE(status);

    return INT2FIX(current);
}
|
137
|
+
|
138
|
+
/**
|
139
|
+
* call-seq:
|
140
|
+
* collator.set_attr(attribute, value)
|
141
|
+
* collator[attribute]=value
|
142
|
+
*
|
143
|
+
* Universal attribute setter. See above for valid attributes and their values
|
144
|
+
**/
|
145
|
+
VALUE icu4r_col_set_attr(VALUE self, VALUE obj, VALUE new_val)
{
    UErrorCode status = U_ZERO_ERROR;
    int        attribute;
    int        value;

    /* Both the attribute selector and its new value must be Fixnums. */
    Check_Type(obj, T_FIXNUM);
    Check_Type(new_val, T_FIXNUM);
    attribute = FIX2INT(obj);
    value     = FIX2INT(new_val);

    ucol_setAttribute(UCOLLATOR(self), attribute, value, &status);
    ICU_RAISE(status);

    return Qnil;
}
|
154
|
+
/**
|
155
|
+
* call-seq:
|
156
|
+
* collator.strcoll(ustr1, ustr2)
|
157
|
+
*
|
158
|
+
* Compare two UString's. The strings will be compared using the options already specified.
|
159
|
+
**/
|
160
|
+
VALUE icu4r_col_strcoll(VALUE self, VALUE str1, VALUE str2)
{
    int order;

    /* Both operands have to be UString instances. */
    Check_Class(str1, rb_cUString);
    Check_Class(str2, rb_cUString);

    order = icu_collator_cmp(UCOLLATOR(self), str1, str2);
    return INT2FIX(order);
}
|
166
|
+
/**
|
167
|
+
* call-seq:
|
168
|
+
* collator.sort_key(an_ustring) -> String
|
169
|
+
*
|
170
|
+
* Get a sort key for a string from a UCollator. Sort keys may be compared using strcmp.
|
171
|
+
**/
|
172
|
+
VALUE icu4r_col_sort_key(VALUE self, VALUE str)
{
    int32_t        needed, capa;
    unsigned char *buffer;
    VALUE          ret;

    Check_Class(str, rb_cUString);

    /* First attempt: a buffer with as many bytes as ICU_LEN(str). */
    capa   = ICU_LEN(str);
    buffer = ALLOC_N(unsigned char, capa);
    needed = ucol_getSortKey(UCOLLATOR(self), ICU_PTR(str), ICU_LEN(str), buffer, capa);

    /* ucol_getSortKey reports the size it needs; grow the buffer and
     * run it again when the first attempt was too small. */
    if (needed > capa) {
        REALLOC_N(buffer, unsigned char, needed);
        needed = ucol_getSortKey(UCOLLATOR(self), ICU_PTR(str), ICU_LEN(str), buffer, needed);
    }

    /* rb_str_new copies the bytes, so the scratch buffer can be released. */
    ret = rb_str_new((char *)buffer, needed);

    /* Memory obtained through ALLOC_N/REALLOC_N must be released with
     * xfree, not the C library free. */
    xfree(buffer);
    return ret;
}
|
189
|
+
void initialize_collator()
|
190
|
+
{
|
191
|
+
rb_cUCollator = rb_define_class("UCollator", rb_cObject);
|
192
|
+
rb_define_alloc_func(rb_cUCollator, icu4r_col_alloc);
|
193
|
+
|
194
|
+
rb_define_method(rb_cUCollator, "initialize", icu4r_col_init, -1);
|
195
|
+
rb_define_method(rb_cUCollator, "strength", icu4r_col_get_strength, 0);
|
196
|
+
rb_define_method(rb_cUCollator, "strength=", icu4r_col_set_strength, 1);
|
197
|
+
rb_define_method(rb_cUCollator, "get_attr", icu4r_col_get_attr, 1);
|
198
|
+
rb_define_alias(rb_cUCollator, "[]", "get_attr");
|
199
|
+
rb_define_method(rb_cUCollator, "set_attr", icu4r_col_set_attr, 2);
|
200
|
+
rb_define_alias(rb_cUCollator, "[]=", "set_attr");
|
201
|
+
rb_define_method(rb_cUCollator, "strcoll", icu4r_col_strcoll, 2);
|
202
|
+
rb_define_method(rb_cUCollator, "sort_key",icu4r_col_sort_key, 1);
|
203
|
+
|
204
|
+
/* attributes */
|
205
|
+
rb_define_const(rb_cUCollator, "UCOL_FRENCH_COLLATION", INT2FIX(UCOL_FRENCH_COLLATION));
|
206
|
+
rb_define_const(rb_cUCollator, "UCOL_ALTERNATE_HANDLING", INT2FIX(UCOL_ALTERNATE_HANDLING));
|
207
|
+
rb_define_const(rb_cUCollator, "UCOL_CASE_FIRST", INT2FIX(UCOL_CASE_FIRST));
|
208
|
+
rb_define_const(rb_cUCollator, "UCOL_CASE_LEVEL", INT2FIX(UCOL_CASE_LEVEL));
|
209
|
+
rb_define_const(rb_cUCollator, "UCOL_NORMALIZATION_MODE", INT2FIX(UCOL_NORMALIZATION_MODE));
|
210
|
+
rb_define_const(rb_cUCollator, "UCOL_DECOMPOSITION_MODE", INT2FIX(UCOL_DECOMPOSITION_MODE));
|
211
|
+
rb_define_const(rb_cUCollator, "UCOL_STRENGTH", INT2FIX(UCOL_STRENGTH));
|
212
|
+
rb_define_const(rb_cUCollator, "UCOL_HIRAGANA_QUATERNARY_MODE", INT2FIX(UCOL_HIRAGANA_QUATERNARY_MODE));
|
213
|
+
rb_define_const(rb_cUCollator, "UCOL_NUMERIC_COLLATION", INT2FIX(UCOL_NUMERIC_COLLATION));
|
214
|
+
rb_define_const(rb_cUCollator, "UCOL_ATTRIBUTE_COUNT", INT2FIX(UCOL_ATTRIBUTE_COUNT));
|
215
|
+
|
216
|
+
/* attribute values */
|
217
|
+
rb_define_const(rb_cUCollator, "UCOL_DEFAULT", INT2FIX(UCOL_DEFAULT));
|
218
|
+
rb_define_const(rb_cUCollator, "UCOL_PRIMARY", INT2FIX(UCOL_PRIMARY));
|
219
|
+
rb_define_const(rb_cUCollator, "UCOL_SECONDARY", INT2FIX(UCOL_SECONDARY));
|
220
|
+
rb_define_const(rb_cUCollator, "UCOL_TERTIARY", INT2FIX(UCOL_TERTIARY));
|
221
|
+
rb_define_const(rb_cUCollator, "UCOL_DEFAULT_STRENGTH", INT2FIX(UCOL_DEFAULT_STRENGTH));
|
222
|
+
rb_define_const(rb_cUCollator, "UCOL_CE_STRENGTH_LIMIT", INT2FIX(UCOL_CE_STRENGTH_LIMIT));
|
223
|
+
rb_define_const(rb_cUCollator, "UCOL_QUATERNARY", INT2FIX(UCOL_QUATERNARY));
|
224
|
+
rb_define_const(rb_cUCollator, "UCOL_IDENTICAL", INT2FIX(UCOL_IDENTICAL));
|
225
|
+
rb_define_const(rb_cUCollator, "UCOL_STRENGTH_LIMIT", INT2FIX(UCOL_STRENGTH_LIMIT));
|
226
|
+
rb_define_const(rb_cUCollator, "UCOL_OFF", INT2FIX(UCOL_OFF));
|
227
|
+
rb_define_const(rb_cUCollator, "UCOL_ON", INT2FIX(UCOL_ON));
|
228
|
+
rb_define_const(rb_cUCollator, "UCOL_SHIFTED", INT2FIX(UCOL_SHIFTED));
|
229
|
+
rb_define_const(rb_cUCollator, "UCOL_NON_IGNORABLE", INT2FIX(UCOL_NON_IGNORABLE));
|
230
|
+
rb_define_const(rb_cUCollator, "UCOL_LOWER_FIRST", INT2FIX(UCOL_LOWER_FIRST));
|
231
|
+
rb_define_const(rb_cUCollator, "UCOL_UPPER_FIRST", INT2FIX(UCOL_UPPER_FIRST));
|
232
|
+
|
233
|
+
}
|
data/converter.c
ADDED
@@ -0,0 +1,322 @@
|
|
1
|
+
#include "icu_common.h"
|
2
|
+
extern VALUE rb_cUString;
|
3
|
+
extern VALUE icu_ustr_new_set(UChar * ptr, long len, long capa);
|
4
|
+
extern VALUE rb_cUConverter;
|
5
|
+
|
6
|
+
#define UCONVERTER(obj) ((UConverter *)DATA_PTR(obj))
|
7
|
+
|
8
|
+
/* Free function registered for wrapped UConverter objects: closes the
 * ICU converter handle it is given. */
static void icu4r_cnv_free(UConverter *converter)
{
    ucnv_close(converter);
}
|
12
|
+
/* Allocator for UConverter: wraps a NULL data pointer (set later by
 * "initialize") with no mark function and icu4r_cnv_free as the free
 * function. */
static VALUE icu4r_cnv_alloc(VALUE klass)
{
    VALUE wrapped = Data_Wrap_Struct(klass, 0, icu4r_cnv_free, 0);

    return wrapped;
}
|
16
|
+
|
17
|
+
|
18
|
+
/**
|
19
|
+
* call-seq:
|
20
|
+
* conv = UConverter.new(name)
|
21
|
+
*
|
22
|
+
* Creates new converter, by given name. Name must be a Ruby String and may contain
|
23
|
+
* additional options, e.g.:
|
24
|
+
*
|
25
|
+
* "SCSU,locale=ja" # Converter option for specifying a locale
|
26
|
+
* "UTF-7,version=1" # Converter option for specifying a version selector (0..9) for some converters.
|
27
|
+
* "ibm-1047,swaplfnl" # Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages.
|
28
|
+
*
|
29
|
+
* To get list of available converters call UConverter.list_available
|
30
|
+
*/
|
31
|
+
VALUE icu4r_cnv_init(VALUE self, VALUE name)
{
    UConverter *converter;
    UConverter *previous;
    UErrorCode  status = U_ZERO_ERROR;

    Check_Type(name, T_STRING);
    /* StringValueCStr yields a NUL-terminated C string and raises if the
     * Ruby string contains an embedded NUL byte. */
    converter = ucnv_open(StringValueCStr(name), &status);
    ICU_RAISE(status);

    /* If initialize is called again on the same object, close the
     * converter that was installed earlier instead of leaking it. */
    previous = UCONVERTER(self);
    DATA_PTR(self) = converter;
    if (previous != NULL) {
        ucnv_close(previous);
    }
    return self;
}
|
42
|
+
/**
|
43
|
+
* call-seq:
|
44
|
+
* UConverter.list_available # => Array
|
45
|
+
*
|
46
|
+
* Returns the names of available converters.
|
47
|
+
*/
|
48
|
+
VALUE icu4r_cnv_list(VALUE self)
{
    const int32_t total = ucnv_countAvailable();
    VALUE         names = rb_ary_new2(total);
    int32_t       idx   = 0;

    /* Copy every available converter name into the array by index. */
    while (idx < total) {
        rb_ary_store(names, idx, rb_str_new2(ucnv_getAvailableName(idx)));
        idx++;
    }
    return names;
}
|
60
|
+
|
61
|
+
/**
|
62
|
+
* call-seq:
|
63
|
+
* converter.subst_chars
|
64
|
+
*
|
65
|
+
* Returns substitution characters as multiple bytes
|
66
|
+
*/
|
67
|
+
VALUE icu4r_cnv_get_subst_chars(VALUE self)
{
    char       subst[16];
    int8_t     subst_len = (int8_t)sizeof(subst);   /* in: capacity, out: bytes written */
    UErrorCode status    = U_ZERO_ERROR;

    ucnv_getSubstChars(UCONVERTER(self), subst, &subst_len, &status);
    ICU_RAISE(status);

    return rb_str_new(subst, subst_len);
}
|
76
|
+
|
77
|
+
/**
|
78
|
+
* call-seq:
|
79
|
+
* converter.subst_chars=chars
|
80
|
+
*
|
81
|
+
* Sets the substitution chars when converting from unicode to a codepage.
|
82
|
+
* The substitution is specified as a string of 1-4 bytes
|
83
|
+
*/
|
84
|
+
VALUE icu4r_cnv_set_subst_chars(VALUE self, VALUE str)
{
    UErrorCode status = U_ZERO_ERROR;
    long       given;
    int8_t     len;

    Check_Type(str, T_STRING);

    /* ucnv_setSubstChars takes the length as int8_t. Passing RSTRING_LEN
     * directly would silently wrap long strings (e.g. 257 bytes -> 1) and
     * could install a truncated substitution. Clamp to the largest int8_t
     * value instead, so an over-long string is still reported as an error
     * by ICU through the usual ICU_RAISE path (ICU documents rejecting
     * lengths above the converter's maximum bytes per character). */
    given = RSTRING_LEN(str);
    len   = (given > 127) ? 127 : (int8_t)given;

    ucnv_setSubstChars(UCONVERTER(self), RSTRING_PTR(str), len, &status);
    ICU_RAISE(status);
    return Qnil;
}
|
92
|
+
|
93
|
+
/**
|
94
|
+
* call-seq:
|
95
|
+
* conv.name
|
96
|
+
*
|
97
|
+
* Gets the internal, canonical name of the converter.
|
98
|
+
*/
|
99
|
+
VALUE icu4r_cnv_name(VALUE self)
{
    UConverter *cnv    = UCONVERTER(self);
    UErrorCode  status = U_ZERO_ERROR;
    const char *name;

    name = ucnv_getName(cnv, &status);
    /* Report an ICU failure the same way the other methods in this file
     * do, rather than handing a possibly NULL name to rb_str_new2. */
    ICU_RAISE(status);

    return rb_str_new2(name);
}
|
105
|
+
|
106
|
+
/**
|
107
|
+
* call-seq:
|
108
|
+
* converter.reset
|
109
|
+
*
|
110
|
+
* Resets the state of a converter to the default state.
|
111
|
+
* This is used in the case of an error, to restart a conversion from a known default state.
|
112
|
+
* It will also empty the internal output buffers.
|
113
|
+
*/
|
114
|
+
VALUE icu4r_cnv_reset(VALUE self)
{
    /* Reset the wrapped converter; nothing useful to return. */
    ucnv_reset(UCONVERTER(self));
    return Qnil;
}
|
120
|
+
|
121
|
+
/**
|
122
|
+
* call-seq:
|
123
|
+
* conv.from_u(ustring) -> String
|
124
|
+
*
|
125
|
+
* Convert the Unicode string into a codepage string using an existing UConverter.
|
126
|
+
*/
|
127
|
+
VALUE icu4r_cnv_from_unicode(VALUE self, VALUE str)
{
    UConverter *conv   = UCONVERTER(self);
    UErrorCode  status = U_ZERO_ERROR;
    int32_t     enclen, capa;
    char       *buf;
    VALUE       s;

    Check_Class(str, rb_cUString);

    /* First attempt: ICU_LEN(str) bytes of output plus one spare byte. */
    capa   = ICU_LEN(str) + 1;
    buf    = ALLOC_N(char, capa);
    enclen = ucnv_fromUChars(conv, buf, capa - 1, ICU_PTR(str), ICU_LEN(str), &status);

    /* On overflow ICU returns the length it needs; grow and convert again. */
    if (U_BUFFER_OVERFLOW_ERROR == status) {
        REALLOC_N(buf, char, enclen + 1);
        status = U_ZERO_ERROR;
        enclen = ucnv_fromUChars(conv, buf, enclen, ICU_PTR(str), ICU_LEN(str), &status);
    }

    if (U_FAILURE(status)) {
        /* ALLOC_N memory is released with xfree, not the C library free. */
        xfree(buf);
        /* Pass the error name as data, never as the format string. */
        rb_raise(rb_eArgError, "%s", u_errorName(status));
    }

    /* rb_str_new copies the bytes, so the scratch buffer must be released
     * here; it was previously leaked on every successful call. */
    s = rb_str_new(buf, enclen);
    xfree(buf);
    return s;
}
|
150
|
+
|
151
|
+
/**
|
152
|
+
* call-seq:
|
153
|
+
* conv.to_u(string) -> UString
|
154
|
+
*
|
155
|
+
* Convert the codepage string into a Unicode string using an existing UConverter.
|
156
|
+
*/
|
157
|
+
VALUE icu4r_cnv_to_unicode(VALUE self, VALUE str)
{
    UConverter *conv   = UCONVERTER(self);
    UErrorCode  status = U_ZERO_ERROR;
    long        len, capa;
    VALUE       s;
    UChar      *buf;

    Check_Type(str, T_STRING);

    /* First attempt: one UChar per input byte plus one spare slot. */
    capa = RSTRING_LEN(str) + 1;
    buf  = ALLOC_N(UChar, capa);
    len  = ucnv_toUChars(conv, buf, capa - 1, RSTRING_PTR(str), RSTRING_LEN(str), &status);

    /* On overflow ICU returns the length it needs; grow and convert again. */
    if (U_BUFFER_OVERFLOW_ERROR == status) {
        capa = len + 1;
        REALLOC_N(buf, UChar, capa);
        status = U_ZERO_ERROR;
        len = ucnv_toUChars(conv, buf, capa - 1, RSTRING_PTR(str), RSTRING_LEN(str), &status);
    }

    /* Check for failure after either attempt. Previously the check sat
     * inside the overflow branch, so any other error from the first call
     * was ignored and its result was used anyway. */
    if (U_FAILURE(status)) {
        /* ALLOC_N memory is released with xfree, not the C library free. */
        xfree(buf);
        /* Pass the error name as data, never as the format string. */
        rb_raise(rb_eArgError, "%s", u_errorName(status));
    }

    /* NOTE(review): buf is not freed here, as in the original; presumably
     * icu_ustr_new_set takes ownership of the buffer - confirm in ustring.c. */
    s = icu_ustr_new_set(buf, len, capa);
    return s;
}
|
181
|
+
|
182
|
+
#define BUF_SIZE 1024
|
183
|
+
/**
|
184
|
+
* call-seq:
|
185
|
+
* conv.convert(other_conv, string)
|
186
|
+
*
|
187
|
+
* Convert from one external charset to another using two existing UConverters,
|
188
|
+
* ignoring the location of errors.
|
189
|
+
*/
|
190
|
+
VALUE icu4r_cnv_convert_to(VALUE self, VALUE other, VALUE src)
{
    UConverter *source_cnv, *target_cnv;
    UErrorCode  status = U_ZERO_ERROR;
    UChar       pivot_buf[BUF_SIZE];
    UChar      *pivot_source, *pivot_target;
    char        chunk[BUF_SIZE];
    char       *out, *out_limit;
    const char *in, *in_limit;
    VALUE       result;

    Check_Class(other, rb_cUConverter);
    Check_Type(src, T_STRING);

    /* Both pivot cursors start at the beginning of the pivot buffer and
     * are carried across loop iterations. */
    pivot_source = pivot_target = pivot_buf;

    source_cnv = UCONVERTER(self);
    target_cnv = UCONVERTER(other);
    in         = RSTRING_PTR(src);
    in_limit   = in + RSTRING_LEN(src);
    result     = rb_str_new2("");

    ucnv_reset(target_cnv);
    ucnv_reset(source_cnv);
    out_limit = chunk + BUF_SIZE;

    /* Convert one output chunk at a time; an overflow status means the
     * chunk filled up and more output remains. */
    for (;;) {
        status = U_ZERO_ERROR;
        out    = chunk;
        ucnv_convertEx(target_cnv, source_cnv, &out, out_limit,
                       &in, in_limit, pivot_buf, &pivot_source, &pivot_target,
                       pivot_buf + BUF_SIZE, FALSE, TRUE, &status);

        if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
            ICU_RAISE(status);
        }
        /* Append whatever was written into the chunk during this pass. */
        rb_str_buf_cat(result, chunk, (int32_t)(out - chunk));

        if (status != U_BUFFER_OVERFLOW_ERROR) {
            break;
        }
    }
    return result;
}
|
223
|
+
|
224
|
+
/**
|
225
|
+
* call-seq:
|
226
|
+
* UConverter.convert(to_converter_name, from_converter_name, source) # => String
|
227
|
+
*
|
228
|
+
* Convert from one external charset to another.
|
229
|
+
* Internally, two converters are opened according to the name arguments, then the text is converted to and from using them.
|
230
|
+
*/
|
231
|
+
VALUE icu4r_cnv_convert(VALUE self, VALUE to_conv_name, VALUE from_conv_name, VALUE src)
{
    UErrorCode status = U_ZERO_ERROR;
    char      *target = NULL;
    int32_t    target_capa, len;
    VALUE      ret;

    /* RSTRING_PTR/RSTRING_LEN are only valid on Strings; reject anything
     * else up front instead of reading through an arbitrary object. */
    Check_Type(to_conv_name, T_STRING);
    Check_Type(from_conv_name, T_STRING);
    Check_Type(src, T_STRING);

    /* Preflight with a zero-capacity target to learn the output size. */
    target_capa = ucnv_convert(RSTRING_PTR(to_conv_name), RSTRING_PTR(from_conv_name),
                               target, 0,
                               RSTRING_PTR(src), RSTRING_LEN(src), &status);

    if (status != U_BUFFER_OVERFLOW_ERROR) {
        /* No overflow: either a real error (raised here) or nothing to
         * convert, in which case the result is an empty string. */
        ICU_RAISE(status);
        return rb_str_new2("");
    }

    /* Second pass into a buffer of the reported size plus one byte. */
    status = U_ZERO_ERROR;
    target_capa += 1;
    target = ALLOC_N(char, target_capa);
    len = ucnv_convert(RSTRING_PTR(to_conv_name), RSTRING_PTR(from_conv_name),
                       target, target_capa,
                       RSTRING_PTR(src), RSTRING_LEN(src), &status);
    if (U_FAILURE(status)) {
        /* ALLOC_N memory is released with xfree, not the C library free. */
        xfree(target);
        ICU_RAISE(status);
    }

    /* rb_str_new copies the bytes, so the scratch buffer can be released. */
    ret = rb_str_new(target, len);
    xfree(target);
    return ret;
}
|
257
|
+
/**
|
258
|
+
* call-seq:
|
259
|
+
* UConverter.std_names(conv_name, std_name)
|
260
|
+
*
|
261
|
+
* Returns list of alias names for a given converter that are recognized by a standard; MIME and IANA are such standards
|
262
|
+
*/
|
263
|
+
VALUE icu4r_cnv_standard_names(VALUE self, VALUE cnv_name, VALUE std_name)
{
    UEnumeration *name_list;
    UErrorCode    status = U_ZERO_ERROR;
    VALUE         ret;
    const char   *name;
    int32_t       len;

    Check_Type(cnv_name, T_STRING);
    Check_Type(std_name, T_STRING);

    name_list = ucnv_openStandardNames(RSTRING_PTR(cnv_name), RSTRING_PTR(std_name), &status);
    ICU_RAISE(status);

    ret = rb_ary_new();
    /* uenum_next returns NULL once the enumeration is exhausted. */
    while ((name = uenum_next(name_list, &len, &status)) != NULL) {
        rb_ary_push(ret, rb_str_new2(name));
    }

    /* Close the enumeration first so it is released even when the status
     * check below raises; then report an error hit during iteration
     * instead of silently returning a shortened list. */
    uenum_close(name_list);
    ICU_RAISE(status);
    return ret;
}
|
281
|
+
|
282
|
+
/**
|
283
|
+
* call-seq:
|
284
|
+
* UConverter.all_names
|
285
|
+
*
|
286
|
+
* Returns all of the canonical converter names, regardless of the ability to open each converter.
|
287
|
+
*/
|
288
|
+
VALUE icu4r_cnv_all_names(VALUE self)
{
    UEnumeration *name_list;
    UErrorCode    status = U_ZERO_ERROR;
    VALUE         ret;
    const char   *name;
    int32_t       len;

    name_list = ucnv_openAllNames(&status);
    ICU_RAISE(status);

    ret = rb_ary_new();
    /* uenum_next returns NULL once the enumeration is exhausted. */
    while ((name = uenum_next(name_list, &len, &status)) != NULL) {
        rb_ary_push(ret, rb_str_new2(name));
    }

    /* Close the enumeration first so it is released even when the status
     * check below raises; then report an error hit during iteration
     * instead of silently returning a shortened list. */
    uenum_close(name_list);
    ICU_RAISE(status);
    return ret;
}
|
304
|
+
void initialize_converter(void)
|
305
|
+
{
|
306
|
+
rb_cUConverter = rb_define_class("UConverter", rb_cObject);
|
307
|
+
rb_define_alloc_func(rb_cUConverter, icu4r_cnv_alloc);
|
308
|
+
rb_define_method(rb_cUConverter, "initialize", icu4r_cnv_init, 1);
|
309
|
+
|
310
|
+
rb_define_method(rb_cUConverter, "to_u", icu4r_cnv_to_unicode, 1);
|
311
|
+
rb_define_method(rb_cUConverter, "from_u", icu4r_cnv_from_unicode, 1);
|
312
|
+
rb_define_method(rb_cUConverter, "reset", icu4r_cnv_reset, 0);
|
313
|
+
rb_define_method(rb_cUConverter, "name", icu4r_cnv_name, 0);
|
314
|
+
rb_define_method(rb_cUConverter, "convert", icu4r_cnv_convert_to, 2);
|
315
|
+
rb_define_method(rb_cUConverter, "subst_chars=", icu4r_cnv_set_subst_chars, 1);
|
316
|
+
rb_define_method(rb_cUConverter, "subst_chars", icu4r_cnv_get_subst_chars, 0);
|
317
|
+
rb_define_singleton_method(rb_cUConverter, "convert", icu4r_cnv_convert, 3);
|
318
|
+
rb_define_singleton_method(rb_cUConverter, "list_available", icu4r_cnv_list, 0);
|
319
|
+
rb_define_singleton_method(rb_cUConverter, "std_names", icu4r_cnv_standard_names, 2);
|
320
|
+
rb_define_singleton_method(rb_cUConverter, "all_names", icu4r_cnv_all_names, 0);
|
321
|
+
}
|
322
|
+
|