icu4r_19 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +87 -0
- data/MIT-LICENSE +20 -0
- data/README +156 -0
- data/Rakefile +32 -0
- data/calendar.c +636 -0
- data/collator.c +233 -0
- data/converter.c +322 -0
- data/docs/FORMATTING +131 -0
- data/docs/UNICODE_REGEXPS +204 -0
- data/extconf.rb +17 -0
- data/fmt.cpp +156 -0
- data/icu4r.c +18 -0
- data/icu_common.h +45 -0
- data/lib/dummy +0 -0
- data/samples/demo_each.rb +23 -0
- data/samples/demo_locales.rb +16 -0
- data/samples/demo_regexp.rb +11 -0
- data/samples/resbundle/appmsg/root.res +0 -0
- data/samples/resbundle/appmsg/ru.res +0 -0
- data/samples/resbundle/demo_bundle.rb +4 -0
- data/samples/resbundle/mkres.sh +4 -0
- data/samples/resbundle/root.txt +10 -0
- data/samples/resbundle/ru.txt +4 -0
- data/test/test_calendar.rb +123 -0
- data/test/test_collator.rb +33 -0
- data/test/test_converter.rb +72 -0
- data/test/test_ustring.rb +508 -0
- data/tools/doc.sh +2 -0
- data/tools/km.rb +425 -0
- data/ubundle.c +223 -0
- data/ucore_ext.c +168 -0
- data/uregex.c +697 -0
- data/uregex.h +27 -0
- data/ustring.c +3039 -0
- metadata +164 -0
data/collator.c
ADDED
@@ -0,0 +1,233 @@
|
|
1
|
+
#include "icu_common.h"
|
2
|
+
extern VALUE rb_cUString;
|
3
|
+
extern VALUE rb_cUCollator;
|
4
|
+
extern int icu_collator_cmp (UCollator * collator, VALUE str1, VALUE str2) ;
|
5
|
+
|
6
|
+
/**
|
7
|
+
* Document-class: UCollator
|
8
|
+
*
|
9
|
+
* API for UCollator performs locale-sensitive string comparison. You use this service to build searching and
|
10
|
+
* sorting routines for natural language text.
|
11
|
+
*
|
12
|
+
* Attributes that collation service understands:
|
13
|
+
*
|
14
|
+
* UCOL_FRENCH_COLLATION Attribute for direction of secondary weights - used in French. UCOL_ON, UCOL_OFF
|
15
|
+
*
|
16
|
+
* UCOL_ALTERNATE_HANDLING Attribute for handling variable elements. UCOL_NON_IGNORABLE (default), UCOL_SHIFTED
|
17
|
+
*
|
18
|
+
* UCOL_CASE_FIRST Controls the ordering of upper and lower case letters.
|
19
|
+
* UCOL_OFF (default), UCOL_UPPER_FIRST, UCOL_LOWER_FIRST
|
20
|
+
*
|
21
|
+
* UCOL_CASE_LEVEL Controls whether an extra case level (positioned before the third level) is
|
22
|
+
* generated or not. UCOL_OFF (default), UCOL_ON
|
23
|
+
*
|
24
|
+
* UCOL_NORMALIZATION_MODE Controls whether the normalization check and necessary normalizations are performed.
|
25
|
+
* When set to UCOL_ON, an incremental check is performed to see whether the input data
|
26
|
+
* is in the FCD form. If the data is not in the FCD form, incremental NFD normalization
|
27
|
+
* is performed.
|
28
|
+
*
|
29
|
+
* UCOL_DECOMPOSITION_MODE An alias for UCOL_NORMALIZATION_MODE attribute
|
30
|
+
*
|
31
|
+
* UCOL_STRENGTH The strength attribute.
|
32
|
+
* Can be either UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL.
|
33
|
+
* The usual strength for most locales (except Japanese) is tertiary.
|
34
|
+
*
|
35
|
+
* UCOL_HIRAGANA_QUATERNARY_MODE when turned on, this attribute positions Hiragana before all non-ignorables on
|
36
|
+
* quaternary level. This is a sneaky way to produce JIS sort order.
|
37
|
+
* UCOL_NUMERIC_COLLATION when turned on, this attribute generates a collation key for the numeric value of
|
38
|
+
* substrings of digits. This is a way to get '100' to sort AFTER '2'.
|
39
|
+
*
|
40
|
+
* Attribute values:
|
41
|
+
*
|
42
|
+
* UCOL_DEFAULT accepted by most attributes
|
43
|
+
* UCOL_PRIMARY Primary collation strength
|
44
|
+
* UCOL_SECONDARY Secondary collation strength
|
45
|
+
* UCOL_TERTIARY Tertiary collation strength
|
46
|
+
* UCOL_DEFAULT_STRENGTH Default collation strength
|
47
|
+
* UCOL_QUATERNARY Quaternary collation strength
|
48
|
+
* UCOL_IDENTICAL Identical collation strength
|
49
|
+
* UCOL_OFF Turn the feature off - works for
|
50
|
+
* UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL,
|
51
|
+
* UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE
|
52
|
+
*
|
53
|
+
* UCOL_ON Turn the feature on - works for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL,
|
54
|
+
* UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE
|
55
|
+
*
|
56
|
+
* UCOL_SHIFTED Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted
|
57
|
+
* UCOL_NON_IGNORABLE Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable
|
58
|
+
* UCOL_LOWER_FIRST Valid for UCOL_CASE_FIRST - lower case sorts before upper case
|
59
|
+
* UCOL_UPPER_FIRST upper case sorts before lower case
|
60
|
+
**/
|
61
|
+
|
62
|
+
#define UCOLLATOR(obj) ((UCollator *)DATA_PTR(obj))
|
63
|
+
|
64
|
+
/* Free function registered by icu4r_col_alloc: closes the wrapped ICU collator. */
void icu4r_col_free(UCollator * col)
{
    ucol_close(col);
}
|
68
|
+
/*
 * Allocation function for UCollator objects. The wrapper starts with a NULL
 * data pointer (filled in later by icu4r_col_init), has no mark function and
 * uses icu4r_col_free as its free function.
 */
static VALUE icu4r_col_alloc(VALUE klass)
{
    VALUE wrapper = Data_Wrap_Struct(klass, 0, icu4r_col_free, 0);

    return wrapper;
}
|
72
|
+
/**
|
73
|
+
* call-seq:
|
74
|
+
* col = UCollator.new(locale = nil)
|
75
|
+
*
|
76
|
+
* Open a UCollator for comparing strings for the given locale containing the required collation rules.
|
77
|
+
* Special values for locales can be passed in - if +nil+ is passed for the locale, the default locale
|
78
|
+
* collation rules will be used. If empty string ("") or "root" are passed, UCA rules will be used.
|
79
|
+
*/
|
80
|
+
VALUE icu4r_col_init(int argc, VALUE * argv, VALUE self)
|
81
|
+
{
|
82
|
+
UCollator * col;
|
83
|
+
UErrorCode status = U_ZERO_ERROR;
|
84
|
+
VALUE loc;
|
85
|
+
char * locale = NULL;
|
86
|
+
if( rb_scan_args(argc, argv, "01", &loc))
|
87
|
+
{
|
88
|
+
Check_Type(loc, T_STRING);
|
89
|
+
locale = RSTRING_PTR(loc);
|
90
|
+
}
|
91
|
+
col = ucol_open(locale, &status);
|
92
|
+
ICU_RAISE(status);
|
93
|
+
DATA_PTR(self)=col;
|
94
|
+
return self;
|
95
|
+
}
|
96
|
+
|
97
|
+
/**
|
98
|
+
* call-seq:
|
99
|
+
* collator.strength
|
100
|
+
*
|
101
|
+
* Get the collation strength used in a UCollator. The strength influences how strings are compared.
|
102
|
+
**/
|
103
|
+
VALUE icu4r_col_get_strength(VALUE self)
|
104
|
+
{
|
105
|
+
return INT2NUM(ucol_getStrength(UCOLLATOR(self)));
|
106
|
+
}
|
107
|
+
|
108
|
+
/**
|
109
|
+
* call-seq:
|
110
|
+
* collator.strength = new_strength
|
111
|
+
*
|
112
|
+
* Sets the collation strength used in a UCollator. The strength influences how strings are compared.
|
113
|
+
**/
|
114
|
+
VALUE icu4r_col_set_strength(VALUE self, VALUE obj)
|
115
|
+
{
|
116
|
+
Check_Type(obj, T_FIXNUM);
|
117
|
+
ucol_setStrength(UCOLLATOR(self), FIX2INT(obj));
|
118
|
+
return Qnil;
|
119
|
+
}
|
120
|
+
|
121
|
+
/**
|
122
|
+
* call-seq:
|
123
|
+
* collator.get_attr(attribute)
|
124
|
+
* collator[attribute]
|
125
|
+
*
|
126
|
+
* Universal attribute setter. See above for valid attributes and their values
|
127
|
+
**/
|
128
|
+
VALUE icu4r_col_get_attr(VALUE self, VALUE obj)
|
129
|
+
{
|
130
|
+
UErrorCode status = U_ZERO_ERROR;
|
131
|
+
UColAttributeValue val;
|
132
|
+
Check_Type(obj, T_FIXNUM);
|
133
|
+
val = ucol_getAttribute(UCOLLATOR(self), FIX2INT(obj), &status);
|
134
|
+
ICU_RAISE(status);
|
135
|
+
return INT2FIX(val);
|
136
|
+
}
|
137
|
+
|
138
|
+
/**
|
139
|
+
* call-seq:
|
140
|
+
* collator.set_attr(attribute, value)
|
141
|
+
* collator[attribute]=value
|
142
|
+
*
|
143
|
+
* Universal attribute setter. See above for valid attributes and their values
|
144
|
+
**/
|
145
|
+
VALUE icu4r_col_set_attr(VALUE self, VALUE obj, VALUE new_val)
|
146
|
+
{
|
147
|
+
UErrorCode status = U_ZERO_ERROR;
|
148
|
+
Check_Type(obj, T_FIXNUM);
|
149
|
+
Check_Type(new_val, T_FIXNUM);
|
150
|
+
ucol_setAttribute(UCOLLATOR(self), FIX2INT(obj), FIX2INT(new_val), &status);
|
151
|
+
ICU_RAISE(status);
|
152
|
+
return Qnil;
|
153
|
+
}
|
154
|
+
/**
|
155
|
+
* call-seq:
|
156
|
+
* collator.strcoll(ustr1, ustr2)
|
157
|
+
*
|
158
|
+
* Compare two UString's. The strings will be compared using the options already specified.
|
159
|
+
**/
|
160
|
+
VALUE icu4r_col_strcoll(VALUE self, VALUE str1, VALUE str2)
|
161
|
+
{
|
162
|
+
Check_Class(str1, rb_cUString);
|
163
|
+
Check_Class(str2, rb_cUString);
|
164
|
+
return INT2FIX(icu_collator_cmp(UCOLLATOR(self), str1, str2));
|
165
|
+
}
|
166
|
+
/**
|
167
|
+
* call-seq:
|
168
|
+
* collator.sort_key(an_ustring) -> String
|
169
|
+
*
|
170
|
+
* Get a sort key for a string from a UCollator. Sort keys may be compared using strcmp.
|
171
|
+
**/
|
172
|
+
VALUE icu4r_col_sort_key(VALUE self, VALUE str)
|
173
|
+
{
|
174
|
+
int32_t needed , capa ;
|
175
|
+
unsigned char * buffer ;
|
176
|
+
VALUE ret;
|
177
|
+
Check_Class(str, rb_cUString);
|
178
|
+
capa = ICU_LEN(str);
|
179
|
+
buffer = ALLOC_N(unsigned char, capa);
|
180
|
+
needed = ucol_getSortKey(UCOLLATOR(self), ICU_PTR(str), ICU_LEN(str), buffer, capa);
|
181
|
+
if(needed > capa){
|
182
|
+
REALLOC_N(buffer,unsigned char, needed);
|
183
|
+
needed = ucol_getSortKey(UCOLLATOR(self), ICU_PTR(str), ICU_LEN(str), buffer, needed);
|
184
|
+
}
|
185
|
+
ret = rb_str_new((char *)buffer, needed);
|
186
|
+
free(buffer);
|
187
|
+
return ret;
|
188
|
+
}
|
189
|
+
void initialize_collator()
|
190
|
+
{
|
191
|
+
rb_cUCollator = rb_define_class("UCollator", rb_cObject);
|
192
|
+
rb_define_alloc_func(rb_cUCollator, icu4r_col_alloc);
|
193
|
+
|
194
|
+
rb_define_method(rb_cUCollator, "initialize", icu4r_col_init, -1);
|
195
|
+
rb_define_method(rb_cUCollator, "strength", icu4r_col_get_strength, 0);
|
196
|
+
rb_define_method(rb_cUCollator, "strength=", icu4r_col_set_strength, 1);
|
197
|
+
rb_define_method(rb_cUCollator, "get_attr", icu4r_col_get_attr, 1);
|
198
|
+
rb_define_alias(rb_cUCollator, "[]", "get_attr");
|
199
|
+
rb_define_method(rb_cUCollator, "set_attr", icu4r_col_set_attr, 2);
|
200
|
+
rb_define_alias(rb_cUCollator, "[]=", "set_attr");
|
201
|
+
rb_define_method(rb_cUCollator, "strcoll", icu4r_col_strcoll, 2);
|
202
|
+
rb_define_method(rb_cUCollator, "sort_key",icu4r_col_sort_key, 1);
|
203
|
+
|
204
|
+
/* attributes */
|
205
|
+
rb_define_const(rb_cUCollator, "UCOL_FRENCH_COLLATION", INT2FIX(UCOL_FRENCH_COLLATION));
|
206
|
+
rb_define_const(rb_cUCollator, "UCOL_ALTERNATE_HANDLING", INT2FIX(UCOL_ALTERNATE_HANDLING));
|
207
|
+
rb_define_const(rb_cUCollator, "UCOL_CASE_FIRST", INT2FIX(UCOL_CASE_FIRST));
|
208
|
+
rb_define_const(rb_cUCollator, "UCOL_CASE_LEVEL", INT2FIX(UCOL_CASE_LEVEL));
|
209
|
+
rb_define_const(rb_cUCollator, "UCOL_NORMALIZATION_MODE", INT2FIX(UCOL_NORMALIZATION_MODE));
|
210
|
+
rb_define_const(rb_cUCollator, "UCOL_DECOMPOSITION_MODE", INT2FIX(UCOL_DECOMPOSITION_MODE));
|
211
|
+
rb_define_const(rb_cUCollator, "UCOL_STRENGTH", INT2FIX(UCOL_STRENGTH));
|
212
|
+
rb_define_const(rb_cUCollator, "UCOL_HIRAGANA_QUATERNARY_MODE", INT2FIX(UCOL_HIRAGANA_QUATERNARY_MODE));
|
213
|
+
rb_define_const(rb_cUCollator, "UCOL_NUMERIC_COLLATION", INT2FIX(UCOL_NUMERIC_COLLATION));
|
214
|
+
rb_define_const(rb_cUCollator, "UCOL_ATTRIBUTE_COUNT", INT2FIX(UCOL_ATTRIBUTE_COUNT));
|
215
|
+
|
216
|
+
/* attribute values */
|
217
|
+
rb_define_const(rb_cUCollator, "UCOL_DEFAULT", INT2FIX(UCOL_DEFAULT));
|
218
|
+
rb_define_const(rb_cUCollator, "UCOL_PRIMARY", INT2FIX(UCOL_PRIMARY));
|
219
|
+
rb_define_const(rb_cUCollator, "UCOL_SECONDARY", INT2FIX(UCOL_SECONDARY));
|
220
|
+
rb_define_const(rb_cUCollator, "UCOL_TERTIARY", INT2FIX(UCOL_TERTIARY));
|
221
|
+
rb_define_const(rb_cUCollator, "UCOL_DEFAULT_STRENGTH", INT2FIX(UCOL_DEFAULT_STRENGTH));
|
222
|
+
rb_define_const(rb_cUCollator, "UCOL_CE_STRENGTH_LIMIT", INT2FIX(UCOL_CE_STRENGTH_LIMIT));
|
223
|
+
rb_define_const(rb_cUCollator, "UCOL_QUATERNARY", INT2FIX(UCOL_QUATERNARY));
|
224
|
+
rb_define_const(rb_cUCollator, "UCOL_IDENTICAL", INT2FIX(UCOL_IDENTICAL));
|
225
|
+
rb_define_const(rb_cUCollator, "UCOL_STRENGTH_LIMIT", INT2FIX(UCOL_STRENGTH_LIMIT));
|
226
|
+
rb_define_const(rb_cUCollator, "UCOL_OFF", INT2FIX(UCOL_OFF));
|
227
|
+
rb_define_const(rb_cUCollator, "UCOL_ON", INT2FIX(UCOL_ON));
|
228
|
+
rb_define_const(rb_cUCollator, "UCOL_SHIFTED", INT2FIX(UCOL_SHIFTED));
|
229
|
+
rb_define_const(rb_cUCollator, "UCOL_NON_IGNORABLE", INT2FIX(UCOL_NON_IGNORABLE));
|
230
|
+
rb_define_const(rb_cUCollator, "UCOL_LOWER_FIRST", INT2FIX(UCOL_LOWER_FIRST));
|
231
|
+
rb_define_const(rb_cUCollator, "UCOL_UPPER_FIRST", INT2FIX(UCOL_UPPER_FIRST));
|
232
|
+
|
233
|
+
}
|
data/converter.c
ADDED
@@ -0,0 +1,322 @@
|
|
1
|
+
#include "icu_common.h"
|
2
|
+
extern VALUE rb_cUString;
|
3
|
+
extern VALUE icu_ustr_new_set(UChar * ptr, long len, long capa);
|
4
|
+
extern VALUE rb_cUConverter;
|
5
|
+
|
6
|
+
#define UCONVERTER(obj) ((UConverter *)DATA_PTR(obj))
|
7
|
+
|
8
|
+
/* Free function registered by icu4r_cnv_alloc: closes the wrapped ICU converter. */
static void icu4r_cnv_free(UConverter * conv)
{
    ucnv_close(conv);
}
|
12
|
+
/*
 * Allocation function for UConverter objects. The wrapper starts with a NULL
 * data pointer (filled in later by icu4r_cnv_init), has no mark function and
 * uses icu4r_cnv_free as its free function.
 */
static VALUE icu4r_cnv_alloc(VALUE klass)
{
    VALUE wrapper = Data_Wrap_Struct(klass, 0, icu4r_cnv_free, 0);

    return wrapper;
}
|
16
|
+
|
17
|
+
|
18
|
+
/**
|
19
|
+
* call-seq:
|
20
|
+
* conv = UConverter.new(name)
|
21
|
+
*
|
22
|
+
* Creates new converter, by given name. Name must be a Ruby String and may contain
|
23
|
+
* additional options, e.g.:
|
24
|
+
*
|
25
|
+
* "SCSU,locale=ja" # Converter option for specifying a locale
|
26
|
+
* "UTF-7,version=1" # Converter option for specifying a version selector (0..9) for some converters.
|
27
|
+
* "ibm-1047,swaplfnl" # Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages.
|
28
|
+
*
|
29
|
+
* To get list of available converters call UConverter.list_available
|
30
|
+
*/
|
31
|
+
VALUE icu4r_cnv_init(VALUE self, VALUE name)
|
32
|
+
{
|
33
|
+
UConverter * converter;
|
34
|
+
UErrorCode status = U_ZERO_ERROR;
|
35
|
+
|
36
|
+
Check_Type(name, T_STRING);
|
37
|
+
converter = ucnv_open(RSTRING_PTR(name), &status);
|
38
|
+
ICU_RAISE(status);
|
39
|
+
DATA_PTR(self) = converter;
|
40
|
+
return self;
|
41
|
+
}
|
42
|
+
/**
|
43
|
+
* call-seq:
|
44
|
+
* UConverter.list_available # => Array
|
45
|
+
*
|
46
|
+
* Returns the names of available converters.
|
47
|
+
*/
|
48
|
+
VALUE icu4r_cnv_list(VALUE self)
|
49
|
+
{
|
50
|
+
VALUE ret ;
|
51
|
+
int32_t count, i;
|
52
|
+
count = ucnv_countAvailable();
|
53
|
+
ret = rb_ary_new2(count);
|
54
|
+
for( i = 0; i < count ; i++)
|
55
|
+
{
|
56
|
+
rb_ary_store(ret, i, rb_str_new2(ucnv_getAvailableName(i)));
|
57
|
+
}
|
58
|
+
return ret;
|
59
|
+
}
|
60
|
+
|
61
|
+
/**
|
62
|
+
* call-seq:
|
63
|
+
* converter.subst_chars
|
64
|
+
*
|
65
|
+
* Returns substitution characters as multiple bytes
|
66
|
+
*/
|
67
|
+
VALUE icu4r_cnv_get_subst_chars(VALUE self)
|
68
|
+
{
|
69
|
+
char buf[16];
|
70
|
+
int8_t len = 16;
|
71
|
+
UErrorCode status = U_ZERO_ERROR;
|
72
|
+
ucnv_getSubstChars(UCONVERTER(self), buf, &len, &status);
|
73
|
+
ICU_RAISE(status);
|
74
|
+
return rb_str_new(buf, len);
|
75
|
+
}
|
76
|
+
|
77
|
+
/**
|
78
|
+
* call-seq:
|
79
|
+
* converter.subst_chars=chars
|
80
|
+
*
|
81
|
+
* Sets the substitution chars when converting from unicode to a codepage.
|
82
|
+
* The substitution is specified as a string of 1-4 bytes
|
83
|
+
*/
|
84
|
+
VALUE icu4r_cnv_set_subst_chars(VALUE self, VALUE str)
|
85
|
+
{
|
86
|
+
UErrorCode status = U_ZERO_ERROR;
|
87
|
+
Check_Type(str, T_STRING);
|
88
|
+
ucnv_setSubstChars(UCONVERTER(self), RSTRING_PTR(str), RSTRING_LEN(str), &status);
|
89
|
+
ICU_RAISE(status);
|
90
|
+
return Qnil;
|
91
|
+
}
|
92
|
+
|
93
|
+
/**
|
94
|
+
* call-seq:
|
95
|
+
* conv.name
|
96
|
+
*
|
97
|
+
* Gets the internal, canonical name of the converter.
|
98
|
+
*/
|
99
|
+
VALUE icu4r_cnv_name(VALUE self)
|
100
|
+
{
|
101
|
+
UConverter * cnv = UCONVERTER(self);
|
102
|
+
UErrorCode status = U_ZERO_ERROR;
|
103
|
+
return rb_str_new2(ucnv_getName(cnv, &status));
|
104
|
+
}
|
105
|
+
|
106
|
+
/**
|
107
|
+
* call-seq:
|
108
|
+
* converter.reset
|
109
|
+
*
|
110
|
+
* Resets the state of a converter to the default state.
|
111
|
+
* This is used in the case of an error, to restart a conversion from a known default state.
|
112
|
+
* It will also empty the internal output buffers.
|
113
|
+
*/
|
114
|
+
VALUE icu4r_cnv_reset(VALUE self)
|
115
|
+
{
|
116
|
+
UConverter * cnv = UCONVERTER(self);
|
117
|
+
ucnv_reset(cnv);
|
118
|
+
return Qnil;
|
119
|
+
}
|
120
|
+
|
121
|
+
/**
|
122
|
+
* call-seq:
|
123
|
+
* conv.from_u(ustring) -> String
|
124
|
+
*
|
125
|
+
* Convert the Unicode string into a codepage string using an existing UConverter.
|
126
|
+
*/
|
127
|
+
VALUE icu4r_cnv_from_unicode(VALUE self, VALUE str)
|
128
|
+
{
|
129
|
+
UConverter * conv = UCONVERTER(self);
|
130
|
+
UErrorCode status = U_ZERO_ERROR;
|
131
|
+
int32_t enclen, capa;
|
132
|
+
char * buf;
|
133
|
+
VALUE s = Qnil;
|
134
|
+
Check_Class(str, rb_cUString);
|
135
|
+
capa = ICU_LEN(str) + 1;
|
136
|
+
buf = ALLOC_N(char, capa);
|
137
|
+
enclen = ucnv_fromUChars(conv, buf, capa-1, ICU_PTR(str), ICU_LEN(str), &status);
|
138
|
+
if (U_BUFFER_OVERFLOW_ERROR == status) {
|
139
|
+
REALLOC_N(buf, char, enclen + 1);
|
140
|
+
status = 0;
|
141
|
+
ucnv_fromUChars(conv, buf, enclen, ICU_PTR(str), ICU_LEN(str), &status);
|
142
|
+
}
|
143
|
+
if( U_FAILURE(status) ){
|
144
|
+
free(buf);
|
145
|
+
rb_raise(rb_eArgError, u_errorName(status));
|
146
|
+
}
|
147
|
+
s = rb_str_new(buf, enclen);
|
148
|
+
return s;
|
149
|
+
}
|
150
|
+
|
151
|
+
/**
|
152
|
+
* call-seq:
|
153
|
+
* conv.to_u(string) -> UString
|
154
|
+
*
|
155
|
+
* Convert the codepage string into a Unicode string using an existing UConverter.
|
156
|
+
*/
|
157
|
+
VALUE icu4r_cnv_to_unicode(VALUE self, VALUE str)
|
158
|
+
{
|
159
|
+
UConverter * conv = UCONVERTER(self);
|
160
|
+
UErrorCode status = U_ZERO_ERROR;
|
161
|
+
long len, capa;
|
162
|
+
VALUE s;
|
163
|
+
UChar * buf;
|
164
|
+
Check_Type(str, T_STRING);
|
165
|
+
capa = RSTRING_LEN(str) + 1;
|
166
|
+
buf = ALLOC_N(UChar, capa);
|
167
|
+
len = ucnv_toUChars(conv, buf, capa-1, RSTRING_PTR(str), RSTRING_LEN(str), &status);
|
168
|
+
if (U_BUFFER_OVERFLOW_ERROR == status) {
|
169
|
+
capa = len+1;
|
170
|
+
REALLOC_N(buf, UChar, capa);
|
171
|
+
status = 0;
|
172
|
+
len = ucnv_toUChars(conv, buf, capa-1, RSTRING_PTR(str), RSTRING_LEN(str), &status);
|
173
|
+
if (U_FAILURE(status)) {
|
174
|
+
free(buf);
|
175
|
+
rb_raise(rb_eArgError, u_errorName(status));
|
176
|
+
}
|
177
|
+
}
|
178
|
+
s = icu_ustr_new_set(buf, len, capa);
|
179
|
+
return s;
|
180
|
+
}
|
181
|
+
|
182
|
+
#define BUF_SIZE 1024
|
183
|
+
/**
|
184
|
+
* call-seq:
|
185
|
+
* conv.convert(other_conv, string)
|
186
|
+
*
|
187
|
+
* Convert from one external charset to another using two existing UConverters,
|
188
|
+
* ignoring the location of errors.
|
189
|
+
*/
|
190
|
+
VALUE icu4r_cnv_convert_to(VALUE self, VALUE other, VALUE src)
|
191
|
+
{
|
192
|
+
UConverter * cnv, * other_cnv;
|
193
|
+
UErrorCode status = U_ZERO_ERROR;
|
194
|
+
UChar pivotBuffer[BUF_SIZE];
|
195
|
+
UChar *pivot, *pivot2;
|
196
|
+
char * target,buffer[BUF_SIZE], *target_limit;
|
197
|
+
const char * src_ptr, * src_end;
|
198
|
+
VALUE ret;
|
199
|
+
Check_Class(other, rb_cUConverter);
|
200
|
+
Check_Type(src, T_STRING);
|
201
|
+
pivot=pivot2=pivotBuffer;
|
202
|
+
cnv = UCONVERTER(self);
|
203
|
+
other_cnv = UCONVERTER(other);
|
204
|
+
src_ptr = RSTRING_PTR(src);
|
205
|
+
src_end = src_ptr + RSTRING_LEN(src);
|
206
|
+
ret = rb_str_new2("");
|
207
|
+
ucnv_reset(other_cnv);
|
208
|
+
ucnv_reset(cnv);
|
209
|
+
target_limit = buffer+BUF_SIZE;
|
210
|
+
do {
|
211
|
+
status = U_ZERO_ERROR;
|
212
|
+
target = buffer;
|
213
|
+
ucnv_convertEx( other_cnv, cnv, &target, target_limit,
|
214
|
+
&src_ptr, src_end, pivotBuffer, &pivot, &pivot2, pivotBuffer+BUF_SIZE, FALSE, TRUE, &status);
|
215
|
+
|
216
|
+
if(U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
|
217
|
+
ICU_RAISE(status);
|
218
|
+
}
|
219
|
+
rb_str_buf_cat(ret, buffer, (int32_t)(target-buffer));
|
220
|
+
} while (status == U_BUFFER_OVERFLOW_ERROR);
|
221
|
+
return ret;
|
222
|
+
}
|
223
|
+
|
224
|
+
/**
|
225
|
+
* call-seq:
|
226
|
+
* UConverter.convert(to_converter_name, from_converter_name, source) # => String
|
227
|
+
*
|
228
|
+
* Convert from one external charset to another.
|
229
|
+
* Internally, two converters are opened according to the name arguments, then the text is converted to and from using them.
|
230
|
+
*/
|
231
|
+
VALUE icu4r_cnv_convert(VALUE self, VALUE to_conv_name, VALUE from_conv_name, VALUE src)
|
232
|
+
{
|
233
|
+
UErrorCode status = U_ZERO_ERROR;
|
234
|
+
char * target = NULL;
|
235
|
+
int32_t target_capa, len;
|
236
|
+
VALUE ret;
|
237
|
+
target_capa = ucnv_convert( RSTRING_PTR(to_conv_name), RSTRING_PTR(from_conv_name),
|
238
|
+
target, 0,
|
239
|
+
RSTRING_PTR(src), RSTRING_LEN(src), &status);
|
240
|
+
if(status == U_BUFFER_OVERFLOW_ERROR){
|
241
|
+
status = U_ZERO_ERROR;
|
242
|
+
target_capa += 1;
|
243
|
+
target = ALLOC_N(char, target_capa);
|
244
|
+
len = ucnv_convert( RSTRING_PTR(to_conv_name), RSTRING_PTR(from_conv_name),
|
245
|
+
target, target_capa,
|
246
|
+
RSTRING_PTR(src), RSTRING_LEN(src), &status);
|
247
|
+
if(U_FAILURE(status)){
|
248
|
+
free(target);
|
249
|
+
ICU_RAISE(status);
|
250
|
+
}
|
251
|
+
ret = rb_str_new(target, len);
|
252
|
+
free(target);
|
253
|
+
return ret;
|
254
|
+
} else ICU_RAISE(status);
|
255
|
+
return rb_str_new2("");
|
256
|
+
}
|
257
|
+
/**
|
258
|
+
* call-seq:
|
259
|
+
* UConverter.std_names(conv_name, std_name)
|
260
|
+
*
|
261
|
+
* Returns list of alias names for a given converter that are recognized by a standard; MIME and IANA are such standards
|
262
|
+
*/
|
263
|
+
VALUE icu4r_cnv_standard_names(VALUE self, VALUE cnv_name, VALUE std_name)
|
264
|
+
{
|
265
|
+
UEnumeration * name_list;
|
266
|
+
UErrorCode status = U_ZERO_ERROR;
|
267
|
+
VALUE ret ;
|
268
|
+
char * name;
|
269
|
+
int32_t len;
|
270
|
+
Check_Type(cnv_name, T_STRING);
|
271
|
+
Check_Type(std_name, T_STRING);
|
272
|
+
name_list = ucnv_openStandardNames(RSTRING_PTR(cnv_name), RSTRING_PTR(std_name), &status);
|
273
|
+
ICU_RAISE(status);
|
274
|
+
ret = rb_ary_new();
|
275
|
+
while( (name = (char*)uenum_next(name_list, &len, &status))) {
|
276
|
+
rb_ary_push(ret, rb_str_new2(name));
|
277
|
+
}
|
278
|
+
uenum_close(name_list);
|
279
|
+
return ret;
|
280
|
+
}
|
281
|
+
|
282
|
+
/**
|
283
|
+
* call-seq:
|
284
|
+
* UConverter.all_names
|
285
|
+
*
|
286
|
+
* Returns all of the canonical converter names, regardless of the ability to open each converter.
|
287
|
+
*/
|
288
|
+
VALUE icu4r_cnv_all_names(VALUE self)
|
289
|
+
{
|
290
|
+
UEnumeration * name_list;
|
291
|
+
UErrorCode status = U_ZERO_ERROR;
|
292
|
+
VALUE ret ;
|
293
|
+
char * name;
|
294
|
+
int32_t len;
|
295
|
+
name_list = ucnv_openAllNames(&status);
|
296
|
+
ICU_RAISE(status);
|
297
|
+
ret = rb_ary_new();
|
298
|
+
while( (name = (char*)uenum_next(name_list, &len, &status))) {
|
299
|
+
rb_ary_push(ret, rb_str_new2(name));
|
300
|
+
}
|
301
|
+
uenum_close(name_list);
|
302
|
+
return ret;
|
303
|
+
}
|
304
|
+
void initialize_converter(void)
|
305
|
+
{
|
306
|
+
rb_cUConverter = rb_define_class("UConverter", rb_cObject);
|
307
|
+
rb_define_alloc_func(rb_cUConverter, icu4r_cnv_alloc);
|
308
|
+
rb_define_method(rb_cUConverter, "initialize", icu4r_cnv_init, 1);
|
309
|
+
|
310
|
+
rb_define_method(rb_cUConverter, "to_u", icu4r_cnv_to_unicode, 1);
|
311
|
+
rb_define_method(rb_cUConverter, "from_u", icu4r_cnv_from_unicode, 1);
|
312
|
+
rb_define_method(rb_cUConverter, "reset", icu4r_cnv_reset, 0);
|
313
|
+
rb_define_method(rb_cUConverter, "name", icu4r_cnv_name, 0);
|
314
|
+
rb_define_method(rb_cUConverter, "convert", icu4r_cnv_convert_to, 2);
|
315
|
+
rb_define_method(rb_cUConverter, "subst_chars=", icu4r_cnv_set_subst_chars, 1);
|
316
|
+
rb_define_method(rb_cUConverter, "subst_chars", icu4r_cnv_get_subst_chars, 0);
|
317
|
+
rb_define_singleton_method(rb_cUConverter, "convert", icu4r_cnv_convert, 3);
|
318
|
+
rb_define_singleton_method(rb_cUConverter, "list_available", icu4r_cnv_list, 0);
|
319
|
+
rb_define_singleton_method(rb_cUConverter, "std_names", icu4r_cnv_standard_names, 2);
|
320
|
+
rb_define_singleton_method(rb_cUConverter, "all_names", icu4r_cnv_all_names, 0);
|
321
|
+
}
|
322
|
+
|