iconv 0.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/BSDL +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +56 -0
- data/README.md +75 -0
- data/Rakefile +38 -0
- data/ext/iconv/depend +2 -0
- data/ext/iconv/extconf.rb +5 -2
- data/ext/iconv/iconv.c +578 -273
- data/iconv.gemspec +20 -0
- data/lib/iconv/version.rb +3 -0
- data/lib/iconv.rb +6 -0
- data/test/test_basic.rb +59 -0
- data/test/test_option.rb +43 -0
- data/test/test_partial.rb +41 -0
- data/test/utils.rb +23 -0
- metadata +46 -46
data/ext/iconv/iconv.c
CHANGED
@@ -4,7 +4,6 @@
|
|
4
4
|
iconv.c -
|
5
5
|
|
6
6
|
$Author$
|
7
|
-
$Date$
|
8
7
|
created at: Wed Dec 1 20:28:09 JST 1999
|
9
8
|
|
10
9
|
All the files in this distribution are covered under the Ruby's
|
@@ -14,12 +13,12 @@
|
|
14
13
|
|
15
14
|
**********************************************************************/
|
16
15
|
|
17
|
-
#include "ruby.h"
|
16
|
+
#include "ruby/ruby.h"
|
18
17
|
#include <errno.h>
|
19
18
|
#include <iconv.h>
|
20
19
|
#include <assert.h>
|
21
|
-
#include "st.h"
|
22
|
-
#include "
|
20
|
+
#include "ruby/st.h"
|
21
|
+
#include "ruby/encoding.h"
|
23
22
|
|
24
23
|
/*
|
25
24
|
* Document-class: Iconv
|
@@ -27,20 +26,20 @@
|
|
27
26
|
* == Summary
|
28
27
|
*
|
29
28
|
* Ruby extension for charset conversion.
|
30
|
-
*
|
29
|
+
*
|
31
30
|
* == Abstract
|
32
31
|
*
|
33
32
|
* Iconv is a wrapper class for the UNIX 95 <tt>iconv()</tt> function family,
|
34
33
|
* which translates strings between various encoding systems.
|
35
|
-
*
|
34
|
+
*
|
36
35
|
* See Open Group's on-line documents for more details.
|
37
36
|
* * <tt>iconv.h</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.h.html
|
38
37
|
* * <tt>iconv_open()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_open.html
|
39
38
|
* * <tt>iconv()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.html
|
40
39
|
* * <tt>iconv_close()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_close.html
|
41
|
-
*
|
40
|
+
*
|
42
41
|
* Which coding systems are available is platform-dependent.
|
43
|
-
*
|
42
|
+
*
|
44
43
|
* == Examples
|
45
44
|
*
|
46
45
|
* 1. Simple conversion between two charsets.
|
@@ -67,6 +66,12 @@
|
|
67
66
|
* 4. Shorthand for (3).
|
68
67
|
*
|
69
68
|
* Iconv.iconv(to, from, *input.to_a)
|
69
|
+
*
|
70
|
+
* == Attentions
|
71
|
+
*
|
72
|
+
* Even if some implementation-dependent extensions are useful,
|
73
|
+
* DON'T USE those extensions in libraries and scripts intended for wide distribution.
|
74
|
+
* If you want to use those features, use String#encode.
|
70
75
|
*/
|
71
76
|
|
72
77
|
/* Invalid value for iconv_t is -1 but 0 for VALUE, I hope VALUE is
|
@@ -80,9 +85,18 @@ struct iconv_env_t
|
|
80
85
|
int argc;
|
81
86
|
VALUE *argv;
|
82
87
|
VALUE ret;
|
88
|
+
int toidx;
|
83
89
|
VALUE (*append)_((VALUE, VALUE));
|
84
90
|
};
|
85
91
|
|
92
|
+
struct rb_iconv_opt_t
|
93
|
+
{
|
94
|
+
VALUE transliterate;
|
95
|
+
VALUE discard_ilseq;
|
96
|
+
};
|
97
|
+
|
98
|
+
static ID id_transliterate, id_discard_ilseq;
|
99
|
+
|
86
100
|
static VALUE rb_eIconvInvalidEncoding;
|
87
101
|
static VALUE rb_eIconvFailure;
|
88
102
|
static VALUE rb_eIconvIllegalSeq;
|
@@ -91,26 +105,28 @@ static VALUE rb_eIconvOutOfRange;
|
|
91
105
|
static VALUE rb_eIconvBrokenLibrary;
|
92
106
|
|
93
107
|
static ID rb_success, rb_failed;
|
94
|
-
static VALUE iconv_fail _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env,
|
95
|
-
static VALUE iconv_fail_retry _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env,
|
108
|
+
static VALUE iconv_fail _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, VALUE mesg));
|
109
|
+
static VALUE iconv_fail_retry _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, VALUE mesg));
|
96
110
|
static VALUE iconv_failure_initialize _((VALUE error, VALUE mesg, VALUE success, VALUE failed));
|
97
111
|
static VALUE iconv_failure_success _((VALUE self));
|
98
112
|
static VALUE iconv_failure_failed _((VALUE self));
|
99
113
|
|
100
|
-
static iconv_t iconv_create _((VALUE to, VALUE from));
|
114
|
+
static iconv_t iconv_create _((VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx));
|
101
115
|
static void iconv_dfree _((void *cd));
|
102
116
|
static VALUE iconv_free _((VALUE cd));
|
103
117
|
static VALUE iconv_try _((iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen));
|
104
|
-
static VALUE rb_str_derive _((VALUE str, const char* ptr,
|
105
|
-
static VALUE iconv_convert _((iconv_t cd, VALUE str, long start, long length,
|
118
|
+
static VALUE rb_str_derive _((VALUE str, const char* ptr, long len));
|
119
|
+
static VALUE iconv_convert _((iconv_t cd, VALUE str, long start, long length, int toidx,
|
120
|
+
struct iconv_env_t* env));
|
106
121
|
static VALUE iconv_s_allocate _((VALUE klass));
|
107
|
-
static VALUE iconv_initialize _((
|
108
|
-
static VALUE iconv_s_open _((
|
122
|
+
static VALUE iconv_initialize _((int argc, VALUE *argv, VALUE self));
|
123
|
+
static VALUE iconv_s_open _((int argc, VALUE *argv, VALUE self));
|
109
124
|
static VALUE iconv_s_convert _((struct iconv_env_t* env));
|
110
125
|
static VALUE iconv_s_iconv _((int argc, VALUE *argv, VALUE self));
|
111
126
|
static VALUE iconv_init_state _((VALUE cd));
|
112
127
|
static VALUE iconv_finish _((VALUE self));
|
113
128
|
static VALUE iconv_iconv _((int argc, VALUE *argv, VALUE self));
|
129
|
+
static VALUE iconv_conv _((int argc, VALUE *argv, VALUE self));
|
114
130
|
|
115
131
|
static VALUE charset_map;
|
116
132
|
|
@@ -120,94 +136,162 @@ static VALUE charset_map;
|
|
120
136
|
*
|
121
137
|
* Returns the map from canonical name to system dependent name.
|
122
138
|
*/
|
123
|
-
static VALUE
|
139
|
+
static VALUE
|
140
|
+
charset_map_get(void)
|
124
141
|
{
|
125
142
|
return charset_map;
|
126
143
|
}
|
127
144
|
|
145
|
+
static VALUE
|
146
|
+
strip_glibc_option(VALUE *code)
|
147
|
+
{
|
148
|
+
VALUE val = StringValue(*code);
|
149
|
+
const char *ptr = RSTRING_PTR(val), *pend = RSTRING_END(val);
|
150
|
+
const char *slash = memchr(ptr, '/', pend - ptr);
|
151
|
+
|
152
|
+
if (slash && slash < pend - 1 && slash[1] == '/') {
|
153
|
+
VALUE opt = rb_str_subseq(val, slash - ptr, pend - slash);
|
154
|
+
val = rb_str_subseq(val, 0, slash - ptr);
|
155
|
+
*code = val;
|
156
|
+
return opt;
|
157
|
+
}
|
158
|
+
return 0;
|
159
|
+
}
|
160
|
+
|
128
161
|
static char *
|
129
|
-
map_charset
|
130
|
-
#ifdef HAVE_PROTOTYPES
|
131
|
-
(VALUE *code)
|
132
|
-
#else /* HAVE_PROTOTYPES */
|
133
|
-
(code)
|
134
|
-
VALUE *code;
|
135
|
-
#endif /* HAVE_PROTOTYPES */
|
162
|
+
map_charset(VALUE *code)
|
136
163
|
{
|
137
|
-
VALUE val = *code;
|
164
|
+
VALUE val = StringValue(*code);
|
138
165
|
|
139
|
-
if (
|
166
|
+
if (RHASH_SIZE(charset_map)) {
|
167
|
+
st_data_t data;
|
140
168
|
VALUE key = rb_funcall2(val, rb_intern("downcase"), 0, 0);
|
141
169
|
StringValuePtr(key);
|
142
|
-
if (st_lookup(
|
143
|
-
*code =
|
170
|
+
if (st_lookup(RHASH_TBL(charset_map), key, &data)) {
|
171
|
+
*code = (VALUE)data;
|
144
172
|
}
|
145
173
|
}
|
146
174
|
return StringValuePtr(*code);
|
147
175
|
}
|
148
176
|
|
149
|
-
NORETURN(static void
|
177
|
+
NORETURN(static void rb_iconv_sys_fail_str(VALUE msg));
|
150
178
|
static void
|
151
|
-
|
179
|
+
rb_iconv_sys_fail_str(VALUE msg)
|
152
180
|
{
|
153
181
|
if (errno == 0) {
|
154
|
-
rb_exc_raise(iconv_fail(rb_eIconvBrokenLibrary, Qnil, Qnil, NULL,
|
182
|
+
rb_exc_raise(iconv_fail(rb_eIconvBrokenLibrary, Qnil, Qnil, NULL, msg));
|
155
183
|
}
|
156
|
-
|
184
|
+
rb_sys_fail_str(msg);
|
185
|
+
}
|
186
|
+
|
187
|
+
#define rb_sys_fail_str(s) rb_iconv_sys_fail_str(s)
|
188
|
+
|
189
|
+
NORETURN(static void rb_iconv_sys_fail(const char *s));
|
190
|
+
static void
|
191
|
+
rb_iconv_sys_fail(const char *s)
|
192
|
+
{
|
193
|
+
rb_iconv_sys_fail_str(rb_str_new_cstr(s));
|
157
194
|
}
|
158
195
|
|
159
196
|
#define rb_sys_fail(s) rb_iconv_sys_fail(s)
|
160
197
|
|
161
198
|
static iconv_t
|
162
|
-
iconv_create
|
163
|
-
#ifdef HAVE_PROTOTYPES
|
164
|
-
(VALUE to, VALUE from)
|
165
|
-
#else /* HAVE_PROTOTYPES */
|
166
|
-
(to, from)
|
167
|
-
VALUE to;
|
168
|
-
VALUE from;
|
169
|
-
#endif /* HAVE_PROTOTYPES */
|
199
|
+
iconv_create(VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx)
|
170
200
|
{
|
201
|
+
VALUE toopt = strip_glibc_option(&to);
|
202
|
+
VALUE fromopt = strip_glibc_option(&from);
|
203
|
+
VALUE toenc = 0, fromenc = 0;
|
171
204
|
const char* tocode = map_charset(&to);
|
172
205
|
const char* fromcode = map_charset(&from);
|
206
|
+
iconv_t cd;
|
207
|
+
int retry = 0;
|
173
208
|
|
174
|
-
|
209
|
+
*idx = rb_enc_find_index(tocode);
|
175
210
|
|
176
|
-
if (
|
211
|
+
if (toopt) {
|
212
|
+
toenc = rb_str_plus(to, toopt);
|
213
|
+
tocode = RSTRING_PTR(toenc);
|
214
|
+
}
|
215
|
+
if (fromopt) {
|
216
|
+
fromenc = rb_str_plus(from, fromopt);
|
217
|
+
fromcode = RSTRING_PTR(fromenc);
|
218
|
+
}
|
219
|
+
while ((cd = iconv_open(tocode, fromcode)) == (iconv_t)-1) {
|
220
|
+
int inval = 0;
|
177
221
|
switch (errno) {
|
178
222
|
case EMFILE:
|
179
223
|
case ENFILE:
|
180
224
|
case ENOMEM:
|
181
|
-
|
182
|
-
|
225
|
+
if (!retry++) {
|
226
|
+
rb_gc();
|
227
|
+
continue;
|
228
|
+
}
|
229
|
+
break;
|
230
|
+
case EINVAL:
|
231
|
+
retry = 0;
|
232
|
+
inval = 1;
|
233
|
+
if (toenc) {
|
234
|
+
tocode = RSTRING_PTR(to);
|
235
|
+
rb_str_resize(toenc, 0);
|
236
|
+
toenc = 0;
|
237
|
+
continue;
|
238
|
+
}
|
239
|
+
if (fromenc) {
|
240
|
+
fromcode = RSTRING_PTR(from);
|
241
|
+
rb_str_resize(fromenc, 0);
|
242
|
+
fromenc = 0;
|
243
|
+
continue;
|
244
|
+
}
|
245
|
+
break;
|
183
246
|
}
|
184
|
-
|
185
|
-
int inval = errno == EINVAL;
|
247
|
+
{
|
186
248
|
const char *s = inval ? "invalid encoding " : "iconv";
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
sprintf(RSTRING(msg)->ptr, "%s(\"%s\", \"%s\")",
|
191
|
-
s, RSTRING(to)->ptr, RSTRING(from)->ptr);
|
192
|
-
s = RSTRING(msg)->ptr;
|
193
|
-
RSTRING(msg)->len = strlen(s);
|
194
|
-
if (!inval) rb_sys_fail(s);
|
249
|
+
VALUE msg = rb_sprintf("%s(\"%s\", \"%s\")",
|
250
|
+
s, RSTRING_PTR(to), RSTRING_PTR(from));
|
251
|
+
if (!inval) rb_sys_fail_str(msg);
|
195
252
|
rb_exc_raise(iconv_fail(rb_eIconvInvalidEncoding, Qnil,
|
196
|
-
rb_ary_new3(2, to, from), NULL,
|
253
|
+
rb_ary_new3(2, to, from), NULL, msg));
|
254
|
+
}
|
255
|
+
}
|
256
|
+
|
257
|
+
if (toopt || fromopt) {
|
258
|
+
if (toopt && fromopt && RTEST(rb_str_equal(toopt, fromopt))) {
|
259
|
+
fromopt = 0;
|
260
|
+
}
|
261
|
+
if (toopt && fromopt) {
|
262
|
+
rb_warning("encoding option isn't portable: %s, %s",
|
263
|
+
RSTRING_PTR(toopt) + 2, RSTRING_PTR(fromopt) + 2);
|
264
|
+
}
|
265
|
+
else {
|
266
|
+
rb_warning("encoding option isn't portable: %s",
|
267
|
+
(toopt ? RSTRING_PTR(toopt) : RSTRING_PTR(fromopt)) + 2);
|
268
|
+
}
|
269
|
+
}
|
270
|
+
|
271
|
+
if (opt) {
|
272
|
+
#ifdef ICONV_SET_TRANSLITERATE
|
273
|
+
if (opt->transliterate != Qundef) {
|
274
|
+
int flag = RTEST(opt->transliterate);
|
275
|
+
rb_warning("encoding option isn't portable: transliterate");
|
276
|
+
if (iconvctl(cd, ICONV_SET_TRANSLITERATE, (void *)&flag))
|
277
|
+
rb_sys_fail("ICONV_SET_TRANSLITERATE");
|
197
278
|
}
|
279
|
+
#endif
|
280
|
+
#ifdef ICONV_SET_DISCARD_ILSEQ
|
281
|
+
if (opt->discard_ilseq != Qundef) {
|
282
|
+
int flag = RTEST(opt->discard_ilseq);
|
283
|
+
rb_warning("encoding option isn't portable: discard_ilseq");
|
284
|
+
if (iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, (void *)&flag))
|
285
|
+
rb_sys_fail("ICONV_SET_DISCARD_ILSEQ");
|
286
|
+
}
|
287
|
+
#endif
|
198
288
|
}
|
199
289
|
|
200
290
|
return cd;
|
201
291
|
}
|
202
292
|
|
203
293
|
static void
|
204
|
-
iconv_dfree
|
205
|
-
#ifdef HAVE_PROTOTYPES
|
206
|
-
(void *cd)
|
207
|
-
#else /* HAVE_PROTOTYPES */
|
208
|
-
(cd)
|
209
|
-
void *cd;
|
210
|
-
#endif /* HAVE_PROTOTYPES */
|
294
|
+
iconv_dfree(void *cd)
|
211
295
|
{
|
212
296
|
iconv_close(VALUE2ICONV(cd));
|
213
297
|
}
|
@@ -215,13 +299,7 @@ iconv_dfree
|
|
215
299
|
#define ICONV_FREE iconv_dfree
|
216
300
|
|
217
301
|
static VALUE
|
218
|
-
iconv_free
|
219
|
-
#ifdef HAVE_PROTOTYPES
|
220
|
-
(VALUE cd)
|
221
|
-
#else /* HAVE_PROTOTYPES */
|
222
|
-
(cd)
|
223
|
-
VALUE cd;
|
224
|
-
#endif /* HAVE_PROTOTYPES */
|
302
|
+
iconv_free(VALUE cd)
|
225
303
|
{
|
226
304
|
if (cd && iconv_close(VALUE2ICONV(cd)) == -1)
|
227
305
|
rb_sys_fail("iconv_close");
|
@@ -229,13 +307,7 @@ iconv_free
|
|
229
307
|
}
|
230
308
|
|
231
309
|
static VALUE
|
232
|
-
check_iconv
|
233
|
-
#ifdef HAVE_PROTOTYPES
|
234
|
-
(VALUE obj)
|
235
|
-
#else /* HAVE_PROTOTYPES */
|
236
|
-
(obj)
|
237
|
-
VALUE obj;
|
238
|
-
#endif /* HAVE_PROTOTYPES */
|
310
|
+
check_iconv(VALUE obj)
|
239
311
|
{
|
240
312
|
Check_Type(obj, T_DATA);
|
241
313
|
if (RDATA(obj)->dfree != ICONV_FREE) {
|
@@ -245,17 +317,7 @@ check_iconv
|
|
245
317
|
}
|
246
318
|
|
247
319
|
static VALUE
|
248
|
-
iconv_try
|
249
|
-
#ifdef HAVE_PROTOTYPES
|
250
|
-
(iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen)
|
251
|
-
#else /* HAVE_PROTOTYPES */
|
252
|
-
(cd, inptr, inlen, outptr, outlen)
|
253
|
-
iconv_t cd;
|
254
|
-
const char **inptr;
|
255
|
-
size_t *inlen;
|
256
|
-
char **outptr;
|
257
|
-
size_t *outlen;
|
258
|
-
#endif /* HAVE_PROTOTYPES */
|
320
|
+
iconv_try(iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen)
|
259
321
|
{
|
260
322
|
#ifdef ICONV_INPTR_CONST
|
261
323
|
#define ICONV_INPTR_CAST
|
@@ -295,13 +357,8 @@ iconv_try
|
|
295
357
|
|
296
358
|
#define FAILED_MAXLEN 16
|
297
359
|
|
298
|
-
static VALUE
|
299
|
-
|
300
|
-
(VALUE error, VALUE mesg, VALUE success, VALUE failed)
|
301
|
-
#else /* HAVE_PROTOTYPES */
|
302
|
-
(error, mesg, success, failed)
|
303
|
-
VALUE error, mesg, success, failed;
|
304
|
-
#endif /* HAVE_PROTOTYPES */
|
360
|
+
static VALUE
|
361
|
+
iconv_failure_initialize(VALUE error, VALUE mesg, VALUE success, VALUE failed)
|
305
362
|
{
|
306
363
|
rb_call_super(1, &mesg);
|
307
364
|
rb_ivar_set(error, rb_success, success);
|
@@ -310,22 +367,14 @@ static VALUE iconv_failure_initialize
|
|
310
367
|
}
|
311
368
|
|
312
369
|
static VALUE
|
313
|
-
iconv_fail
|
314
|
-
#ifdef HAVE_PROTOTYPES
|
315
|
-
(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg)
|
316
|
-
#else /* HAVE_PROTOTYPES */
|
317
|
-
(error, success, failed, env, mesg)
|
318
|
-
VALUE error, success, failed;
|
319
|
-
struct iconv_env_t *env;
|
320
|
-
const char *mesg;
|
321
|
-
#endif /* HAVE_PROTOTYPES */
|
370
|
+
iconv_fail(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, VALUE mesg)
|
322
371
|
{
|
323
372
|
VALUE args[3];
|
324
373
|
|
325
|
-
if (mesg
|
326
|
-
args[0] =
|
374
|
+
if (!NIL_P(mesg)) {
|
375
|
+
args[0] = mesg;
|
327
376
|
}
|
328
|
-
else if (TYPE(failed) != T_STRING ||
|
377
|
+
else if (TYPE(failed) != T_STRING || RSTRING_LEN(failed) < FAILED_MAXLEN) {
|
329
378
|
args[0] = rb_inspect(failed);
|
330
379
|
}
|
331
380
|
else {
|
@@ -345,33 +394,23 @@ iconv_fail
|
|
345
394
|
}
|
346
395
|
|
347
396
|
static VALUE
|
348
|
-
iconv_fail_retry(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env,
|
397
|
+
iconv_fail_retry(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, VALUE mesg)
|
349
398
|
{
|
350
399
|
error = iconv_fail(error, success, failed, env, mesg);
|
351
400
|
if (!rb_block_given_p()) rb_exc_raise(error);
|
352
|
-
|
401
|
+
rb_set_errinfo(error);
|
353
402
|
return rb_yield(failed);
|
354
403
|
}
|
355
404
|
|
356
405
|
static VALUE
|
357
|
-
rb_str_derive
|
358
|
-
#ifdef HAVE_PROTOTYPES
|
359
|
-
(VALUE str, const char* ptr, int len)
|
360
|
-
#else /* HAVE_PROTOTYPES */
|
361
|
-
(str, ptr, len)
|
362
|
-
VALUE str;
|
363
|
-
const char *ptr;
|
364
|
-
int len;
|
365
|
-
#endif /* HAVE_PROTOTYPES */
|
406
|
+
rb_str_derive(VALUE str, const char* ptr, long len)
|
366
407
|
{
|
367
408
|
VALUE ret;
|
368
409
|
|
369
410
|
if (NIL_P(str))
|
370
411
|
return rb_str_new(ptr, len);
|
371
|
-
if (
|
372
|
-
|
373
|
-
if (RSTRING(str)->ptr + RSTRING(str)->len == ptr + len)
|
374
|
-
ret = rb_str_substr(str, ptr - RSTRING(str)->ptr, len);
|
412
|
+
if (RSTRING_PTR(str) + RSTRING_LEN(str) == ptr + len)
|
413
|
+
ret = rb_str_subseq(str, ptr - RSTRING_PTR(str), len);
|
375
414
|
else
|
376
415
|
ret = rb_str_new(ptr, len);
|
377
416
|
OBJ_INFECT(ret, str);
|
@@ -379,17 +418,7 @@ rb_str_derive
|
|
379
418
|
}
|
380
419
|
|
381
420
|
static VALUE
|
382
|
-
iconv_convert
|
383
|
-
#ifdef HAVE_PROTOTYPES
|
384
|
-
(iconv_t cd, VALUE str, long start, long length, struct iconv_env_t* env)
|
385
|
-
#else /* HAVE_PROTOTYPES */
|
386
|
-
(cd, str, start, length, env)
|
387
|
-
iconv_t cd;
|
388
|
-
VALUE str;
|
389
|
-
long start;
|
390
|
-
long length;
|
391
|
-
struct iconv_env_t *env;
|
392
|
-
#endif /* HAVE_PROTOTYPES */
|
421
|
+
iconv_convert(iconv_t cd, VALUE str, long start, long length, int toidx, struct iconv_env_t* env)
|
393
422
|
{
|
394
423
|
VALUE ret = Qfalse;
|
395
424
|
VALUE error = Qfalse;
|
@@ -413,9 +442,9 @@ iconv_convert
|
|
413
442
|
error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
|
414
443
|
if (RTEST(error)) {
|
415
444
|
unsigned int i;
|
416
|
-
rescue = iconv_fail_retry(error, Qnil, Qnil, env,
|
445
|
+
rescue = iconv_fail_retry(error, Qnil, Qnil, env, Qnil);
|
417
446
|
if (TYPE(rescue) == T_ARRAY) {
|
418
|
-
str =
|
447
|
+
str = RARRAY_LEN(rescue) > 0 ? RARRAY_PTR(rescue)[0] : Qnil;
|
419
448
|
}
|
420
449
|
if (FIXNUM_P(str) && (i = FIX2INT(str)) <= 0xff) {
|
421
450
|
char c = i;
|
@@ -430,11 +459,11 @@ iconv_convert
|
|
430
459
|
length = 0;
|
431
460
|
}
|
432
461
|
else {
|
433
|
-
|
462
|
+
long slen;
|
434
463
|
|
435
464
|
StringValue(str);
|
436
|
-
slen =
|
437
|
-
inptr =
|
465
|
+
slen = RSTRING_LEN(str);
|
466
|
+
inptr = RSTRING_PTR(str);
|
438
467
|
|
439
468
|
inptr += start;
|
440
469
|
if (length < 0 || length > start + slen)
|
@@ -444,23 +473,27 @@ iconv_convert
|
|
444
473
|
inlen = length;
|
445
474
|
|
446
475
|
do {
|
447
|
-
|
476
|
+
VALUE errmsg = Qnil;
|
448
477
|
const char *tmpstart = inptr;
|
449
478
|
outptr = buffer;
|
450
479
|
outlen = sizeof(buffer);
|
451
480
|
|
452
|
-
errmsg[0] = 0;
|
453
481
|
error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
|
454
482
|
|
455
|
-
if (
|
483
|
+
if (
|
484
|
+
#if SIGNEDNESS_OF_SIZE_T < 0
|
485
|
+
0 <= outlen &&
|
486
|
+
#endif
|
487
|
+
outlen <= sizeof(buffer)) {
|
456
488
|
outlen = sizeof(buffer) - outlen;
|
457
489
|
if (NIL_P(error) || /* something converted */
|
458
|
-
outlen > inptr - tmpstart || /* input can't contain output */
|
459
|
-
(outlen < inptr - tmpstart && inlen > 0) || /* something skipped */
|
490
|
+
outlen > (size_t)(inptr - tmpstart) || /* input can't contain output */
|
491
|
+
(outlen < (size_t)(inptr - tmpstart) && inlen > 0) || /* something skipped */
|
460
492
|
memcmp(buffer, tmpstart, outlen)) /* something differs */
|
461
493
|
{
|
462
494
|
if (NIL_P(str)) {
|
463
495
|
ret = rb_str_new(buffer, outlen);
|
496
|
+
if (toidx >= 0) rb_enc_associate_index(ret, toidx);
|
464
497
|
}
|
465
498
|
else {
|
466
499
|
if (ret) {
|
@@ -468,6 +501,7 @@ iconv_convert
|
|
468
501
|
}
|
469
502
|
else {
|
470
503
|
ret = rb_str_new(instart, tmpstart - instart);
|
504
|
+
if (toidx >= 0) rb_enc_associate_index(ret, toidx);
|
471
505
|
OBJ_INFECT(ret, str);
|
472
506
|
}
|
473
507
|
ret = rb_str_buf_cat(ret, buffer, outlen);
|
@@ -480,26 +514,29 @@ iconv_convert
|
|
480
514
|
}
|
481
515
|
else {
|
482
516
|
/* Some iconv() have a bug, return *outlen out of range */
|
483
|
-
|
517
|
+
errmsg = rb_sprintf("bug?(output length = %ld)", (long)(sizeof(buffer) - outlen));
|
484
518
|
error = rb_eIconvOutOfRange;
|
485
519
|
}
|
486
520
|
|
487
521
|
if (RTEST(error)) {
|
488
522
|
long len = 0;
|
489
523
|
|
490
|
-
if (!ret)
|
524
|
+
if (!ret) {
|
491
525
|
ret = rb_str_derive(str, instart, inptr - instart);
|
492
|
-
|
526
|
+
if (toidx >= 0) rb_enc_associate_index(ret, toidx);
|
527
|
+
}
|
528
|
+
else if (inptr > instart) {
|
493
529
|
rb_str_cat(ret, instart, inptr - instart);
|
530
|
+
}
|
494
531
|
str = rb_str_derive(str, inptr, inlen);
|
495
532
|
rescue = iconv_fail_retry(error, ret, str, env, errmsg);
|
496
533
|
if (TYPE(rescue) == T_ARRAY) {
|
497
|
-
if ((len =
|
498
|
-
rb_str_concat(ret,
|
499
|
-
if (len > 1 && !NIL_P(str =
|
534
|
+
if ((len = RARRAY_LEN(rescue)) > 0)
|
535
|
+
rb_str_concat(ret, RARRAY_PTR(rescue)[0]);
|
536
|
+
if (len > 1 && !NIL_P(str = RARRAY_PTR(rescue)[1])) {
|
500
537
|
StringValue(str);
|
501
|
-
inlen = length =
|
502
|
-
instart = inptr =
|
538
|
+
inlen = length = RSTRING_LEN(str);
|
539
|
+
instart = inptr = RSTRING_PTR(str);
|
503
540
|
continue;
|
504
541
|
}
|
505
542
|
}
|
@@ -510,36 +547,107 @@ iconv_convert
|
|
510
547
|
}
|
511
548
|
} while (inlen > 0);
|
512
549
|
|
513
|
-
if (!ret)
|
550
|
+
if (!ret) {
|
514
551
|
ret = rb_str_derive(str, instart, inptr - instart);
|
515
|
-
|
552
|
+
if (toidx >= 0) rb_enc_associate_index(ret, toidx);
|
553
|
+
}
|
554
|
+
else if (inptr > instart) {
|
516
555
|
rb_str_cat(ret, instart, inptr - instart);
|
556
|
+
}
|
517
557
|
return ret;
|
518
558
|
}
|
519
559
|
|
520
560
|
static VALUE
|
521
|
-
iconv_s_allocate
|
522
|
-
#ifdef HAVE_PROTOTYPES
|
523
|
-
(VALUE klass)
|
524
|
-
#else /* HAVE_PROTOTYPES */
|
525
|
-
(klass)
|
526
|
-
VALUE klass;
|
527
|
-
#endif /* HAVE_PROTOTYPES */
|
561
|
+
iconv_s_allocate(VALUE klass)
|
528
562
|
{
|
529
563
|
return Data_Wrap_Struct(klass, 0, ICONV_FREE, 0);
|
530
564
|
}
|
531
565
|
|
566
|
+
static VALUE
|
567
|
+
get_iconv_opt_i(VALUE i, VALUE arg)
|
568
|
+
{
|
569
|
+
VALUE name;
|
570
|
+
#if defined ICONV_SET_TRANSLITERATE || defined ICONV_SET_DISCARD_ILSEQ
|
571
|
+
VALUE val;
|
572
|
+
struct rb_iconv_opt_t *opt = (struct rb_iconv_opt_t *)arg;
|
573
|
+
#endif
|
574
|
+
|
575
|
+
i = rb_Array(i);
|
576
|
+
name = rb_ary_entry(i, 0);
|
577
|
+
#if defined ICONV_SET_TRANSLITERATE || defined ICONV_SET_DISCARD_ILSEQ
|
578
|
+
val = rb_ary_entry(i, 1);
|
579
|
+
#endif
|
580
|
+
do {
|
581
|
+
if (SYMBOL_P(name)) {
|
582
|
+
ID id = SYM2ID(name);
|
583
|
+
if (id == id_transliterate) {
|
584
|
+
#ifdef ICONV_SET_TRANSLITERATE
|
585
|
+
opt->transliterate = val;
|
586
|
+
#else
|
587
|
+
rb_notimplement();
|
588
|
+
#endif
|
589
|
+
break;
|
590
|
+
}
|
591
|
+
if (id == id_discard_ilseq) {
|
592
|
+
#ifdef ICONV_SET_DISCARD_ILSEQ
|
593
|
+
opt->discard_ilseq = val;
|
594
|
+
#else
|
595
|
+
rb_notimplement();
|
596
|
+
#endif
|
597
|
+
break;
|
598
|
+
}
|
599
|
+
}
|
600
|
+
else {
|
601
|
+
const char *s = StringValueCStr(name);
|
602
|
+
if (strcmp(s, "transliterate") == 0) {
|
603
|
+
#ifdef ICONV_SET_TRANSLITERATE
|
604
|
+
opt->transliterate = val;
|
605
|
+
#else
|
606
|
+
rb_notimplement();
|
607
|
+
#endif
|
608
|
+
break;
|
609
|
+
}
|
610
|
+
if (strcmp(s, "discard_ilseq") == 0) {
|
611
|
+
#ifdef ICONV_SET_DISCARD_ILSEQ
|
612
|
+
opt->discard_ilseq = val;
|
613
|
+
#else
|
614
|
+
rb_notimplement();
|
615
|
+
#endif
|
616
|
+
break;
|
617
|
+
}
|
618
|
+
}
|
619
|
+
name = rb_inspect(name);
|
620
|
+
rb_raise(rb_eArgError, "unknown option - %s", StringValueCStr(name));
|
621
|
+
} while (0);
|
622
|
+
return Qnil;
|
623
|
+
}
|
624
|
+
|
625
|
+
static void
|
626
|
+
get_iconv_opt(struct rb_iconv_opt_t *opt, VALUE options)
|
627
|
+
{
|
628
|
+
opt->transliterate = Qundef;
|
629
|
+
opt->discard_ilseq = Qundef;
|
630
|
+
if (!NIL_P(options)) {
|
631
|
+
rb_block_call(options, rb_intern("each"), 0, 0, get_iconv_opt_i, (VALUE)opt);
|
632
|
+
}
|
633
|
+
}
|
634
|
+
|
635
|
+
#define iconv_ctl(self, func, val) (\
|
636
|
+
iconvctl(VALUE2ICONV(check_iconv(self)), func, (void *)&(val)) ? \
|
637
|
+
rb_sys_fail(#func) : (void)0)
|
638
|
+
|
532
639
|
/*
|
533
640
|
* Document-method: new
|
534
|
-
* call-seq: Iconv.new(to, from)
|
641
|
+
* call-seq: Iconv.new(to, from, [options])
|
535
642
|
*
|
536
643
|
* Creates new code converter from a coding-system designated with +from+
|
537
644
|
* to another one designated with +to+.
|
538
|
-
*
|
645
|
+
*
|
539
646
|
* === Parameters
|
540
647
|
*
|
541
648
|
* +to+:: encoding name for destination
|
542
649
|
* +from+:: encoding name for source
|
650
|
+
* +options+:: options for converter
|
543
651
|
*
|
544
652
|
* === Exceptions
|
545
653
|
*
|
@@ -548,19 +656,18 @@ iconv_s_allocate
|
|
548
656
|
* SystemCallError:: if <tt>iconv_open(3)</tt> fails
|
549
657
|
*/
|
550
658
|
static VALUE
|
551
|
-
iconv_initialize
|
552
|
-
#ifdef HAVE_PROTOTYPES
|
553
|
-
(VALUE self, VALUE to, VALUE from)
|
554
|
-
#else /* HAVE_PROTOTYPES */
|
555
|
-
(self, to, from)
|
556
|
-
VALUE self;
|
557
|
-
VALUE to;
|
558
|
-
VALUE from;
|
559
|
-
#endif /* HAVE_PROTOTYPES */
|
659
|
+
iconv_initialize(int argc, VALUE *argv, VALUE self)
|
560
660
|
{
|
661
|
+
VALUE to, from, options;
|
662
|
+
struct rb_iconv_opt_t opt;
|
663
|
+
int idx;
|
664
|
+
|
665
|
+
rb_scan_args(argc, argv, "21", &to, &from, &options);
|
666
|
+
get_iconv_opt(&opt, options);
|
561
667
|
iconv_free(check_iconv(self));
|
562
668
|
DATA_PTR(self) = NULL;
|
563
|
-
DATA_PTR(self) = (void *)ICONV2VALUE(iconv_create(to, from));
|
669
|
+
DATA_PTR(self) = (void *)ICONV2VALUE(iconv_create(to, from, &opt, &idx));
|
670
|
+
if (idx >= 0) ENCODING_SET(self, idx);
|
564
671
|
return self;
|
565
672
|
}
|
566
673
|
|
@@ -573,19 +680,19 @@ iconv_initialize
|
|
573
680
|
* returned from the block.
|
574
681
|
*/
|
575
682
|
static VALUE
|
576
|
-
iconv_s_open
|
577
|
-
#ifdef HAVE_PROTOTYPES
|
578
|
-
(VALUE self, VALUE to, VALUE from)
|
579
|
-
#else /* HAVE_PROTOTYPES */
|
580
|
-
(self, to, from)
|
581
|
-
VALUE self;
|
582
|
-
VALUE to;
|
583
|
-
VALUE from;
|
584
|
-
#endif /* HAVE_PROTOTYPES */
|
683
|
+
iconv_s_open(int argc, VALUE *argv, VALUE self)
|
585
684
|
{
|
586
|
-
VALUE
|
685
|
+
VALUE to, from, options, cd;
|
686
|
+
struct rb_iconv_opt_t opt;
|
687
|
+
int idx;
|
688
|
+
|
689
|
+
rb_scan_args(argc, argv, "21", &to, &from, &options);
|
690
|
+
get_iconv_opt(&opt, options);
|
691
|
+
cd = ICONV2VALUE(iconv_create(to, from, &opt, &idx));
|
587
692
|
|
588
693
|
self = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)cd);
|
694
|
+
if (idx >= 0) ENCODING_SET(self, idx);
|
695
|
+
|
589
696
|
if (rb_block_given_p()) {
|
590
697
|
return rb_ensure(rb_yield, self, (VALUE(*)())iconv_finish, self);
|
591
698
|
}
|
@@ -595,24 +702,19 @@ iconv_s_open
|
|
595
702
|
}
|
596
703
|
|
597
704
|
static VALUE
|
598
|
-
iconv_s_convert
|
599
|
-
#ifdef HAVE_PROTOTYPES
|
600
|
-
(struct iconv_env_t* env)
|
601
|
-
#else /* HAVE_PROTOTYPES */
|
602
|
-
(env)
|
603
|
-
struct iconv_env_t *env;
|
604
|
-
#endif /* HAVE_PROTOTYPES */
|
705
|
+
iconv_s_convert(struct iconv_env_t* env)
|
605
706
|
{
|
606
707
|
VALUE last = 0;
|
607
708
|
|
608
709
|
for (; env->argc > 0; --env->argc, ++env->argv) {
|
609
|
-
VALUE s = iconv_convert(env->cd, last = *(env->argv),
|
710
|
+
VALUE s = iconv_convert(env->cd, last = *(env->argv),
|
711
|
+
0, -1, env->toidx, env);
|
610
712
|
env->append(env->ret, s);
|
611
713
|
}
|
612
714
|
|
613
715
|
if (!NIL_P(last)) {
|
614
|
-
VALUE s = iconv_convert(env->cd, Qnil, 0, 0, env);
|
615
|
-
if (
|
716
|
+
VALUE s = iconv_convert(env->cd, Qnil, 0, 0, env->toidx, env);
|
717
|
+
if (RSTRING_LEN(s))
|
616
718
|
env->append(env->ret, s);
|
617
719
|
}
|
618
720
|
|
@@ -638,15 +740,7 @@ iconv_s_convert
|
|
638
740
|
* Exceptions thrown by Iconv.new, Iconv.open and Iconv#iconv.
|
639
741
|
*/
|
640
742
|
static VALUE
|
641
|
-
iconv_s_iconv
|
642
|
-
#ifdef HAVE_PROTOTYPES
|
643
|
-
(int argc, VALUE *argv, VALUE self)
|
644
|
-
#else /* HAVE_PROTOTYPES */
|
645
|
-
(argc, argv, self)
|
646
|
-
int argc;
|
647
|
-
VALUE *argv;
|
648
|
-
VALUE self;
|
649
|
-
#endif /* HAVE_PROTOTYPES */
|
743
|
+
iconv_s_iconv(int argc, VALUE *argv, VALUE self)
|
650
744
|
{
|
651
745
|
struct iconv_env_t arg;
|
652
746
|
|
@@ -657,7 +751,7 @@ iconv_s_iconv
|
|
657
751
|
arg.argv = argv + 2;
|
658
752
|
arg.append = rb_ary_push;
|
659
753
|
arg.ret = rb_ary_new2(argc);
|
660
|
-
arg.cd = iconv_create(argv[0], argv[1]);
|
754
|
+
arg.cd = iconv_create(argv[0], argv[1], NULL, &arg.toidx);
|
661
755
|
return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd));
|
662
756
|
}
|
663
757
|
|
@@ -670,13 +764,7 @@ iconv_s_iconv
|
|
670
764
|
* See Iconv.iconv.
|
671
765
|
*/
|
672
766
|
static VALUE
|
673
|
-
iconv_s_conv
|
674
|
-
#ifdef HAVE_PROTOTYPES
|
675
|
-
(VALUE self, VALUE to, VALUE from, VALUE str)
|
676
|
-
#else /* HAVE_PROTOTYPES */
|
677
|
-
(self, to, from, str)
|
678
|
-
VALUE self, to, from, str;
|
679
|
-
#endif /* HAVE_PROTOTYPES */
|
767
|
+
iconv_s_conv(VALUE self, VALUE to, VALUE from, VALUE str)
|
680
768
|
{
|
681
769
|
struct iconv_env_t arg;
|
682
770
|
|
@@ -684,10 +772,94 @@ iconv_s_conv
|
|
684
772
|
arg.argv = &str;
|
685
773
|
arg.append = rb_str_append;
|
686
774
|
arg.ret = rb_str_new(0, 0);
|
687
|
-
arg.cd = iconv_create(to, from);
|
775
|
+
arg.cd = iconv_create(to, from, NULL, &arg.toidx);
|
688
776
|
return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd));
|
689
777
|
}
|
690
778
|
|
779
|
+
/*
|
780
|
+
* Document-method: list
|
781
|
+
* call-seq: Iconv.list {|*aliases| ... }
|
782
|
+
*
|
783
|
+
* Iterates each alias sets.
|
784
|
+
*/
|
785
|
+
|
786
|
+
#ifdef HAVE_ICONVLIST
|
787
|
+
struct iconv_name_list
|
788
|
+
{
|
789
|
+
unsigned int namescount;
|
790
|
+
const char *const *names;
|
791
|
+
VALUE array;
|
792
|
+
};
|
793
|
+
|
794
|
+
static VALUE
|
795
|
+
list_iconv_i(VALUE ptr)
|
796
|
+
{
|
797
|
+
struct iconv_name_list *p = (struct iconv_name_list *)ptr;
|
798
|
+
unsigned int i, namescount = p->namescount;
|
799
|
+
const char *const *names = p->names;
|
800
|
+
VALUE ary = rb_ary_new2(namescount);
|
801
|
+
|
802
|
+
for (i = 0; i < namescount; i++) {
|
803
|
+
rb_ary_push(ary, rb_str_new2(names[i]));
|
804
|
+
}
|
805
|
+
if (p->array) {
|
806
|
+
return rb_ary_push(p->array, ary);
|
807
|
+
}
|
808
|
+
return rb_yield(ary);
|
809
|
+
}
|
810
|
+
|
811
|
+
static int
|
812
|
+
list_iconv(unsigned int namescount, const char *const *names, void *data)
|
813
|
+
{
|
814
|
+
int *state = data;
|
815
|
+
struct iconv_name_list list;
|
816
|
+
|
817
|
+
list.namescount = namescount;
|
818
|
+
list.names = names;
|
819
|
+
list.array = ((VALUE *)data)[1];
|
820
|
+
rb_protect(list_iconv_i, (VALUE)&list, state);
|
821
|
+
return *state;
|
822
|
+
}
|
823
|
+
#endif
|
824
|
+
|
825
|
+
#if defined(HAVE_ICONVLIST) || defined(HAVE___ICONV_FREE_LIST)
|
826
|
+
static VALUE
|
827
|
+
iconv_s_list(void)
|
828
|
+
{
|
829
|
+
#ifdef HAVE_ICONVLIST
|
830
|
+
int state;
|
831
|
+
VALUE args[2];
|
832
|
+
|
833
|
+
args[1] = rb_block_given_p() ? 0 : rb_ary_new();
|
834
|
+
iconvlist(list_iconv, args);
|
835
|
+
state = *(int *)args;
|
836
|
+
if (state) rb_jump_tag(state);
|
837
|
+
if (args[1]) return args[1];
|
838
|
+
#elif defined(HAVE___ICONV_FREE_LIST)
|
839
|
+
char **list;
|
840
|
+
size_t sz, i;
|
841
|
+
VALUE ary;
|
842
|
+
|
843
|
+
if (__iconv_get_list(&list, &sz)) return Qnil;
|
844
|
+
|
845
|
+
ary = rb_ary_new2(sz);
|
846
|
+
for (i = 0; i < sz; i++) {
|
847
|
+
rb_ary_push(ary, rb_str_new2(list[i]));
|
848
|
+
}
|
849
|
+
__iconv_free_list(list, sz);
|
850
|
+
|
851
|
+
if (!rb_block_given_p())
|
852
|
+
return ary;
|
853
|
+
for (i = 0; i < RARRAY_LEN(ary); i++) {
|
854
|
+
rb_yield(RARRAY_PTR(ary)[i]);
|
855
|
+
}
|
856
|
+
#endif
|
857
|
+
return Qnil;
|
858
|
+
}
|
859
|
+
#else
|
860
|
+
#define iconv_s_list rb_f_notimplement
|
861
|
+
#endif
|
862
|
+
|
691
863
|
/*
|
692
864
|
* Document-method: close
|
693
865
|
*
|
@@ -700,32 +872,20 @@ iconv_s_conv
|
|
700
872
|
* its initial shift state.
|
701
873
|
*/
|
702
874
|
static VALUE
|
703
|
-
iconv_init_state
|
704
|
-
#ifdef HAVE_PROTOTYPES
|
705
|
-
(VALUE cd)
|
706
|
-
#else /* HAVE_PROTOTYPES */
|
707
|
-
(cd)
|
708
|
-
VALUE cd;
|
709
|
-
#endif /* HAVE_PROTOTYPES */
|
875
|
+
iconv_init_state(VALUE self)
|
710
876
|
{
|
711
|
-
|
877
|
+
iconv_t cd = VALUE2ICONV((VALUE)DATA_PTR(self));
|
878
|
+
DATA_PTR(self) = NULL;
|
879
|
+
return iconv_convert(cd, Qnil, 0, 0, ENCODING_GET(self), NULL);
|
712
880
|
}
|
713
881
|
|
714
882
|
static VALUE
|
715
|
-
iconv_finish
|
716
|
-
#ifdef HAVE_PROTOTYPES
|
717
|
-
(VALUE self)
|
718
|
-
#else /* HAVE_PROTOTYPES */
|
719
|
-
(self)
|
720
|
-
VALUE self;
|
721
|
-
#endif /* HAVE_PROTOTYPES */
|
883
|
+
iconv_finish(VALUE self)
|
722
884
|
{
|
723
885
|
VALUE cd = check_iconv(self);
|
724
886
|
|
725
887
|
if (!cd) return Qnil;
|
726
|
-
|
727
|
-
|
728
|
-
return rb_ensure(iconv_init_state, cd, iconv_free, cd);
|
888
|
+
return rb_ensure(iconv_init_state, self, iconv_free, cd);
|
729
889
|
}
|
730
890
|
|
731
891
|
/*
|
@@ -756,34 +916,186 @@ iconv_finish
|
|
756
916
|
* See the Iconv documentation.
|
757
917
|
*/
|
758
918
|
static VALUE
|
759
|
-
iconv_iconv
|
760
|
-
#ifdef HAVE_PROTOTYPES
|
761
|
-
(int argc, VALUE *argv, VALUE self)
|
762
|
-
#else /* HAVE_PROTOTYPES */
|
763
|
-
(argc, argv, self)
|
764
|
-
int argc;
|
765
|
-
VALUE *argv;
|
766
|
-
VALUE self;
|
767
|
-
#endif /* HAVE_PROTOTYPES */
|
919
|
+
iconv_iconv(int argc, VALUE *argv, VALUE self)
|
768
920
|
{
|
769
921
|
VALUE str, n1, n2;
|
770
922
|
VALUE cd = check_iconv(self);
|
771
923
|
long start = 0, length = 0, slen = 0;
|
772
924
|
|
773
925
|
rb_scan_args(argc, argv, "12", &str, &n1, &n2);
|
774
|
-
if (!NIL_P(str))
|
926
|
+
if (!NIL_P(str)) {
|
927
|
+
VALUE n = rb_str_length(StringValue(str));
|
928
|
+
slen = NUM2LONG(n);
|
929
|
+
}
|
775
930
|
if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
|
776
931
|
if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
932
|
+
length = NIL_P(n2) ? -1 : NUM2LONG(n2);
|
933
|
+
}
|
934
|
+
}
|
935
|
+
if (start > 0 || length > 0) {
|
936
|
+
rb_encoding *enc = rb_enc_get(str);
|
937
|
+
const char *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
|
938
|
+
const char *ps = s;
|
939
|
+
if (start > 0) {
|
940
|
+
start = (ps = rb_enc_nth(s, e, start, enc)) - s;
|
941
|
+
}
|
942
|
+
if (length > 0) {
|
943
|
+
length = rb_enc_nth(ps, e, length, enc) - ps;
|
783
944
|
}
|
784
945
|
}
|
785
946
|
|
786
|
-
return iconv_convert(VALUE2ICONV(cd), str, start, length, NULL);
|
947
|
+
return iconv_convert(VALUE2ICONV(cd), str, start, length, ENCODING_GET(self), NULL);
|
948
|
+
}
|
949
|
+
|
950
|
+
/*
|
951
|
+
* Document-method: conv
|
952
|
+
* call-seq: conv(str...)
|
953
|
+
*
|
954
|
+
* Equivalent to
|
955
|
+
*
|
956
|
+
* iconv(nil, str..., nil).join
|
957
|
+
*/
|
958
|
+
static VALUE
|
959
|
+
iconv_conv(int argc, VALUE *argv, VALUE self)
|
960
|
+
{
|
961
|
+
iconv_t cd = VALUE2ICONV(check_iconv(self));
|
962
|
+
VALUE str, s;
|
963
|
+
int toidx = ENCODING_GET(self);
|
964
|
+
|
965
|
+
str = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
|
966
|
+
if (argc > 0) {
|
967
|
+
do {
|
968
|
+
s = iconv_convert(cd, *argv++, 0, -1, toidx, NULL);
|
969
|
+
if (RSTRING_LEN(s))
|
970
|
+
rb_str_buf_append(str, s);
|
971
|
+
} while (--argc);
|
972
|
+
s = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
|
973
|
+
if (RSTRING_LEN(s))
|
974
|
+
rb_str_buf_append(str, s);
|
975
|
+
}
|
976
|
+
|
977
|
+
return str;
|
978
|
+
}
|
979
|
+
|
980
|
+
#ifdef ICONV_TRIVIALP
/*
 * Document-method: trivial?
 * call-seq: trivial?
 *
 * Queries the ICONV_TRIVIALP control and returns +true+ when the flag it
 * reports is non-zero, +false+ otherwise.  Not implemented when the
 * library does not define ICONV_TRIVIALP.
 */
static VALUE
iconv_trivialp(VALUE self)
{
    int trivial = 0;

    iconv_ctl(self, ICONV_TRIVIALP, trivial);
    return trivial ? Qtrue : Qfalse;
}
#else
#define iconv_trivialp rb_f_notimplement
#endif
|
998
|
+
|
999
|
+
#ifdef ICONV_GET_TRANSLITERATE
/*
 * Document-method: transliterate?
 * call-seq: transliterate?
 *
 * Queries the ICONV_GET_TRANSLITERATE control and returns +true+ when the
 * flag it reports is non-zero, +false+ otherwise.  Not implemented when
 * the library does not define ICONV_GET_TRANSLITERATE.
 */
static VALUE
iconv_get_transliterate(VALUE self)
{
    int trans = 0;

    iconv_ctl(self, ICONV_GET_TRANSLITERATE, trans);
    return trans ? Qtrue : Qfalse;
}
#else
#define iconv_get_transliterate rb_f_notimplement
#endif
|
1017
|
+
|
1018
|
+
#ifdef ICONV_SET_TRANSLITERATE
/*
 * Document-method: transliterate=
 * call-seq: cd.transliterate = flag
 *
 * Passes the truthiness of +flag+ to the ICONV_SET_TRANSLITERATE control
 * and returns the receiver.  Not implemented when the library does not
 * define ICONV_SET_TRANSLITERATE.
 */
static VALUE
iconv_set_transliterate(VALUE self, VALUE transliterate)
{
    int trans;

    /* Any value other than nil/false switches the flag on. */
    trans = RTEST(transliterate);
    iconv_ctl(self, ICONV_SET_TRANSLITERATE, trans);

    return self;
}
#else
#define iconv_set_transliterate rb_f_notimplement
#endif
|
1035
|
+
|
1036
|
+
#ifdef ICONV_GET_DISCARD_ILSEQ
/*
 * Document-method: discard_ilseq?
 * call-seq: discard_ilseq?
 *
 * Queries the ICONV_GET_DISCARD_ILSEQ control and returns +true+ when the
 * flag it reports is non-zero, +false+ otherwise.  Not implemented when
 * the library does not define ICONV_GET_DISCARD_ILSEQ.
 */
static VALUE
iconv_get_discard_ilseq(VALUE self)
{
    int dis = 0;

    iconv_ctl(self, ICONV_GET_DISCARD_ILSEQ, dis);
    return dis ? Qtrue : Qfalse;
}
#else
#define iconv_get_discard_ilseq rb_f_notimplement
#endif
|
1054
|
+
|
1055
|
+
#ifdef ICONV_SET_DISCARD_ILSEQ
/*
 * Document-method: discard_ilseq=
 * call-seq: cd.discard_ilseq = flag
 *
 * Passes the truthiness of +flag+ to the ICONV_SET_DISCARD_ILSEQ control
 * and returns the receiver.  Not implemented when the library does not
 * define ICONV_SET_DISCARD_ILSEQ.
 */
static VALUE
iconv_set_discard_ilseq(VALUE self, VALUE discard_ilseq)
{
    int dis;

    /* Any value other than nil/false switches the flag on. */
    dis = RTEST(discard_ilseq);
    iconv_ctl(self, ICONV_SET_DISCARD_ILSEQ, dis);

    return self;
}
#else
#define iconv_set_discard_ilseq rb_f_notimplement
#endif
|
1072
|
+
|
1073
|
+
/*
|
1074
|
+
* Document-method: ctlmethods
|
1075
|
+
* call-seq: Iconv.ctlmethods => array
|
1076
|
+
*
|
1077
|
+
* Returns available iconvctl() method list.
|
1078
|
+
*/
|
1079
|
+
static VALUE
|
1080
|
+
iconv_s_ctlmethods(VALUE klass)
|
1081
|
+
{
|
1082
|
+
VALUE ary = rb_ary_new();
|
1083
|
+
#ifdef ICONV_TRIVIALP
|
1084
|
+
rb_ary_push(ary, ID2SYM(rb_intern("trivial?")));
|
1085
|
+
#endif
|
1086
|
+
#ifdef ICONV_GET_TRANSLITERATE
|
1087
|
+
rb_ary_push(ary, ID2SYM(rb_intern("transliterate?")));
|
1088
|
+
#endif
|
1089
|
+
#ifdef ICONV_SET_TRANSLITERATE
|
1090
|
+
rb_ary_push(ary, ID2SYM(rb_intern("transliterate=")));
|
1091
|
+
#endif
|
1092
|
+
#ifdef ICONV_GET_DISCARD_ILSEQ
|
1093
|
+
rb_ary_push(ary, ID2SYM(rb_intern("discard_ilseq?")));
|
1094
|
+
#endif
|
1095
|
+
#ifdef ICONV_SET_DISCARD_ILSEQ
|
1096
|
+
rb_ary_push(ary, ID2SYM(rb_intern("discard_ilseq=")));
|
1097
|
+
#endif
|
1098
|
+
return ary;
|
787
1099
|
}
|
788
1100
|
|
789
1101
|
/*
|
@@ -802,13 +1114,7 @@ iconv_iconv
|
|
802
1114
|
* failure and the last element is string on the way.
|
803
1115
|
*/
|
804
1116
|
static VALUE
|
805
|
-
iconv_failure_success
|
806
|
-
#ifdef HAVE_PROTOTYPES
|
807
|
-
(VALUE self)
|
808
|
-
#else /* HAVE_PROTOTYPES */
|
809
|
-
(self)
|
810
|
-
VALUE self;
|
811
|
-
#endif /* HAVE_PROTOTYPES */
|
1117
|
+
iconv_failure_success(VALUE self)
|
812
1118
|
{
|
813
1119
|
return rb_attr_get(self, rb_success);
|
814
1120
|
}
|
@@ -818,16 +1124,10 @@ iconv_failure_success
|
|
818
1124
|
* call-seq: failed
|
819
1125
|
*
|
820
1126
|
* Returns substring of the original string passed to Iconv that starts at the
|
821
|
-
* character caused the exception.
|
1127
|
+
* character that caused the exception.
|
822
1128
|
*/
|
823
1129
|
static VALUE
|
824
|
-
iconv_failure_failed
|
825
|
-
#ifdef HAVE_PROTOTYPES
|
826
|
-
(VALUE self)
|
827
|
-
#else /* HAVE_PROTOTYPES */
|
828
|
-
(self)
|
829
|
-
VALUE self;
|
830
|
-
#endif /* HAVE_PROTOTYPES */
|
1130
|
+
iconv_failure_failed(VALUE self)
|
831
1131
|
{
|
832
1132
|
return rb_attr_get(self, rb_failed);
|
833
1133
|
}
|
@@ -839,13 +1139,7 @@ iconv_failure_failed
|
|
839
1139
|
* Returns an inspected string of the form: #<_class_: _success_, _failed_>
|
840
1140
|
*/
|
841
1141
|
static VALUE
|
842
|
-
iconv_failure_inspect
|
843
|
-
#ifdef HAVE_PROTOTYPES
|
844
|
-
(VALUE self)
|
845
|
-
#else /* HAVE_PROTOTYPES */
|
846
|
-
(self)
|
847
|
-
VALUE self;
|
848
|
-
#endif /* HAVE_PROTOTYPES */
|
1142
|
+
iconv_failure_inspect(VALUE self)
|
849
1143
|
{
|
850
1144
|
const char *cname = rb_class2name(CLASS_OF(self));
|
851
1145
|
VALUE success = rb_attr_get(self, rb_success);
|
@@ -860,13 +1154,13 @@ iconv_failure_inspect
|
|
860
1154
|
|
861
1155
|
/*
|
862
1156
|
* Document-class: Iconv::InvalidEncoding
|
863
|
-
*
|
1157
|
+
*
|
864
1158
|
* Requested coding-system is not available on this system.
|
865
1159
|
*/
|
866
1160
|
|
867
1161
|
/*
|
868
1162
|
* Document-class: Iconv::IllegalSequence
|
869
|
-
*
|
1163
|
+
*
|
870
1164
|
* Input conversion stopped due to an input byte that does not belong to
|
871
1165
|
* the input codeset, or the output codeset does not contain the
|
872
1166
|
* character.
|
@@ -874,36 +1168,44 @@ iconv_failure_inspect
|
|
874
1168
|
|
875
1169
|
/*
|
876
1170
|
* Document-class: Iconv::InvalidCharacter
|
877
|
-
*
|
1171
|
+
*
|
878
1172
|
* Input conversion stopped due to an incomplete character or shift
|
879
1173
|
* sequence at the end of the input buffer.
|
880
1174
|
*/
|
881
1175
|
|
882
1176
|
/*
|
883
1177
|
* Document-class: Iconv::OutOfRange
|
884
|
-
*
|
1178
|
+
*
|
885
1179
|
* Iconv library internal error. Must not occur.
|
886
1180
|
*/
|
887
1181
|
|
888
1182
|
/*
|
889
1183
|
* Document-class: Iconv::BrokenLibrary
|
890
|
-
*
|
1184
|
+
*
|
891
1185
|
* Detected a bug in the underlying iconv(3) library.
|
892
1186
|
* * returns an error without setting errno properly
|
893
1187
|
*/
|
894
1188
|
|
895
1189
|
void
|
896
|
-
Init_iconv
|
1190
|
+
Init_iconv(void)
|
897
1191
|
{
|
898
1192
|
VALUE rb_cIconv = rb_define_class("Iconv", rb_cData);
|
899
1193
|
|
900
1194
|
rb_define_alloc_func(rb_cIconv, iconv_s_allocate);
|
901
|
-
rb_define_singleton_method(rb_cIconv, "open", iconv_s_open,
|
1195
|
+
rb_define_singleton_method(rb_cIconv, "open", iconv_s_open, -1);
|
902
1196
|
rb_define_singleton_method(rb_cIconv, "iconv", iconv_s_iconv, -1);
|
903
1197
|
rb_define_singleton_method(rb_cIconv, "conv", iconv_s_conv, 3);
|
904
|
-
|
1198
|
+
rb_define_singleton_method(rb_cIconv, "list", iconv_s_list, 0);
|
1199
|
+
rb_define_singleton_method(rb_cIconv, "ctlmethods", iconv_s_ctlmethods, 0);
|
1200
|
+
rb_define_method(rb_cIconv, "initialize", iconv_initialize, -1);
|
905
1201
|
rb_define_method(rb_cIconv, "close", iconv_finish, 0);
|
906
1202
|
rb_define_method(rb_cIconv, "iconv", iconv_iconv, -1);
|
1203
|
+
rb_define_method(rb_cIconv, "conv", iconv_conv, -1);
|
1204
|
+
rb_define_method(rb_cIconv, "trivial?", iconv_trivialp, 0);
|
1205
|
+
rb_define_method(rb_cIconv, "transliterate?", iconv_get_transliterate, 0);
|
1206
|
+
rb_define_method(rb_cIconv, "transliterate=", iconv_set_transliterate, 1);
|
1207
|
+
rb_define_method(rb_cIconv, "discard_ilseq?", iconv_get_discard_ilseq, 0);
|
1208
|
+
rb_define_method(rb_cIconv, "discard_ilseq=", iconv_set_discard_ilseq, 1);
|
907
1209
|
|
908
1210
|
rb_eIconvFailure = rb_define_module_under(rb_cIconv, "Failure");
|
909
1211
|
rb_define_method(rb_eIconvFailure, "initialize", iconv_failure_initialize, 3);
|
@@ -924,8 +1226,11 @@ Init_iconv _((void))
|
|
924
1226
|
|
925
1227
|
rb_success = rb_intern("success");
|
926
1228
|
rb_failed = rb_intern("failed");
|
1229
|
+
id_transliterate = rb_intern("transliterate");
|
1230
|
+
id_discard_ilseq = rb_intern("discard_ilseq");
|
927
1231
|
|
928
1232
|
rb_gc_register_address(&charset_map);
|
929
1233
|
charset_map = rb_hash_new();
|
930
1234
|
rb_define_singleton_method(rb_cIconv, "charset_map", charset_map_get, 0);
|
931
1235
|
}
|
1236
|
+
|