iconv 0.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/BSDL +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +56 -0
- data/README.md +75 -0
- data/Rakefile +38 -0
- data/ext/iconv/depend +2 -0
- data/ext/iconv/extconf.rb +5 -2
- data/ext/iconv/iconv.c +578 -273
- data/iconv.gemspec +20 -0
- data/lib/iconv/version.rb +3 -0
- data/lib/iconv.rb +6 -0
- data/test/test_basic.rb +59 -0
- data/test/test_option.rb +43 -0
- data/test/test_partial.rb +41 -0
- data/test/utils.rb +23 -0
- metadata +46 -46
data/ext/iconv/iconv.c
CHANGED
@@ -4,7 +4,6 @@
|
|
4
4
|
iconv.c -
|
5
5
|
|
6
6
|
$Author$
|
7
|
-
$Date$
|
8
7
|
created at: Wed Dec 1 20:28:09 JST 1999
|
9
8
|
|
10
9
|
All the files in this distribution are covered under the Ruby's
|
@@ -14,12 +13,12 @@
|
|
14
13
|
|
15
14
|
**********************************************************************/
|
16
15
|
|
17
|
-
#include "ruby.h"
|
16
|
+
#include "ruby/ruby.h"
|
18
17
|
#include <errno.h>
|
19
18
|
#include <iconv.h>
|
20
19
|
#include <assert.h>
|
21
|
-
#include "st.h"
|
22
|
-
#include "
|
20
|
+
#include "ruby/st.h"
|
21
|
+
#include "ruby/encoding.h"
|
23
22
|
|
24
23
|
/*
|
25
24
|
* Document-class: Iconv
|
@@ -27,20 +26,20 @@
|
|
27
26
|
* == Summary
|
28
27
|
*
|
29
28
|
* Ruby extension for charset conversion.
|
30
|
-
*
|
29
|
+
*
|
31
30
|
* == Abstract
|
32
31
|
*
|
33
32
|
* Iconv is a wrapper class for the UNIX 95 <tt>iconv()</tt> function family,
|
34
33
|
* which translates string between various encoding systems.
|
35
|
-
*
|
34
|
+
*
|
36
35
|
* See Open Group's on-line documents for more details.
|
37
36
|
* * <tt>iconv.h</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.h.html
|
38
37
|
* * <tt>iconv_open()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_open.html
|
39
38
|
* * <tt>iconv()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.html
|
40
39
|
* * <tt>iconv_close()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_close.html
|
41
|
-
*
|
40
|
+
*
|
42
41
|
* Which coding systems are available is platform-dependent.
|
43
|
-
*
|
42
|
+
*
|
44
43
|
* == Examples
|
45
44
|
*
|
46
45
|
* 1. Simple conversion between two charsets.
|
@@ -67,6 +66,12 @@
|
|
67
66
|
* 4. Shorthand for (3).
|
68
67
|
*
|
69
68
|
* Iconv.iconv(to, from, *input.to_a)
|
69
|
+
*
|
70
|
+
* == Attentions
|
71
|
+
*
|
72
|
+
* Even if some extentions of implementation dependent are useful,
|
73
|
+
* DON'T USE those extentions in libraries and scripts to widely distribute.
|
74
|
+
* If you want to use those feature, use String#encode.
|
70
75
|
*/
|
71
76
|
|
72
77
|
/* Invalid value for iconv_t is -1 but 0 for VALUE, I hope VALUE is
|
@@ -80,9 +85,18 @@ struct iconv_env_t
|
|
80
85
|
int argc;
|
81
86
|
VALUE *argv;
|
82
87
|
VALUE ret;
|
88
|
+
int toidx;
|
83
89
|
VALUE (*append)_((VALUE, VALUE));
|
84
90
|
};
|
85
91
|
|
92
|
+
struct rb_iconv_opt_t
|
93
|
+
{
|
94
|
+
VALUE transliterate;
|
95
|
+
VALUE discard_ilseq;
|
96
|
+
};
|
97
|
+
|
98
|
+
static ID id_transliterate, id_discard_ilseq;
|
99
|
+
|
86
100
|
static VALUE rb_eIconvInvalidEncoding;
|
87
101
|
static VALUE rb_eIconvFailure;
|
88
102
|
static VALUE rb_eIconvIllegalSeq;
|
@@ -91,26 +105,28 @@ static VALUE rb_eIconvOutOfRange;
|
|
91
105
|
static VALUE rb_eIconvBrokenLibrary;
|
92
106
|
|
93
107
|
static ID rb_success, rb_failed;
|
94
|
-
static VALUE iconv_fail _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg));
|
95
|
-
static VALUE iconv_fail_retry _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg));
|
108
|
+
static VALUE iconv_fail _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, VALUE mesg));
|
109
|
+
static VALUE iconv_fail_retry _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, VALUE mesg));
|
96
110
|
static VALUE iconv_failure_initialize _((VALUE error, VALUE mesg, VALUE success, VALUE failed));
|
97
111
|
static VALUE iconv_failure_success _((VALUE self));
|
98
112
|
static VALUE iconv_failure_failed _((VALUE self));
|
99
113
|
|
100
|
-
static iconv_t iconv_create _((VALUE to, VALUE from));
|
114
|
+
static iconv_t iconv_create _((VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx));
|
101
115
|
static void iconv_dfree _((void *cd));
|
102
116
|
static VALUE iconv_free _((VALUE cd));
|
103
117
|
static VALUE iconv_try _((iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen));
|
104
|
-
static VALUE rb_str_derive _((VALUE str, const char* ptr, int len));
|
105
|
-
static VALUE iconv_convert _((iconv_t cd, VALUE str, long start, long length, struct iconv_env_t* env));
|
118
|
+
static VALUE rb_str_derive _((VALUE str, const char* ptr, long len));
|
119
|
+
static VALUE iconv_convert _((iconv_t cd, VALUE str, long start, long length, int toidx,
|
120
|
+
struct iconv_env_t* env));
|
106
121
|
static VALUE iconv_s_allocate _((VALUE klass));
|
107
|
-
static VALUE iconv_initialize _((VALUE self, VALUE to, VALUE from));
|
108
|
-
static VALUE iconv_s_open _((VALUE self, VALUE to, VALUE from));
|
122
|
+
static VALUE iconv_initialize _((int argc, VALUE *argv, VALUE self));
|
123
|
+
static VALUE iconv_s_open _((int argc, VALUE *argv, VALUE self));
|
109
124
|
static VALUE iconv_s_convert _((struct iconv_env_t* env));
|
110
125
|
static VALUE iconv_s_iconv _((int argc, VALUE *argv, VALUE self));
|
111
126
|
static VALUE iconv_init_state _((VALUE cd));
|
112
127
|
static VALUE iconv_finish _((VALUE self));
|
113
128
|
static VALUE iconv_iconv _((int argc, VALUE *argv, VALUE self));
|
129
|
+
static VALUE iconv_conv _((int argc, VALUE *argv, VALUE self));
|
114
130
|
|
115
131
|
static VALUE charset_map;
|
116
132
|
|
@@ -120,94 +136,162 @@ static VALUE charset_map;
|
|
120
136
|
*
|
121
137
|
* Returns the map from canonical name to system dependent name.
|
122
138
|
*/
|
123
|
-
static VALUE
|
139
|
+
static VALUE
|
140
|
+
charset_map_get(void)
|
124
141
|
{
|
125
142
|
return charset_map;
|
126
143
|
}
|
127
144
|
|
145
|
+
static VALUE
|
146
|
+
strip_glibc_option(VALUE *code)
|
147
|
+
{
|
148
|
+
VALUE val = StringValue(*code);
|
149
|
+
const char *ptr = RSTRING_PTR(val), *pend = RSTRING_END(val);
|
150
|
+
const char *slash = memchr(ptr, '/', pend - ptr);
|
151
|
+
|
152
|
+
if (slash && slash < pend - 1 && slash[1] == '/') {
|
153
|
+
VALUE opt = rb_str_subseq(val, slash - ptr, pend - slash);
|
154
|
+
val = rb_str_subseq(val, 0, slash - ptr);
|
155
|
+
*code = val;
|
156
|
+
return opt;
|
157
|
+
}
|
158
|
+
return 0;
|
159
|
+
}
|
160
|
+
|
128
161
|
static char *
|
129
|
-
map_charset
|
130
|
-
#ifdef HAVE_PROTOTYPES
|
131
|
-
(VALUE *code)
|
132
|
-
#else /* HAVE_PROTOTYPES */
|
133
|
-
(code)
|
134
|
-
VALUE *code;
|
135
|
-
#endif /* HAVE_PROTOTYPES */
|
162
|
+
map_charset(VALUE *code)
|
136
163
|
{
|
137
|
-
VALUE val = *code;
|
164
|
+
VALUE val = StringValue(*code);
|
138
165
|
|
139
|
-
if (
|
166
|
+
if (RHASH_SIZE(charset_map)) {
|
167
|
+
st_data_t data;
|
140
168
|
VALUE key = rb_funcall2(val, rb_intern("downcase"), 0, 0);
|
141
169
|
StringValuePtr(key);
|
142
|
-
if (st_lookup(
|
143
|
-
*code =
|
170
|
+
if (st_lookup(RHASH_TBL(charset_map), key, &data)) {
|
171
|
+
*code = (VALUE)data;
|
144
172
|
}
|
145
173
|
}
|
146
174
|
return StringValuePtr(*code);
|
147
175
|
}
|
148
176
|
|
149
|
-
NORETURN(static void
|
177
|
+
NORETURN(static void rb_iconv_sys_fail_str(VALUE msg));
|
150
178
|
static void
|
151
|
-
|
179
|
+
rb_iconv_sys_fail_str(VALUE msg)
|
152
180
|
{
|
153
181
|
if (errno == 0) {
|
154
|
-
rb_exc_raise(iconv_fail(rb_eIconvBrokenLibrary, Qnil, Qnil, NULL,
|
182
|
+
rb_exc_raise(iconv_fail(rb_eIconvBrokenLibrary, Qnil, Qnil, NULL, msg));
|
155
183
|
}
|
156
|
-
|
184
|
+
rb_sys_fail_str(msg);
|
185
|
+
}
|
186
|
+
|
187
|
+
#define rb_sys_fail_str(s) rb_iconv_sys_fail_str(s)
|
188
|
+
|
189
|
+
NORETURN(static void rb_iconv_sys_fail(const char *s));
|
190
|
+
static void
|
191
|
+
rb_iconv_sys_fail(const char *s)
|
192
|
+
{
|
193
|
+
rb_iconv_sys_fail_str(rb_str_new_cstr(s));
|
157
194
|
}
|
158
195
|
|
159
196
|
#define rb_sys_fail(s) rb_iconv_sys_fail(s)
|
160
197
|
|
161
198
|
static iconv_t
|
162
|
-
iconv_create
|
163
|
-
#ifdef HAVE_PROTOTYPES
|
164
|
-
(VALUE to, VALUE from)
|
165
|
-
#else /* HAVE_PROTOTYPES */
|
166
|
-
(to, from)
|
167
|
-
VALUE to;
|
168
|
-
VALUE from;
|
169
|
-
#endif /* HAVE_PROTOTYPES */
|
199
|
+
iconv_create(VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx)
|
170
200
|
{
|
201
|
+
VALUE toopt = strip_glibc_option(&to);
|
202
|
+
VALUE fromopt = strip_glibc_option(&from);
|
203
|
+
VALUE toenc = 0, fromenc = 0;
|
171
204
|
const char* tocode = map_charset(&to);
|
172
205
|
const char* fromcode = map_charset(&from);
|
206
|
+
iconv_t cd;
|
207
|
+
int retry = 0;
|
173
208
|
|
174
|
-
|
209
|
+
*idx = rb_enc_find_index(tocode);
|
175
210
|
|
176
|
-
if (
|
211
|
+
if (toopt) {
|
212
|
+
toenc = rb_str_plus(to, toopt);
|
213
|
+
tocode = RSTRING_PTR(toenc);
|
214
|
+
}
|
215
|
+
if (fromopt) {
|
216
|
+
fromenc = rb_str_plus(from, fromopt);
|
217
|
+
fromcode = RSTRING_PTR(fromenc);
|
218
|
+
}
|
219
|
+
while ((cd = iconv_open(tocode, fromcode)) == (iconv_t)-1) {
|
220
|
+
int inval = 0;
|
177
221
|
switch (errno) {
|
178
222
|
case EMFILE:
|
179
223
|
case ENFILE:
|
180
224
|
case ENOMEM:
|
181
|
-
|
182
|
-
|
225
|
+
if (!retry++) {
|
226
|
+
rb_gc();
|
227
|
+
continue;
|
228
|
+
}
|
229
|
+
break;
|
230
|
+
case EINVAL:
|
231
|
+
retry = 0;
|
232
|
+
inval = 1;
|
233
|
+
if (toenc) {
|
234
|
+
tocode = RSTRING_PTR(to);
|
235
|
+
rb_str_resize(toenc, 0);
|
236
|
+
toenc = 0;
|
237
|
+
continue;
|
238
|
+
}
|
239
|
+
if (fromenc) {
|
240
|
+
fromcode = RSTRING_PTR(from);
|
241
|
+
rb_str_resize(fromenc, 0);
|
242
|
+
fromenc = 0;
|
243
|
+
continue;
|
244
|
+
}
|
245
|
+
break;
|
183
246
|
}
|
184
|
-
|
185
|
-
int inval = errno == EINVAL;
|
247
|
+
{
|
186
248
|
const char *s = inval ? "invalid encoding " : "iconv";
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
sprintf(RSTRING(msg)->ptr, "%s(\"%s\", \"%s\")",
|
191
|
-
s, RSTRING(to)->ptr, RSTRING(from)->ptr);
|
192
|
-
s = RSTRING(msg)->ptr;
|
193
|
-
RSTRING(msg)->len = strlen(s);
|
194
|
-
if (!inval) rb_sys_fail(s);
|
249
|
+
VALUE msg = rb_sprintf("%s(\"%s\", \"%s\")",
|
250
|
+
s, RSTRING_PTR(to), RSTRING_PTR(from));
|
251
|
+
if (!inval) rb_sys_fail_str(msg);
|
195
252
|
rb_exc_raise(iconv_fail(rb_eIconvInvalidEncoding, Qnil,
|
196
|
-
rb_ary_new3(2, to, from), NULL,
|
253
|
+
rb_ary_new3(2, to, from), NULL, msg));
|
254
|
+
}
|
255
|
+
}
|
256
|
+
|
257
|
+
if (toopt || fromopt) {
|
258
|
+
if (toopt && fromopt && RTEST(rb_str_equal(toopt, fromopt))) {
|
259
|
+
fromopt = 0;
|
260
|
+
}
|
261
|
+
if (toopt && fromopt) {
|
262
|
+
rb_warning("encoding option isn't portable: %s, %s",
|
263
|
+
RSTRING_PTR(toopt) + 2, RSTRING_PTR(fromopt) + 2);
|
264
|
+
}
|
265
|
+
else {
|
266
|
+
rb_warning("encoding option isn't portable: %s",
|
267
|
+
(toopt ? RSTRING_PTR(toopt) : RSTRING_PTR(fromopt)) + 2);
|
268
|
+
}
|
269
|
+
}
|
270
|
+
|
271
|
+
if (opt) {
|
272
|
+
#ifdef ICONV_SET_TRANSLITERATE
|
273
|
+
if (opt->transliterate != Qundef) {
|
274
|
+
int flag = RTEST(opt->transliterate);
|
275
|
+
rb_warning("encoding option isn't portable: transliterate");
|
276
|
+
if (iconvctl(cd, ICONV_SET_TRANSLITERATE, (void *)&flag))
|
277
|
+
rb_sys_fail("ICONV_SET_TRANSLITERATE");
|
197
278
|
}
|
279
|
+
#endif
|
280
|
+
#ifdef ICONV_SET_DISCARD_ILSEQ
|
281
|
+
if (opt->discard_ilseq != Qundef) {
|
282
|
+
int flag = RTEST(opt->discard_ilseq);
|
283
|
+
rb_warning("encoding option isn't portable: discard_ilseq");
|
284
|
+
if (iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, (void *)&flag))
|
285
|
+
rb_sys_fail("ICONV_SET_DISCARD_ILSEQ");
|
286
|
+
}
|
287
|
+
#endif
|
198
288
|
}
|
199
289
|
|
200
290
|
return cd;
|
201
291
|
}
|
202
292
|
|
203
293
|
static void
|
204
|
-
iconv_dfree
|
205
|
-
#ifdef HAVE_PROTOTYPES
|
206
|
-
(void *cd)
|
207
|
-
#else /* HAVE_PROTOTYPES */
|
208
|
-
(cd)
|
209
|
-
void *cd;
|
210
|
-
#endif /* HAVE_PROTOTYPES */
|
294
|
+
iconv_dfree(void *cd)
|
211
295
|
{
|
212
296
|
iconv_close(VALUE2ICONV(cd));
|
213
297
|
}
|
@@ -215,13 +299,7 @@ iconv_dfree
|
|
215
299
|
#define ICONV_FREE iconv_dfree
|
216
300
|
|
217
301
|
static VALUE
|
218
|
-
iconv_free
|
219
|
-
#ifdef HAVE_PROTOTYPES
|
220
|
-
(VALUE cd)
|
221
|
-
#else /* HAVE_PROTOTYPES */
|
222
|
-
(cd)
|
223
|
-
VALUE cd;
|
224
|
-
#endif /* HAVE_PROTOTYPES */
|
302
|
+
iconv_free(VALUE cd)
|
225
303
|
{
|
226
304
|
if (cd && iconv_close(VALUE2ICONV(cd)) == -1)
|
227
305
|
rb_sys_fail("iconv_close");
|
@@ -229,13 +307,7 @@ iconv_free
|
|
229
307
|
}
|
230
308
|
|
231
309
|
static VALUE
|
232
|
-
check_iconv
|
233
|
-
#ifdef HAVE_PROTOTYPES
|
234
|
-
(VALUE obj)
|
235
|
-
#else /* HAVE_PROTOTYPES */
|
236
|
-
(obj)
|
237
|
-
VALUE obj;
|
238
|
-
#endif /* HAVE_PROTOTYPES */
|
310
|
+
check_iconv(VALUE obj)
|
239
311
|
{
|
240
312
|
Check_Type(obj, T_DATA);
|
241
313
|
if (RDATA(obj)->dfree != ICONV_FREE) {
|
@@ -245,17 +317,7 @@ check_iconv
|
|
245
317
|
}
|
246
318
|
|
247
319
|
static VALUE
|
248
|
-
iconv_try
|
249
|
-
#ifdef HAVE_PROTOTYPES
|
250
|
-
(iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen)
|
251
|
-
#else /* HAVE_PROTOTYPES */
|
252
|
-
(cd, inptr, inlen, outptr, outlen)
|
253
|
-
iconv_t cd;
|
254
|
-
const char **inptr;
|
255
|
-
size_t *inlen;
|
256
|
-
char **outptr;
|
257
|
-
size_t *outlen;
|
258
|
-
#endif /* HAVE_PROTOTYPES */
|
320
|
+
iconv_try(iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen)
|
259
321
|
{
|
260
322
|
#ifdef ICONV_INPTR_CONST
|
261
323
|
#define ICONV_INPTR_CAST
|
@@ -295,13 +357,8 @@ iconv_try
|
|
295
357
|
|
296
358
|
#define FAILED_MAXLEN 16
|
297
359
|
|
298
|
-
static VALUE iconv_failure_initialize
|
299
|
-
#ifdef HAVE_PROTOTYPES
300
|
-
(VALUE error, VALUE mesg, VALUE success, VALUE failed)
|
301
|
-
#else /* HAVE_PROTOTYPES */
|
302
|
-
(error, mesg, success, failed)
|
303
|
-
VALUE error, mesg, success, failed;
|
304
|
-
#endif /* HAVE_PROTOTYPES */
|
360
|
+
static VALUE
|
361
|
+
iconv_failure_initialize(VALUE error, VALUE mesg, VALUE success, VALUE failed)
|
305
362
|
{
|
306
363
|
rb_call_super(1, &mesg);
|
307
364
|
rb_ivar_set(error, rb_success, success);
|
@@ -310,22 +367,14 @@ static VALUE iconv_failure_initialize
|
|
310
367
|
}
|
311
368
|
|
312
369
|
static VALUE
|
313
|
-
iconv_fail
|
314
|
-
#ifdef HAVE_PROTOTYPES
|
315
|
-
(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg)
|
316
|
-
#else /* HAVE_PROTOTYPES */
|
317
|
-
(error, success, failed, env, mesg)
|
318
|
-
VALUE error, success, failed;
|
319
|
-
struct iconv_env_t *env;
|
320
|
-
const char *mesg;
|
321
|
-
#endif /* HAVE_PROTOTYPES */
|
370
|
+
iconv_fail(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, VALUE mesg)
|
322
371
|
{
|
323
372
|
VALUE args[3];
|
324
373
|
|
325
|
-
if (mesg
|
326
|
-
args[0] =
|
374
|
+
if (!NIL_P(mesg)) {
|
375
|
+
args[0] = mesg;
|
327
376
|
}
|
328
|
-
else if (TYPE(failed) != T_STRING ||
|
377
|
+
else if (TYPE(failed) != T_STRING || RSTRING_LEN(failed) < FAILED_MAXLEN) {
|
329
378
|
args[0] = rb_inspect(failed);
|
330
379
|
}
|
331
380
|
else {
|
@@ -345,33 +394,23 @@ iconv_fail
|
|
345
394
|
}
|
346
395
|
|
347
396
|
static VALUE
|
348
|
-
iconv_fail_retry(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg)
|
397
|
+
iconv_fail_retry(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, VALUE mesg)
|
349
398
|
{
|
350
399
|
error = iconv_fail(error, success, failed, env, mesg);
|
351
400
|
if (!rb_block_given_p()) rb_exc_raise(error);
|
352
|
-
|
401
|
+
rb_set_errinfo(error);
|
353
402
|
return rb_yield(failed);
|
354
403
|
}
|
355
404
|
|
356
405
|
static VALUE
|
357
|
-
rb_str_derive
|
358
|
-
#ifdef HAVE_PROTOTYPES
|
359
|
-
(VALUE str, const char* ptr, int len)
|
360
|
-
#else /* HAVE_PROTOTYPES */
|
361
|
-
(str, ptr, len)
|
362
|
-
VALUE str;
|
363
|
-
const char *ptr;
|
364
|
-
int len;
|
365
|
-
#endif /* HAVE_PROTOTYPES */
|
406
|
+
rb_str_derive(VALUE str, const char* ptr, long len)
|
366
407
|
{
|
367
408
|
VALUE ret;
|
368
409
|
|
369
410
|
if (NIL_P(str))
|
370
411
|
return rb_str_new(ptr, len);
|
371
|
-
if (
|
372
|
-
|
373
|
-
if (RSTRING(str)->ptr + RSTRING(str)->len == ptr + len)
|
374
|
-
ret = rb_str_substr(str, ptr - RSTRING(str)->ptr, len);
|
412
|
+
if (RSTRING_PTR(str) + RSTRING_LEN(str) == ptr + len)
|
413
|
+
ret = rb_str_subseq(str, ptr - RSTRING_PTR(str), len);
|
375
414
|
else
|
376
415
|
ret = rb_str_new(ptr, len);
|
377
416
|
OBJ_INFECT(ret, str);
|
@@ -379,17 +418,7 @@ rb_str_derive
|
|
379
418
|
}
|
380
419
|
|
381
420
|
static VALUE
|
382
|
-
iconv_convert
|
383
|
-
#ifdef HAVE_PROTOTYPES
|
384
|
-
(iconv_t cd, VALUE str, long start, long length, struct iconv_env_t* env)
|
385
|
-
#else /* HAVE_PROTOTYPES */
|
386
|
-
(cd, str, start, length, env)
|
387
|
-
iconv_t cd;
|
388
|
-
VALUE str;
|
389
|
-
long start;
|
390
|
-
long length;
|
391
|
-
struct iconv_env_t *env;
|
392
|
-
#endif /* HAVE_PROTOTYPES */
|
421
|
+
iconv_convert(iconv_t cd, VALUE str, long start, long length, int toidx, struct iconv_env_t* env)
|
393
422
|
{
|
394
423
|
VALUE ret = Qfalse;
|
395
424
|
VALUE error = Qfalse;
|
@@ -413,9 +442,9 @@ iconv_convert
|
|
413
442
|
error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
|
414
443
|
if (RTEST(error)) {
|
415
444
|
unsigned int i;
|
416
|
-
rescue = iconv_fail_retry(error, Qnil, Qnil, env,
|
445
|
+
rescue = iconv_fail_retry(error, Qnil, Qnil, env, Qnil);
|
417
446
|
if (TYPE(rescue) == T_ARRAY) {
|
418
|
-
str =
|
447
|
+
str = RARRAY_LEN(rescue) > 0 ? RARRAY_PTR(rescue)[0] : Qnil;
|
419
448
|
}
|
420
449
|
if (FIXNUM_P(str) && (i = FIX2INT(str)) <= 0xff) {
|
421
450
|
char c = i;
|
@@ -430,11 +459,11 @@ iconv_convert
|
|
430
459
|
length = 0;
|
431
460
|
}
|
432
461
|
else {
|
433
|
-
|
462
|
+
long slen;
|
434
463
|
|
435
464
|
StringValue(str);
|
436
|
-
slen =
|
437
|
-
inptr =
|
465
|
+
slen = RSTRING_LEN(str);
|
466
|
+
inptr = RSTRING_PTR(str);
|
438
467
|
|
439
468
|
inptr += start;
|
440
469
|
if (length < 0 || length > start + slen)
|
@@ -444,23 +473,27 @@ iconv_convert
|
|
444
473
|
inlen = length;
|
445
474
|
|
446
475
|
do {
|
447
|
-
|
476
|
+
VALUE errmsg = Qnil;
|
448
477
|
const char *tmpstart = inptr;
|
449
478
|
outptr = buffer;
|
450
479
|
outlen = sizeof(buffer);
|
451
480
|
|
452
|
-
errmsg[0] = 0;
|
453
481
|
error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
|
454
482
|
|
455
|
-
if (
|
483
|
+
if (
|
484
|
+
#if SIGNEDNESS_OF_SIZE_T < 0
|
485
|
+
0 <= outlen &&
|
486
|
+
#endif
|
487
|
+
outlen <= sizeof(buffer)) {
|
456
488
|
outlen = sizeof(buffer) - outlen;
|
457
489
|
if (NIL_P(error) || /* something converted */
|
458
|
-
outlen > inptr - tmpstart || /* input can't contain output */
|
459
|
-
(outlen < inptr - tmpstart && inlen > 0) || /* something skipped */
|
490
|
+
outlen > (size_t)(inptr - tmpstart) || /* input can't contain output */
|
491
|
+
(outlen < (size_t)(inptr - tmpstart) && inlen > 0) || /* something skipped */
|
460
492
|
memcmp(buffer, tmpstart, outlen)) /* something differs */
|
461
493
|
{
|
462
494
|
if (NIL_P(str)) {
|
463
495
|
ret = rb_str_new(buffer, outlen);
|
496
|
+
if (toidx >= 0) rb_enc_associate_index(ret, toidx);
|
464
497
|
}
|
465
498
|
else {
|
466
499
|
if (ret) {
|
@@ -468,6 +501,7 @@ iconv_convert
|
|
468
501
|
}
|
469
502
|
else {
|
470
503
|
ret = rb_str_new(instart, tmpstart - instart);
|
504
|
+
if (toidx >= 0) rb_enc_associate_index(ret, toidx);
|
471
505
|
OBJ_INFECT(ret, str);
|
472
506
|
}
|
473
507
|
ret = rb_str_buf_cat(ret, buffer, outlen);
|
@@ -480,26 +514,29 @@ iconv_convert
|
|
480
514
|
}
|
481
515
|
else {
|
482
516
|
/* Some iconv() have a bug, return *outlen out of range */
|
483
|
-
|
517
|
+
errmsg = rb_sprintf("bug?(output length = %ld)", (long)(sizeof(buffer) - outlen));
|
484
518
|
error = rb_eIconvOutOfRange;
|
485
519
|
}
|
486
520
|
|
487
521
|
if (RTEST(error)) {
|
488
522
|
long len = 0;
|
489
523
|
|
490
|
-
if (!ret)
|
524
|
+
if (!ret) {
|
491
525
|
ret = rb_str_derive(str, instart, inptr - instart);
|
492
|
-
|
526
|
+
if (toidx >= 0) rb_enc_associate_index(ret, toidx);
|
527
|
+
}
|
528
|
+
else if (inptr > instart) {
|
493
529
|
rb_str_cat(ret, instart, inptr - instart);
|
530
|
+
}
|
494
531
|
str = rb_str_derive(str, inptr, inlen);
|
495
532
|
rescue = iconv_fail_retry(error, ret, str, env, errmsg);
|
496
533
|
if (TYPE(rescue) == T_ARRAY) {
|
497
|
-
if ((len =
|
498
|
-
rb_str_concat(ret,
|
499
|
-
if (len > 1 && !NIL_P(str =
|
534
|
+
if ((len = RARRAY_LEN(rescue)) > 0)
|
535
|
+
rb_str_concat(ret, RARRAY_PTR(rescue)[0]);
|
536
|
+
if (len > 1 && !NIL_P(str = RARRAY_PTR(rescue)[1])) {
|
500
537
|
StringValue(str);
|
501
|
-
inlen = length =
|
502
|
-
instart = inptr =
|
538
|
+
inlen = length = RSTRING_LEN(str);
|
539
|
+
instart = inptr = RSTRING_PTR(str);
|
503
540
|
continue;
|
504
541
|
}
|
505
542
|
}
|
@@ -510,36 +547,107 @@ iconv_convert
|
|
510
547
|
}
|
511
548
|
} while (inlen > 0);
|
512
549
|
|
513
|
-
if (!ret)
|
550
|
+
if (!ret) {
|
514
551
|
ret = rb_str_derive(str, instart, inptr - instart);
|
515
|
-
|
552
|
+
if (toidx >= 0) rb_enc_associate_index(ret, toidx);
|
553
|
+
}
|
554
|
+
else if (inptr > instart) {
|
516
555
|
rb_str_cat(ret, instart, inptr - instart);
|
556
|
+
}
|
517
557
|
return ret;
|
518
558
|
}
|
519
559
|
|
520
560
|
static VALUE
|
521
|
-
iconv_s_allocate
|
522
|
-
#ifdef HAVE_PROTOTYPES
|
523
|
-
(VALUE klass)
|
524
|
-
#else /* HAVE_PROTOTYPES */
|
525
|
-
(klass)
|
526
|
-
VALUE klass;
|
527
|
-
#endif /* HAVE_PROTOTYPES */
|
561
|
+
iconv_s_allocate(VALUE klass)
|
528
562
|
{
|
529
563
|
return Data_Wrap_Struct(klass, 0, ICONV_FREE, 0);
|
530
564
|
}
|
531
565
|
|
566
|
+
static VALUE
|
567
|
+
get_iconv_opt_i(VALUE i, VALUE arg)
|
568
|
+
{
|
569
|
+
VALUE name;
|
570
|
+
#if defined ICONV_SET_TRANSLITERATE || defined ICONV_SET_DISCARD_ILSEQ
|
571
|
+
VALUE val;
|
572
|
+
struct rb_iconv_opt_t *opt = (struct rb_iconv_opt_t *)arg;
|
573
|
+
#endif
|
574
|
+
|
575
|
+
i = rb_Array(i);
|
576
|
+
name = rb_ary_entry(i, 0);
|
577
|
+
#if defined ICONV_SET_TRANSLITERATE || defined ICONV_SET_DISCARD_ILSEQ
|
578
|
+
val = rb_ary_entry(i, 1);
|
579
|
+
#endif
|
580
|
+
do {
|
581
|
+
if (SYMBOL_P(name)) {
|
582
|
+
ID id = SYM2ID(name);
|
583
|
+
if (id == id_transliterate) {
|
584
|
+
#ifdef ICONV_SET_TRANSLITERATE
|
585
|
+
opt->transliterate = val;
|
586
|
+
#else
|
587
|
+
rb_notimplement();
|
588
|
+
#endif
|
589
|
+
break;
|
590
|
+
}
|
591
|
+
if (id == id_discard_ilseq) {
|
592
|
+
#ifdef ICONV_SET_DISCARD_ILSEQ
|
593
|
+
opt->discard_ilseq = val;
|
594
|
+
#else
|
595
|
+
rb_notimplement();
|
596
|
+
#endif
|
597
|
+
break;
|
598
|
+
}
|
599
|
+
}
|
600
|
+
else {
|
601
|
+
const char *s = StringValueCStr(name);
|
602
|
+
if (strcmp(s, "transliterate") == 0) {
|
603
|
+
#ifdef ICONV_SET_TRANSLITERATE
|
604
|
+
opt->transliterate = val;
|
605
|
+
#else
|
606
|
+
rb_notimplement();
|
607
|
+
#endif
|
608
|
+
break;
|
609
|
+
}
|
610
|
+
if (strcmp(s, "discard_ilseq") == 0) {
|
611
|
+
#ifdef ICONV_SET_DISCARD_ILSEQ
|
612
|
+
opt->discard_ilseq = val;
|
613
|
+
#else
|
614
|
+
rb_notimplement();
|
615
|
+
#endif
|
616
|
+
break;
|
617
|
+
}
|
618
|
+
}
|
619
|
+
name = rb_inspect(name);
|
620
|
+
rb_raise(rb_eArgError, "unknown option - %s", StringValueCStr(name));
|
621
|
+
} while (0);
|
622
|
+
return Qnil;
|
623
|
+
}
|
624
|
+
|
625
|
+
static void
|
626
|
+
get_iconv_opt(struct rb_iconv_opt_t *opt, VALUE options)
|
627
|
+
{
|
628
|
+
opt->transliterate = Qundef;
|
629
|
+
opt->discard_ilseq = Qundef;
|
630
|
+
if (!NIL_P(options)) {
|
631
|
+
rb_block_call(options, rb_intern("each"), 0, 0, get_iconv_opt_i, (VALUE)opt);
|
632
|
+
}
|
633
|
+
}
|
634
|
+
|
635
|
+
#define iconv_ctl(self, func, val) (\
|
636
|
+
iconvctl(VALUE2ICONV(check_iconv(self)), func, (void *)&(val)) ? \
|
637
|
+
rb_sys_fail(#func) : (void)0)
|
638
|
+
|
532
639
|
/*
|
533
640
|
* Document-method: new
|
534
|
-
* call-seq: Iconv.new(to, from)
|
641
|
+
* call-seq: Iconv.new(to, from, [options])
|
535
642
|
*
|
536
643
|
* Creates new code converter from a coding-system designated with +from+
|
537
644
|
* to another one designated with +to+.
|
538
|
-
*
|
645
|
+
*
|
539
646
|
* === Parameters
|
540
647
|
*
|
541
648
|
* +to+:: encoding name for destination
|
542
649
|
* +from+:: encoding name for source
|
650
|
+
* +options+:: options for converter
|
543
651
|
*
|
544
652
|
* === Exceptions
|
545
653
|
*
|
@@ -548,19 +656,18 @@ iconv_s_allocate
|
|
548
656
|
* SystemCallError:: if <tt>iconv_open(3)</tt> fails
|
549
657
|
*/
|
550
658
|
static VALUE
|
551
|
-
iconv_initialize
|
552
|
-
#ifdef HAVE_PROTOTYPES
|
553
|
-
(VALUE self, VALUE to, VALUE from)
|
554
|
-
#else /* HAVE_PROTOTYPES */
|
555
|
-
(self, to, from)
|
556
|
-
VALUE self;
|
557
|
-
VALUE to;
|
558
|
-
VALUE from;
|
559
|
-
#endif /* HAVE_PROTOTYPES */
|
659
|
+
iconv_initialize(int argc, VALUE *argv, VALUE self)
|
560
660
|
{
|
661
|
+
VALUE to, from, options;
|
662
|
+
struct rb_iconv_opt_t opt;
|
663
|
+
int idx;
|
664
|
+
|
665
|
+
rb_scan_args(argc, argv, "21", &to, &from, &options);
|
666
|
+
get_iconv_opt(&opt, options);
|
561
667
|
iconv_free(check_iconv(self));
|
562
668
|
DATA_PTR(self) = NULL;
|
563
|
-
DATA_PTR(self) = (void *)ICONV2VALUE(iconv_create(to, from));
|
669
|
+
DATA_PTR(self) = (void *)ICONV2VALUE(iconv_create(to, from, &opt, &idx));
|
670
|
+
if (idx >= 0) ENCODING_SET(self, idx);
|
564
671
|
return self;
|
565
672
|
}
|
566
673
|
|
@@ -573,19 +680,19 @@ iconv_initialize
|
|
573
680
|
* returned from the block.
|
574
681
|
*/
|
575
682
|
static VALUE
|
576
|
-
iconv_s_open
|
577
|
-
#ifdef HAVE_PROTOTYPES
|
578
|
-
(VALUE self, VALUE to, VALUE from)
|
579
|
-
#else /* HAVE_PROTOTYPES */
|
580
|
-
(self, to, from)
|
581
|
-
VALUE self;
|
582
|
-
VALUE to;
|
583
|
-
VALUE from;
|
584
|
-
#endif /* HAVE_PROTOTYPES */
|
683
|
+
iconv_s_open(int argc, VALUE *argv, VALUE self)
|
585
684
|
{
|
586
|
-
VALUE cd = ICONV2VALUE(iconv_create(to, from));
|
685
|
+
VALUE to, from, options, cd;
|
686
|
+
struct rb_iconv_opt_t opt;
|
687
|
+
int idx;
|
688
|
+
|
689
|
+
rb_scan_args(argc, argv, "21", &to, &from, &options);
|
690
|
+
get_iconv_opt(&opt, options);
|
691
|
+
cd = ICONV2VALUE(iconv_create(to, from, &opt, &idx));
|
587
692
|
|
588
693
|
self = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)cd);
|
694
|
+
if (idx >= 0) ENCODING_SET(self, idx);
|
695
|
+
|
589
696
|
if (rb_block_given_p()) {
|
590
697
|
return rb_ensure(rb_yield, self, (VALUE(*)())iconv_finish, self);
|
591
698
|
}
|
@@ -595,24 +702,19 @@ iconv_s_open
|
|
595
702
|
}
|
596
703
|
|
597
704
|
static VALUE
|
598
|
-
iconv_s_convert
|
599
|
-
#ifdef HAVE_PROTOTYPES
|
600
|
-
(struct iconv_env_t* env)
|
601
|
-
#else /* HAVE_PROTOTYPES */
|
602
|
-
(env)
|
603
|
-
struct iconv_env_t *env;
|
604
|
-
#endif /* HAVE_PROTOTYPES */
|
705
|
+
iconv_s_convert(struct iconv_env_t* env)
|
605
706
|
{
|
606
707
|
VALUE last = 0;
|
607
708
|
|
608
709
|
for (; env->argc > 0; --env->argc, ++env->argv) {
|
609
|
-
VALUE s = iconv_convert(env->cd, last = *(env->argv),
|
710
|
+
VALUE s = iconv_convert(env->cd, last = *(env->argv),
|
711
|
+
0, -1, env->toidx, env);
|
610
712
|
env->append(env->ret, s);
|
611
713
|
}
|
612
714
|
|
613
715
|
if (!NIL_P(last)) {
|
614
|
-
VALUE s = iconv_convert(env->cd, Qnil, 0, 0, env);
|
615
|
-
if (
|
716
|
+
VALUE s = iconv_convert(env->cd, Qnil, 0, 0, env->toidx, env);
|
717
|
+
if (RSTRING_LEN(s))
|
616
718
|
env->append(env->ret, s);
|
617
719
|
}
|
618
720
|
|
@@ -638,15 +740,7 @@ iconv_s_convert
|
|
638
740
|
* Exceptions thrown by Iconv.new, Iconv.open and Iconv#iconv.
|
639
741
|
*/
|
640
742
|
static VALUE
|
641
|
-
iconv_s_iconv
|
642
|
-
#ifdef HAVE_PROTOTYPES
|
643
|
-
(int argc, VALUE *argv, VALUE self)
|
644
|
-
#else /* HAVE_PROTOTYPES */
|
645
|
-
(argc, argv, self)
|
646
|
-
int argc;
|
647
|
-
VALUE *argv;
|
648
|
-
VALUE self;
|
649
|
-
#endif /* HAVE_PROTOTYPES */
|
743
|
+
iconv_s_iconv(int argc, VALUE *argv, VALUE self)
|
650
744
|
{
|
651
745
|
struct iconv_env_t arg;
|
652
746
|
|
@@ -657,7 +751,7 @@ iconv_s_iconv
|
|
657
751
|
arg.argv = argv + 2;
|
658
752
|
arg.append = rb_ary_push;
|
659
753
|
arg.ret = rb_ary_new2(argc);
|
660
|
-
arg.cd = iconv_create(argv[0], argv[1]);
|
754
|
+
arg.cd = iconv_create(argv[0], argv[1], NULL, &arg.toidx);
|
661
755
|
return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd));
|
662
756
|
}
|
663
757
|
|
@@ -670,13 +764,7 @@ iconv_s_iconv
|
|
670
764
|
* See Iconv.iconv.
|
671
765
|
*/
|
672
766
|
static VALUE
|
673
|
-
iconv_s_conv
|
674
|
-
#ifdef HAVE_PROTOTYPES
|
675
|
-
(VALUE self, VALUE to, VALUE from, VALUE str)
|
676
|
-
#else /* HAVE_PROTOTYPES */
|
677
|
-
(self, to, from, str)
|
678
|
-
VALUE self, to, from, str;
|
679
|
-
#endif /* HAVE_PROTOTYPES */
|
767
|
+
iconv_s_conv(VALUE self, VALUE to, VALUE from, VALUE str)
|
680
768
|
{
|
681
769
|
struct iconv_env_t arg;
|
682
770
|
|
@@ -684,10 +772,94 @@ iconv_s_conv
|
|
684
772
|
arg.argv = &str;
|
685
773
|
arg.append = rb_str_append;
|
686
774
|
arg.ret = rb_str_new(0, 0);
|
687
|
-
arg.cd = iconv_create(to, from);
|
775
|
+
arg.cd = iconv_create(to, from, NULL, &arg.toidx);
|
688
776
|
return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd));
|
689
777
|
}
|
690
778
|
|
779
|
+
/*
|
780
|
+
* Document-method: list
|
781
|
+
* call-seq: Iconv.list {|*aliases| ... }
|
782
|
+
*
|
783
|
+
* Iterates each alias sets.
|
784
|
+
*/
|
785
|
+
|
786
|
+
#ifdef HAVE_ICONVLIST
|
787
|
+
struct iconv_name_list
|
788
|
+
{
|
789
|
+
unsigned int namescount;
|
790
|
+
const char *const *names;
|
791
|
+
VALUE array;
|
792
|
+
};
|
793
|
+
|
794
|
+
static VALUE
|
795
|
+
list_iconv_i(VALUE ptr)
|
796
|
+
{
|
797
|
+
struct iconv_name_list *p = (struct iconv_name_list *)ptr;
|
798
|
+
unsigned int i, namescount = p->namescount;
|
799
|
+
const char *const *names = p->names;
|
800
|
+
VALUE ary = rb_ary_new2(namescount);
|
801
|
+
|
802
|
+
for (i = 0; i < namescount; i++) {
|
803
|
+
rb_ary_push(ary, rb_str_new2(names[i]));
|
804
|
+
}
|
805
|
+
if (p->array) {
|
806
|
+
return rb_ary_push(p->array, ary);
|
807
|
+
}
|
808
|
+
return rb_yield(ary);
|
809
|
+
}
|
810
|
+
|
811
|
+
static int
|
812
|
+
list_iconv(unsigned int namescount, const char *const *names, void *data)
|
813
|
+
{
|
814
|
+
int *state = data;
|
815
|
+
struct iconv_name_list list;
|
816
|
+
|
817
|
+
list.namescount = namescount;
|
818
|
+
list.names = names;
|
819
|
+
list.array = ((VALUE *)data)[1];
|
820
|
+
rb_protect(list_iconv_i, (VALUE)&list, state);
|
821
|
+
return *state;
|
822
|
+
}
|
823
|
+
#endif
|
824
|
+
|
825
|
+
#if defined(HAVE_ICONVLIST) || defined(HAVE___ICONV_FREE_LIST)
/*
 * Implementation of Iconv.list: enumerates the encodings the iconv library
 * reports.
 *
 * With iconvlist(): each encoding is represented by an array of its names.
 * Given a block, every such array is yielded and nil is returned; without a
 * block, an array of those arrays is returned.
 *
 * With __iconv_get_list(): the names form one flat array.  Without a block
 * that array is returned; with a block each name is yielded and nil is
 * returned.  nil is also returned when __iconv_get_list() reports failure.
 */
static VALUE
iconv_s_list(void)
{
#ifdef HAVE_ICONVLIST
    int state;
    VALUE args[2];

    /*
     * Slot 0 doubles as the rb_protect() state written by list_iconv().
     * Clear it up front: if iconvlist() never invokes the callback the slot
     * would otherwise be read uninitialized below and could trigger a bogus
     * rb_jump_tag().
     */
    args[0] = 0;
    /* slot 1: collecting array, or 0 to make the callback yield instead */
    args[1] = rb_block_given_p() ? 0 : rb_ary_new();
    iconvlist(list_iconv, args);
    state = *(int *)args;
    /* re-raise whatever interrupted the callback, now that iconvlist() returned */
    if (state) rb_jump_tag(state);
    if (args[1]) return args[1];
#elif defined(HAVE___ICONV_FREE_LIST)
    char **list;
    size_t sz, i;
    VALUE ary;

    if (__iconv_get_list(&list, &sz)) return Qnil;

    /* copy the names into Ruby strings before releasing the C list */
    ary = rb_ary_new2(sz);
    for (i = 0; i < sz; i++) {
        rb_ary_push(ary, rb_str_new2(list[i]));
    }
    __iconv_free_list(list, sz);

    if (!rb_block_given_p())
        return ary;
    for (i = 0; i < RARRAY_LEN(ary); i++) {
        rb_yield(RARRAY_PTR(ary)[i]);
    }
#endif
    return Qnil;
}
#else
#define iconv_s_list rb_f_notimplement
#endif
|
862
|
+
|
691
863
|
/*
|
692
864
|
* Document-method: close
|
693
865
|
*
|
@@ -700,32 +872,20 @@ iconv_s_conv
|
|
700
872
|
* its initial shift state.
|
701
873
|
*/
|
702
874
|
static VALUE
|
703
|
-
iconv_init_state
|
704
|
-
#ifdef HAVE_PROTOTYPES
|
705
|
-
(VALUE cd)
|
706
|
-
#else /* HAVE_PROTOTYPES */
|
707
|
-
(cd)
|
708
|
-
VALUE cd;
|
709
|
-
#endif /* HAVE_PROTOTYPES */
|
875
|
+
iconv_init_state(VALUE self)
|
710
876
|
{
|
711
|
-
|
877
|
+
iconv_t cd = VALUE2ICONV((VALUE)DATA_PTR(self));
|
878
|
+
DATA_PTR(self) = NULL;
|
879
|
+
return iconv_convert(cd, Qnil, 0, 0, ENCODING_GET(self), NULL);
|
712
880
|
}
|
713
881
|
|
714
882
|
static VALUE
|
715
|
-
iconv_finish
|
716
|
-
#ifdef HAVE_PROTOTYPES
|
717
|
-
(VALUE self)
|
718
|
-
#else /* HAVE_PROTOTYPES */
|
719
|
-
(self)
|
720
|
-
VALUE self;
|
721
|
-
#endif /* HAVE_PROTOTYPES */
|
883
|
+
iconv_finish(VALUE self)
|
722
884
|
{
|
723
885
|
VALUE cd = check_iconv(self);
|
724
886
|
|
725
887
|
if (!cd) return Qnil;
|
726
|
-
|
727
|
-
|
728
|
-
return rb_ensure(iconv_init_state, cd, iconv_free, cd);
|
888
|
+
return rb_ensure(iconv_init_state, self, iconv_free, cd);
|
729
889
|
}
|
730
890
|
|
731
891
|
/*
|
@@ -756,34 +916,186 @@ iconv_finish
|
|
756
916
|
* See the Iconv documentation.
|
757
917
|
*/
|
758
918
|
static VALUE
|
759
|
-
iconv_iconv
|
760
|
-
#ifdef HAVE_PROTOTYPES
|
761
|
-
(int argc, VALUE *argv, VALUE self)
|
762
|
-
#else /* HAVE_PROTOTYPES */
|
763
|
-
(argc, argv, self)
|
764
|
-
int argc;
|
765
|
-
VALUE *argv;
|
766
|
-
VALUE self;
|
767
|
-
#endif /* HAVE_PROTOTYPES */
|
919
|
+
iconv_iconv(int argc, VALUE *argv, VALUE self)
|
768
920
|
{
|
769
921
|
VALUE str, n1, n2;
|
770
922
|
VALUE cd = check_iconv(self);
|
771
923
|
long start = 0, length = 0, slen = 0;
|
772
924
|
|
773
925
|
rb_scan_args(argc, argv, "12", &str, &n1, &n2);
|
774
|
-
if (!NIL_P(str))
|
926
|
+
if (!NIL_P(str)) {
|
927
|
+
VALUE n = rb_str_length(StringValue(str));
|
928
|
+
slen = NUM2LONG(n);
|
929
|
+
}
|
775
930
|
if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
|
776
931
|
if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
932
|
+
length = NIL_P(n2) ? -1 : NUM2LONG(n2);
|
933
|
+
}
|
934
|
+
}
|
935
|
+
if (start > 0 || length > 0) {
|
936
|
+
rb_encoding *enc = rb_enc_get(str);
|
937
|
+
const char *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
|
938
|
+
const char *ps = s;
|
939
|
+
if (start > 0) {
|
940
|
+
start = (ps = rb_enc_nth(s, e, start, enc)) - s;
|
941
|
+
}
|
942
|
+
if (length > 0) {
|
943
|
+
length = rb_enc_nth(ps, e, length, enc) - ps;
|
783
944
|
}
|
784
945
|
}
|
785
946
|
|
786
|
-
return iconv_convert(VALUE2ICONV(cd), str, start, length, NULL);
|
947
|
+
return iconv_convert(VALUE2ICONV(cd), str, start, length, ENCODING_GET(self), NULL);
|
948
|
+
}
|
949
|
+
|
950
|
+
/*
|
951
|
+
* Document-method: conv
|
952
|
+
* call-seq: conv(str...)
|
953
|
+
*
|
954
|
+
* Equivalent to
|
955
|
+
*
|
956
|
+
* iconv(nil, str..., nil).join
|
957
|
+
*/
|
958
|
+
static VALUE
|
959
|
+
iconv_conv(int argc, VALUE *argv, VALUE self)
|
960
|
+
{
|
961
|
+
iconv_t cd = VALUE2ICONV(check_iconv(self));
|
962
|
+
VALUE str, s;
|
963
|
+
int toidx = ENCODING_GET(self);
|
964
|
+
|
965
|
+
str = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
|
966
|
+
if (argc > 0) {
|
967
|
+
do {
|
968
|
+
s = iconv_convert(cd, *argv++, 0, -1, toidx, NULL);
|
969
|
+
if (RSTRING_LEN(s))
|
970
|
+
rb_str_buf_append(str, s);
|
971
|
+
} while (--argc);
|
972
|
+
s = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
|
973
|
+
if (RSTRING_LEN(s))
|
974
|
+
rb_str_buf_append(str, s);
|
975
|
+
}
|
976
|
+
|
977
|
+
return str;
|
978
|
+
}
|
979
|
+
|
980
|
+
#ifdef ICONV_TRIVIALP
/*
 * Document-method: trivial?
 * call-seq: trivial?
 *
 * Returns the trivial flag: true when the ICONV_TRIVIALP query reports a
 * non-zero value, false otherwise.
 */
static VALUE
iconv_trivialp(VALUE self)
{
    int flag = 0;

    iconv_ctl(self, ICONV_TRIVIALP, flag);
    return flag ? Qtrue : Qfalse;
}
#else
#define iconv_trivialp rb_f_notimplement
#endif
|
998
|
+
|
999
|
+
#ifdef ICONV_GET_TRANSLITERATE
/*
 * Document-method: transliterate?
 * call-seq: transliterate?
 *
 * Returns the transliterate flag: true when the ICONV_GET_TRANSLITERATE
 * query reports a non-zero value, false otherwise.
 */
static VALUE
iconv_get_transliterate(VALUE self)
{
    int flag = 0;

    iconv_ctl(self, ICONV_GET_TRANSLITERATE, flag);
    return flag ? Qtrue : Qfalse;
}
#else
#define iconv_get_transliterate rb_f_notimplement
#endif
|
1017
|
+
|
1018
|
+
#ifdef ICONV_SET_TRANSLITERATE
/*
 * Document-method: transliterate=
 * call-seq: cd.transliterate = flag
 *
 * Sets the transliterate flag from the truthiness of +flag+ and returns
 * the receiver.
 */
static VALUE
iconv_set_transliterate(VALUE self, VALUE transliterate)
{
    /* collapse any Ruby object to the int that is handed to iconv_ctl */
    int enabled = RTEST(transliterate);

    iconv_ctl(self, ICONV_SET_TRANSLITERATE, enabled);
    return self;
}
#else
#define iconv_set_transliterate rb_f_notimplement
#endif
|
1035
|
+
|
1036
|
+
#ifdef ICONV_GET_DISCARD_ILSEQ
/*
 * Document-method: discard_ilseq?
 * call-seq: discard_ilseq?
 *
 * Returns the discard_ilseq flag: true when the ICONV_GET_DISCARD_ILSEQ
 * query reports a non-zero value, false otherwise.
 */
static VALUE
iconv_get_discard_ilseq(VALUE self)
{
    int flag = 0;

    iconv_ctl(self, ICONV_GET_DISCARD_ILSEQ, flag);
    return flag ? Qtrue : Qfalse;
}
#else
#define iconv_get_discard_ilseq rb_f_notimplement
#endif
|
1054
|
+
|
1055
|
+
#ifdef ICONV_SET_DISCARD_ILSEQ
/*
 * Document-method: discard_ilseq=
 * call-seq: cd.discard_ilseq = flag
 *
 * Sets the discard_ilseq flag from the truthiness of +flag+ and returns
 * the receiver.
 */
static VALUE
iconv_set_discard_ilseq(VALUE self, VALUE discard_ilseq)
{
    /* collapse any Ruby object to the int that is handed to iconv_ctl */
    int enabled = RTEST(discard_ilseq);

    iconv_ctl(self, ICONV_SET_DISCARD_ILSEQ, enabled);
    return self;
}
#else
#define iconv_set_discard_ilseq rb_f_notimplement
#endif
|
1072
|
+
|
1073
|
+
/*
|
1074
|
+
* Document-method: ctlmethods
|
1075
|
+
* call-seq: Iconv.ctlmethods => array
|
1076
|
+
*
|
1077
|
+
* Returns available iconvctl() method list.
|
1078
|
+
*/
|
1079
|
+
static VALUE
|
1080
|
+
iconv_s_ctlmethods(VALUE klass)
|
1081
|
+
{
|
1082
|
+
VALUE ary = rb_ary_new();
|
1083
|
+
#ifdef ICONV_TRIVIALP
|
1084
|
+
rb_ary_push(ary, ID2SYM(rb_intern("trivial?")));
|
1085
|
+
#endif
|
1086
|
+
#ifdef ICONV_GET_TRANSLITERATE
|
1087
|
+
rb_ary_push(ary, ID2SYM(rb_intern("transliterate?")));
|
1088
|
+
#endif
|
1089
|
+
#ifdef ICONV_SET_TRANSLITERATE
|
1090
|
+
rb_ary_push(ary, ID2SYM(rb_intern("transliterate=")));
|
1091
|
+
#endif
|
1092
|
+
#ifdef ICONV_GET_DISCARD_ILSEQ
|
1093
|
+
rb_ary_push(ary, ID2SYM(rb_intern("discard_ilseq?")));
|
1094
|
+
#endif
|
1095
|
+
#ifdef ICONV_SET_DISCARD_ILSEQ
|
1096
|
+
rb_ary_push(ary, ID2SYM(rb_intern("discard_ilseq=")));
|
1097
|
+
#endif
|
1098
|
+
return ary;
|
787
1099
|
}
|
788
1100
|
|
789
1101
|
/*
|
@@ -802,13 +1114,7 @@ iconv_iconv
|
|
802
1114
|
* failure and the last element is string on the way.
|
803
1115
|
*/
|
804
1116
|
static VALUE
|
805
|
-
iconv_failure_success
|
806
|
-
#ifdef HAVE_PROTOTYPES
|
807
|
-
(VALUE self)
|
808
|
-
#else /* HAVE_PROTOTYPES */
|
809
|
-
(self)
|
810
|
-
VALUE self;
|
811
|
-
#endif /* HAVE_PROTOTYPES */
|
1117
|
+
iconv_failure_success(VALUE self)
|
812
1118
|
{
|
813
1119
|
return rb_attr_get(self, rb_success);
|
814
1120
|
}
|
@@ -818,16 +1124,10 @@ iconv_failure_success
|
|
818
1124
|
* call-seq: failed
|
819
1125
|
*
|
820
1126
|
* Returns substring of the original string passed to Iconv that starts at the
|
821
|
-
* character caused the exception.
|
1127
|
+
* character that caused the exception.
|
822
1128
|
*/
|
823
1129
|
static VALUE
|
824
|
-
iconv_failure_failed
|
825
|
-
#ifdef HAVE_PROTOTYPES
|
826
|
-
(VALUE self)
|
827
|
-
#else /* HAVE_PROTOTYPES */
|
828
|
-
(self)
|
829
|
-
VALUE self;
|
830
|
-
#endif /* HAVE_PROTOTYPES */
|
1130
|
+
iconv_failure_failed(VALUE self)
|
831
1131
|
{
|
832
1132
|
return rb_attr_get(self, rb_failed);
|
833
1133
|
}
|
@@ -839,13 +1139,7 @@ iconv_failure_failed
|
|
839
1139
|
* Returns an inspected string such as: #<_class_: _success_, _failed_>
|
840
1140
|
*/
|
841
1141
|
static VALUE
|
842
|
-
iconv_failure_inspect
|
843
|
-
#ifdef HAVE_PROTOTYPES
|
844
|
-
(VALUE self)
|
845
|
-
#else /* HAVE_PROTOTYPES */
|
846
|
-
(self)
|
847
|
-
VALUE self;
|
848
|
-
#endif /* HAVE_PROTOTYPES */
|
1142
|
+
iconv_failure_inspect(VALUE self)
|
849
1143
|
{
|
850
1144
|
const char *cname = rb_class2name(CLASS_OF(self));
|
851
1145
|
VALUE success = rb_attr_get(self, rb_success);
|
@@ -860,13 +1154,13 @@ iconv_failure_inspect
|
|
860
1154
|
|
861
1155
|
/*
|
862
1156
|
* Document-class: Iconv::InvalidEncoding
|
863
|
-
*
|
1157
|
+
*
|
864
1158
|
* Requested coding-system is not available on this system.
|
865
1159
|
*/
|
866
1160
|
|
867
1161
|
/*
|
868
1162
|
* Document-class: Iconv::IllegalSequence
|
869
|
-
*
|
1163
|
+
*
|
870
1164
|
* Input conversion stopped due to an input byte that does not belong to
|
871
1165
|
* the input codeset, or the output codeset does not contain the
|
872
1166
|
* character.
|
@@ -874,36 +1168,44 @@ iconv_failure_inspect
|
|
874
1168
|
|
875
1169
|
/*
|
876
1170
|
* Document-class: Iconv::InvalidCharacter
|
877
|
-
*
|
1171
|
+
*
|
878
1172
|
* Input conversion stopped due to an incomplete character or shift
|
879
1173
|
* sequence at the end of the input buffer.
|
880
1174
|
*/
|
881
1175
|
|
882
1176
|
/*
|
883
1177
|
* Document-class: Iconv::OutOfRange
|
884
|
-
*
|
1178
|
+
*
|
885
1179
|
* Iconv library internal error. Must not occur.
|
886
1180
|
*/
|
887
1181
|
|
888
1182
|
/*
|
889
1183
|
* Document-class: Iconv::BrokenLibrary
|
890
|
-
*
|
1184
|
+
*
|
891
1185
|
* Detected a bug in the underlying iconv(3) library.
|
892
1186
|
* * returns an error without setting errno properly
|
893
1187
|
*/
|
894
1188
|
|
895
1189
|
void
|
896
|
-
Init_iconv
|
1190
|
+
Init_iconv(void)
|
897
1191
|
{
|
898
1192
|
VALUE rb_cIconv = rb_define_class("Iconv", rb_cData);
|
899
1193
|
|
900
1194
|
rb_define_alloc_func(rb_cIconv, iconv_s_allocate);
|
901
|
-
rb_define_singleton_method(rb_cIconv, "open", iconv_s_open,
|
1195
|
+
rb_define_singleton_method(rb_cIconv, "open", iconv_s_open, -1);
|
902
1196
|
rb_define_singleton_method(rb_cIconv, "iconv", iconv_s_iconv, -1);
|
903
1197
|
rb_define_singleton_method(rb_cIconv, "conv", iconv_s_conv, 3);
|
904
|
-
|
1198
|
+
rb_define_singleton_method(rb_cIconv, "list", iconv_s_list, 0);
|
1199
|
+
rb_define_singleton_method(rb_cIconv, "ctlmethods", iconv_s_ctlmethods, 0);
|
1200
|
+
rb_define_method(rb_cIconv, "initialize", iconv_initialize, -1);
|
905
1201
|
rb_define_method(rb_cIconv, "close", iconv_finish, 0);
|
906
1202
|
rb_define_method(rb_cIconv, "iconv", iconv_iconv, -1);
|
1203
|
+
rb_define_method(rb_cIconv, "conv", iconv_conv, -1);
|
1204
|
+
rb_define_method(rb_cIconv, "trivial?", iconv_trivialp, 0);
|
1205
|
+
rb_define_method(rb_cIconv, "transliterate?", iconv_get_transliterate, 0);
|
1206
|
+
rb_define_method(rb_cIconv, "transliterate=", iconv_set_transliterate, 1);
|
1207
|
+
rb_define_method(rb_cIconv, "discard_ilseq?", iconv_get_discard_ilseq, 0);
|
1208
|
+
rb_define_method(rb_cIconv, "discard_ilseq=", iconv_set_discard_ilseq, 1);
|
907
1209
|
|
908
1210
|
rb_eIconvFailure = rb_define_module_under(rb_cIconv, "Failure");
|
909
1211
|
rb_define_method(rb_eIconvFailure, "initialize", iconv_failure_initialize, 3);
|
@@ -924,8 +1226,11 @@ Init_iconv _((void))
|
|
924
1226
|
|
925
1227
|
rb_success = rb_intern("success");
|
926
1228
|
rb_failed = rb_intern("failed");
|
1229
|
+
id_transliterate = rb_intern("transliterate");
|
1230
|
+
id_discard_ilseq = rb_intern("discard_ilseq");
|
927
1231
|
|
928
1232
|
rb_gc_register_address(&charset_map);
|
929
1233
|
charset_map = rb_hash_new();
|
930
1234
|
rb_define_singleton_method(rb_cIconv, "charset_map", charset_map_get, 0);
|
931
1235
|
}
|
1236
|
+
|