iconv 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,104 @@
1
+ #! /usr/bin/ruby
2
+ # :stopdoc:
3
+ require 'rbconfig'
4
+ require 'optparse'
5
+
6
+ # http://www.ctan.org/get/macros/texinfo/texinfo/gnulib/lib/config.charset
7
+ # Tue, 25 Dec 2007 00:00:00 GMT
8
+
9
+ OS = RbConfig::CONFIG["target_os"]
10
+ SHELL = RbConfig::CONFIG['SHELL']
11
+
12
# Hash that remembers the order in which keys were first stored and keeps
# every value ever assigned to a key.
#
# Each raw entry is an Array of the form [insertion_index, key, value, ...].
# Assigning to an existing key appends the value to that entry instead of
# replacing it.
class Hash::Ordered < Hash
  # Returns the most recently assigned value for +key+, or nil when the
  # key is absent.
  def [](key)
    entry = super
    entry && entry.last
  end

  # Stores +val+ under +key+.  The first assignment creates the entry
  # (tagged with the current size as its insertion index); later
  # assignments append to it.
  def []=(key, val)
    if key?(key)
      fetch(key) << val
    else
      super(key, [size, key, val])
    end
  end

  # Returns the entries in insertion order as [key, value, ...] arrays
  # (the insertion index is dropped).  Sorting on the index alone avoids
  # ever comparing keys or values with each other.
  def sort
    values.sort_by {|entry| entry.first}.collect {|_index, *rest| rest}
  end

  # Yields each [key, value, ...] array in insertion order.
  def each(&block)
    sort.each(&block)
  end
end
27
+
28
# Writes a Ruby source file that fills Iconv.charset_map for one target OS.
#
# config_charset:: path of a gnulib-style config.charset shell script
# mapfile::        path of the file to write; when nil the result goes to
#                  STDOUT
# target::         OS name matched against the glob patterns of the
#                  script's <tt>case "$os" in</tt> arms (defaults to OS)
#
# Only the first arm whose pattern matches +target+ is read.  Each of its
# <tt>echo "SYSTEM CANONICAL"</tt> lines maps the downcased canonical name
# to the system name; when a canonical name is listed several times the
# first system name wins.
def charset_alias(config_charset, mapfile, target = OS)
  map = Hash::Ordered.new
  comments = []
  # File.open, not Kernel#open: the path comes from the command line and
  # Kernel#open would run a subprocess for a name starting with "|".
  File.open(config_charset) do |input|
    # Skip ahead to the shell case statement.
    input.find {|line| /^case "\$os" in/ =~ line} or break
    # Find the first arm with a glob pattern matching the target.
    input.find {|line|
      arm = /^\s*([-\w\*]+(?:\s*\|\s*[-\w\*]+)*)(?=\))/.match(line) and
        arm[0].split('|').any? {|pattern| File.fnmatch?(pattern.strip, target)}
    } or break
    # Collect that arm's echo lines; stop at the terminating ";;".
    input.find do |line|
      case line
      when /^\s*echo "(?:\$\w+\.)?([-\w*]+)\s+([-\w]+)"/
        map[$2.downcase] = $1
        false
      when /^\s*;;/
        true
      else
        false
      end
    end
  end
  case target
  when /linux|-gnu/
    # map.delete('ascii')
  when /cygwin|os2-emx/
    # get rid of tilde/yen problem.
    map['shift_jis'] = 'cp932'
  end
  counts = Hash.new(0)
  pairs = map.sort.collect do |can, *sys|
    en_us = sys.collect {|s| /^en_us(?=.|$)/i.match(s)}.compact.first
    if en_us
      # Drop other-locale names that carry the same suffix as the en_US
      # one.  The suffix is literal text, so escape it before embedding.
      suffix = Regexp.new(Regexp.escape(en_us.post_match))
      noen = /^(?!en_us)\w+_\w+#{suffix}$/i
      sys.reject! {|s| noen =~ s}
    end
    chosen = sys.first
    counts[chosen] += 1
    [can, chosen]
  end
  # System names used by more than one canonical name are emitted once,
  # through a local variable; the variable name is filled in lazily below.
  shared = {}
  counts.each {|sys, n| shared[sys] = nil if n > 1}
  writer = proc do |f|
    f.puts("require 'iconv.so'")
    f.puts
    f.puts(comments)
    f.puts("class Iconv")
    i = 0
    pairs.each do |can, sys|
      if (name = shared[sys])
        value = name
      elsif shared.key?(sys)
        name = shared[sys] = "sys#{i += 1}"
        value = "#{name} = '#{sys}'.freeze"
      else
        value = "'#{sys}'.freeze"
      end
      f.puts(" charset_map['#{can}'] = #{value}")
    end
    f.puts("end")
  end
  if mapfile
    File.open(mapfile, "w", &writer)
  else
    writer[STDOUT]
  end
end
95
+
96
# Command-line entry point.  Accepts an optional "--target OS" switch
# followed by one or two positional arguments, which are handed to
# charset_alias as the config.charset path and the output file (STDOUT
# when the second argument is omitted).
target = OS
opt = nil
parsed = ARGV.options do |parser|
  opt = parser
  parser.banner << " config.status map.rb"
  parser.on("--target OS") {|t| target = t}
  # The block's value becomes the result of ARGV.options: falsy when the
  # number of remaining arguments is wrong.
  parser.parse! and (1..2) === ARGV.size
end
# ARGV.options yields nil on a parse error as well, so both failure modes
# end up printing the usage text.
abort opt.to_s unless parsed
charset_alias(ARGV[0], ARGV[1], target)
@@ -0,0 +1,51 @@
1
+ require 'mkmf'
2
+
3
dir_config("iconv")

# The charset wrapper is generated when config.charset ships next to this
# file, unless overridden by --with-config-charset / --enable-config-charset.
conf = File.exist?(File.join($srcdir, "config.charset"))
conf = with_config("config-charset", enable_config("config-charset", conf))

if have_func("iconv", "iconv.h") or
    have_library("iconv", "iconv", "iconv.h")
  # Preprocess a probe line and inspect the real prototype to learn
  # whether the second argument of iconv() is declared const.
  if checking_for("const of iconv() 2nd argument") do
      create_tmpsrc(cpp_include("iconv.h") + "---> iconv(cd,0,0,0,0) <---")
      src = xpopen(cpp_command("")) {|f|f.read}
      if !(func = src[/^--->\s*(\w+).*\s*<---/, 1])
        Logging::message "iconv function name not found"
        false
      elsif !(second = src[%r"\b#{func}\s*\(.*?,(.*?),.*?\)\s*;"m, 1])
        Logging::message "prototype for #{func}() not found"
        false
      else
        # $& is the prototype text matched by the elsif condition above.
        Logging::message $&+"\n"
        /\bconst\b/ =~ second
      end
    end
    $defs.push('-DICONV_INPTR_CONST')
  end
  if conf
    prefix = '$(srcdir)'
    prefix = $nmake ? "{#{prefix}}" : "#{prefix}/"
    if $extout
      wrapper = "$(RUBYARCHDIR)/iconv.rb"
    else
      wrapper = "./iconv.rb"
      $INSTALLFILES = [[wrapper, "$(RUBYARCHDIR)"]]
    end
    if String === conf
      # An explicit location was given; remember whether it is a URI so
      # that it is not listed as a make prerequisite below.
      require 'uri'
      scheme = URI.parse(conf).scheme
    else
      conf = "$(srcdir)/config.charset"
    end
    $cleanfiles << wrapper
  end
  create_makefile("iconv")
  if conf
    # Append the rule that generates the wrapper with charset_alias.rb.
    File.open("Makefile", "a") do |mf|
      mf.print("\nall: #{wrapper}\n\n#{wrapper}: #{prefix}charset_alias.rb")
      mf.print(" ", conf) unless scheme
      mf.print("\n\t$(RUBY) $(srcdir)/charset_alias.rb #{conf} $@\n")
    end
  end
end
@@ -0,0 +1,931 @@
1
+ /* -*- mode:c; c-file-style:"ruby" -*- */
2
+ /**********************************************************************
3
+
4
+ iconv.c -
5
+
6
+ $Author$
7
+ $Date$
8
+ created at: Wed Dec 1 20:28:09 JST 1999
9
+
10
+ All the files in this distribution are covered under the Ruby's
11
+ license (see the file COPYING).
12
+
13
+ Documentation by Yukihiro Matsumoto and Gavin Sinclair.
14
+
15
+ **********************************************************************/
16
+
17
+ #include "ruby.h"
18
+ #include <errno.h>
19
+ #include <iconv.h>
20
+ #include <assert.h>
21
+ #include "st.h"
22
+ #include "intern.h"
23
+
24
+ /*
25
+ * Document-class: Iconv
26
+ *
27
+ * == Summary
28
+ *
29
+ * Ruby extension for charset conversion.
30
+ *
31
+ * == Abstract
32
+ *
33
+ * Iconv is a wrapper class for the UNIX 95 <tt>iconv()</tt> function family,
34
+ * which translates strings between various encoding systems.
35
+ *
36
+ * See Open Group's on-line documents for more details.
37
+ * * <tt>iconv.h</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.h.html
38
+ * * <tt>iconv_open()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_open.html
39
+ * * <tt>iconv()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.html
40
+ * * <tt>iconv_close()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_close.html
41
+ *
42
+ * Which coding systems are available is platform-dependent.
43
+ *
44
+ * == Examples
45
+ *
46
+ * 1. Simple conversion between two charsets.
47
+ *
48
+ * converted_text = Iconv.conv('iso-8859-15', 'utf-8', text)
49
+ *
50
+ * 2. Instantiate a new Iconv and use method Iconv#iconv.
51
+ *
52
+ * cd = Iconv.new(to, from)
53
+ * begin
54
+ * input.each { |s| output << cd.iconv(s) }
55
+ * output << cd.iconv(nil) # Don't forget this!
56
+ * ensure
57
+ * cd.close
58
+ * end
59
+ *
60
+ * 3. Invoke Iconv.open with a block.
61
+ *
62
+ * Iconv.open(to, from) do |cd|
63
+ * input.each { |s| output << cd.iconv(s) }
64
+ * output << cd.iconv(nil)
65
+ * end
66
+ *
67
+ * 4. Shorthand for (3).
68
+ *
69
+ * Iconv.iconv(to, from, *input.to_a)
70
+ */
71
+
72
+ /* Invalid value for iconv_t is -1 but 0 for VALUE, I hope VALUE is
73
+ big enough to keep iconv_t */
74
+ #define VALUE2ICONV(v) ((iconv_t)((VALUE)(v) ^ -1))
75
+ #define ICONV2VALUE(c) ((VALUE)(c) ^ -1)
76
+
77
+ struct iconv_env_t
78
+ {
79
+ iconv_t cd;
80
+ int argc;
81
+ VALUE *argv;
82
+ VALUE ret;
83
+ VALUE (*append)_((VALUE, VALUE));
84
+ };
85
+
86
+ static VALUE rb_eIconvInvalidEncoding;
87
+ static VALUE rb_eIconvFailure;
88
+ static VALUE rb_eIconvIllegalSeq;
89
+ static VALUE rb_eIconvInvalidChar;
90
+ static VALUE rb_eIconvOutOfRange;
91
+ static VALUE rb_eIconvBrokenLibrary;
92
+
93
+ static ID rb_success, rb_failed;
94
+ static VALUE iconv_fail _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg));
95
+ static VALUE iconv_fail_retry _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg));
96
+ static VALUE iconv_failure_initialize _((VALUE error, VALUE mesg, VALUE success, VALUE failed));
97
+ static VALUE iconv_failure_success _((VALUE self));
98
+ static VALUE iconv_failure_failed _((VALUE self));
99
+
100
+ static iconv_t iconv_create _((VALUE to, VALUE from));
101
+ static void iconv_dfree _((void *cd));
102
+ static VALUE iconv_free _((VALUE cd));
103
+ static VALUE iconv_try _((iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen));
104
+ static VALUE rb_str_derive _((VALUE str, const char* ptr, int len));
105
+ static VALUE iconv_convert _((iconv_t cd, VALUE str, long start, long length, struct iconv_env_t* env));
106
+ static VALUE iconv_s_allocate _((VALUE klass));
107
+ static VALUE iconv_initialize _((VALUE self, VALUE to, VALUE from));
108
+ static VALUE iconv_s_open _((VALUE self, VALUE to, VALUE from));
109
+ static VALUE iconv_s_convert _((struct iconv_env_t* env));
110
+ static VALUE iconv_s_iconv _((int argc, VALUE *argv, VALUE self));
111
+ static VALUE iconv_init_state _((VALUE cd));
112
+ static VALUE iconv_finish _((VALUE self));
113
+ static VALUE iconv_iconv _((int argc, VALUE *argv, VALUE self));
114
+
115
+ static VALUE charset_map;
116
+
117
+ /*
118
+ * Document-method: charset_map
119
+ * call-seq: Iconv.charset_map
120
+ *
121
+ * Returns the map from canonical name to system dependent name.
122
+ */
123
+ static VALUE charset_map_get _((void))
124
+ {
125
+ return charset_map;
126
+ }
127
+
128
+ static char *
129
+ map_charset
130
+ #ifdef HAVE_PROTOTYPES
131
+ (VALUE *code)
132
+ #else /* HAVE_PROTOTYPES */
133
+ (code)
134
+ VALUE *code;
135
+ #endif /* HAVE_PROTOTYPES */
136
+ {
137
+ VALUE val = *code;
138
+
139
+ if (RHASH(charset_map)->tbl && RHASH(charset_map)->tbl->num_entries) {
140
+ VALUE key = rb_funcall2(val, rb_intern("downcase"), 0, 0);
141
+ StringValuePtr(key);
142
+ if (st_lookup(RHASH(charset_map)->tbl, key, &val)) {
143
+ *code = val;
144
+ }
145
+ }
146
+ return StringValuePtr(*code);
147
+ }
148
+
149
+ NORETURN(static void rb_iconv_sys_fail(const char *s));
150
+ static void
151
+ rb_iconv_sys_fail(const char *s)
152
+ {
153
+ if (errno == 0) {
154
+ rb_exc_raise(iconv_fail(rb_eIconvBrokenLibrary, Qnil, Qnil, NULL, s));
155
+ }
156
+ rb_sys_fail(s);
157
+ }
158
+
159
+ #define rb_sys_fail(s) rb_iconv_sys_fail(s)
160
+
161
+ static iconv_t
162
+ iconv_create
163
+ #ifdef HAVE_PROTOTYPES
164
+ (VALUE to, VALUE from)
165
+ #else /* HAVE_PROTOTYPES */
166
+ (to, from)
167
+ VALUE to;
168
+ VALUE from;
169
+ #endif /* HAVE_PROTOTYPES */
170
+ {
171
+ const char* tocode = map_charset(&to);
172
+ const char* fromcode = map_charset(&from);
173
+
174
+ iconv_t cd = iconv_open(tocode, fromcode);
175
+
176
+ if (cd == (iconv_t)-1) {
177
+ switch (errno) {
178
+ case EMFILE:
179
+ case ENFILE:
180
+ case ENOMEM:
181
+ rb_gc();
182
+ cd = iconv_open(tocode, fromcode);
183
+ }
184
+ if (cd == (iconv_t)-1) {
185
+ int inval = errno == EINVAL;
186
+ const char *s = inval ? "invalid encoding " : "iconv";
187
+ volatile VALUE msg = rb_str_new(0, strlen(s) + RSTRING(to)->len +
188
+ RSTRING(from)->len + 8);
189
+
190
+ sprintf(RSTRING(msg)->ptr, "%s(\"%s\", \"%s\")",
191
+ s, RSTRING(to)->ptr, RSTRING(from)->ptr);
192
+ s = RSTRING(msg)->ptr;
193
+ RSTRING(msg)->len = strlen(s);
194
+ if (!inval) rb_sys_fail(s);
195
+ rb_exc_raise(iconv_fail(rb_eIconvInvalidEncoding, Qnil,
196
+ rb_ary_new3(2, to, from), NULL, s));
197
+ }
198
+ }
199
+
200
+ return cd;
201
+ }
202
+
203
+ static void
204
+ iconv_dfree
205
+ #ifdef HAVE_PROTOTYPES
206
+ (void *cd)
207
+ #else /* HAVE_PROTOTYPES */
208
+ (cd)
209
+ void *cd;
210
+ #endif /* HAVE_PROTOTYPES */
211
+ {
212
+ iconv_close(VALUE2ICONV(cd));
213
+ }
214
+
215
+ #define ICONV_FREE iconv_dfree
216
+
217
+ static VALUE
218
+ iconv_free
219
+ #ifdef HAVE_PROTOTYPES
220
+ (VALUE cd)
221
+ #else /* HAVE_PROTOTYPES */
222
+ (cd)
223
+ VALUE cd;
224
+ #endif /* HAVE_PROTOTYPES */
225
+ {
226
+ if (cd && iconv_close(VALUE2ICONV(cd)) == -1)
227
+ rb_sys_fail("iconv_close");
228
+ return Qnil;
229
+ }
230
+
231
+ static VALUE
232
+ check_iconv
233
+ #ifdef HAVE_PROTOTYPES
234
+ (VALUE obj)
235
+ #else /* HAVE_PROTOTYPES */
236
+ (obj)
237
+ VALUE obj;
238
+ #endif /* HAVE_PROTOTYPES */
239
+ {
240
+ Check_Type(obj, T_DATA);
241
+ if (RDATA(obj)->dfree != ICONV_FREE) {
242
+ rb_raise(rb_eArgError, "Iconv expected (%s)", rb_class2name(CLASS_OF(obj)));
243
+ }
244
+ return (VALUE)DATA_PTR(obj);
245
+ }
246
+
247
+ static VALUE
248
+ iconv_try
249
+ #ifdef HAVE_PROTOTYPES
250
+ (iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen)
251
+ #else /* HAVE_PROTOTYPES */
252
+ (cd, inptr, inlen, outptr, outlen)
253
+ iconv_t cd;
254
+ const char **inptr;
255
+ size_t *inlen;
256
+ char **outptr;
257
+ size_t *outlen;
258
+ #endif /* HAVE_PROTOTYPES */
259
+ {
260
+ #ifdef ICONV_INPTR_CONST
261
+ #define ICONV_INPTR_CAST
262
+ #else
263
+ #define ICONV_INPTR_CAST (char **)
264
+ #endif
265
+ size_t ret;
266
+
267
+ errno = 0;
268
+ ret = iconv(cd, ICONV_INPTR_CAST inptr, inlen, outptr, outlen);
269
+ if (ret == (size_t)-1) {
270
+ if (!*inlen)
271
+ return Qfalse;
272
+ switch (errno) {
273
+ case E2BIG:
274
+ /* try the left in next loop */
275
+ break;
276
+ case EILSEQ:
277
+ return rb_eIconvIllegalSeq;
278
+ case EINVAL:
279
+ return rb_eIconvInvalidChar;
280
+ case 0:
281
+ return rb_eIconvBrokenLibrary;
282
+ default:
283
+ rb_sys_fail("iconv");
284
+ }
285
+ }
286
+ else if (*inlen > 0) {
287
+ /* something goes wrong */
288
+ return rb_eIconvIllegalSeq;
289
+ }
290
+ else if (ret) {
291
+ return Qnil; /* conversion */
292
+ }
293
+ return Qfalse;
294
+ }
295
+
296
+ #define FAILED_MAXLEN 16
297
+
298
+ static VALUE iconv_failure_initialize
299
+ #ifdef HAVE_PROTOTYPES
300
+ (VALUE error, VALUE mesg, VALUE success, VALUE failed)
301
+ #else /* HAVE_PROTOTYPES */
302
+ (error, mesg, success, failed)
303
+ VALUE error, mesg, success, failed;
304
+ #endif /* HAVE_PROTOTYPES */
305
+ {
306
+ rb_call_super(1, &mesg);
307
+ rb_ivar_set(error, rb_success, success);
308
+ rb_ivar_set(error, rb_failed, failed);
309
+ return error;
310
+ }
311
+
312
+ static VALUE
313
+ iconv_fail
314
+ #ifdef HAVE_PROTOTYPES
315
+ (VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg)
316
+ #else /* HAVE_PROTOTYPES */
317
+ (error, success, failed, env, mesg)
318
+ VALUE error, success, failed;
319
+ struct iconv_env_t *env;
320
+ const char *mesg;
321
+ #endif /* HAVE_PROTOTYPES */
322
+ {
323
+ VALUE args[3];
324
+
325
+ if (mesg && *mesg) {
326
+ args[0] = rb_str_new2(mesg);
327
+ }
328
+ else if (TYPE(failed) != T_STRING || RSTRING(failed)->len < FAILED_MAXLEN) {
329
+ args[0] = rb_inspect(failed);
330
+ }
331
+ else {
332
+ args[0] = rb_inspect(rb_str_substr(failed, 0, FAILED_MAXLEN));
333
+ rb_str_cat2(args[0], "...");
334
+ }
335
+ args[1] = success;
336
+ args[2] = failed;
337
+ if (env) {
338
+ args[1] = env->append(rb_obj_dup(env->ret), success);
339
+ if (env->argc > 0) {
340
+ *(env->argv) = failed;
341
+ args[2] = rb_ary_new4(env->argc, env->argv);
342
+ }
343
+ }
344
+ return rb_class_new_instance(3, args, error);
345
+ }
346
+
347
+ static VALUE
348
+ iconv_fail_retry(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg)
349
+ {
350
+ error = iconv_fail(error, success, failed, env, mesg);
351
+ if (!rb_block_given_p()) rb_exc_raise(error);
352
+ ruby_errinfo = error;
353
+ return rb_yield(failed);
354
+ }
355
+
356
+ static VALUE
357
+ rb_str_derive
358
+ #ifdef HAVE_PROTOTYPES
359
+ (VALUE str, const char* ptr, int len)
360
+ #else /* HAVE_PROTOTYPES */
361
+ (str, ptr, len)
362
+ VALUE str;
363
+ const char *ptr;
364
+ int len;
365
+ #endif /* HAVE_PROTOTYPES */
366
+ {
367
+ VALUE ret;
368
+
369
+ if (NIL_P(str))
370
+ return rb_str_new(ptr, len);
371
+ if (RSTRING(str)->ptr == ptr && RSTRING(str)->len == len)
372
+ return str;
373
+ if (RSTRING(str)->ptr + RSTRING(str)->len == ptr + len)
374
+ ret = rb_str_substr(str, ptr - RSTRING(str)->ptr, len);
375
+ else
376
+ ret = rb_str_new(ptr, len);
377
+ OBJ_INFECT(ret, str);
378
+ return ret;
379
+ }
380
+
381
+ static VALUE
382
+ iconv_convert
383
+ #ifdef HAVE_PROTOTYPES
384
+ (iconv_t cd, VALUE str, long start, long length, struct iconv_env_t* env)
385
+ #else /* HAVE_PROTOTYPES */
386
+ (cd, str, start, length, env)
387
+ iconv_t cd;
388
+ VALUE str;
389
+ long start;
390
+ long length;
391
+ struct iconv_env_t *env;
392
+ #endif /* HAVE_PROTOTYPES */
393
+ {
394
+ VALUE ret = Qfalse;
395
+ VALUE error = Qfalse;
396
+ VALUE rescue;
397
+ const char *inptr, *instart;
398
+ size_t inlen;
399
+ /* I believe ONE CHARACTER never exceed this. */
400
+ char buffer[BUFSIZ];
401
+ char *outptr;
402
+ size_t outlen;
403
+
404
+ if (cd == (iconv_t)-1)
405
+ rb_raise(rb_eArgError, "closed iconv");
406
+
407
+ if (NIL_P(str)) {
408
+ /* Reset output pointer or something. */
409
+ inptr = "";
410
+ inlen = 0;
411
+ outptr = buffer;
412
+ outlen = sizeof(buffer);
413
+ error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
414
+ if (RTEST(error)) {
415
+ unsigned int i;
416
+ rescue = iconv_fail_retry(error, Qnil, Qnil, env, 0);
417
+ if (TYPE(rescue) == T_ARRAY) {
418
+ str = RARRAY(rescue)->len > 0 ? RARRAY(rescue)->ptr[0] : Qnil;
419
+ }
420
+ if (FIXNUM_P(str) && (i = FIX2INT(str)) <= 0xff) {
421
+ char c = i;
422
+ str = rb_str_new(&c, 1);
423
+ }
424
+ else if (!NIL_P(str)) {
425
+ StringValue(str);
426
+ }
427
+ }
428
+
429
+ inptr = NULL;
430
+ length = 0;
431
+ }
432
+ else {
433
+ int slen;
434
+
435
+ StringValue(str);
436
+ slen = RSTRING(str)->len;
437
+ inptr = RSTRING(str)->ptr;
438
+
439
+ inptr += start;
440
+ if (length < 0 || length > start + slen)
441
+ length = slen - start;
442
+ }
443
+ instart = inptr;
444
+ inlen = length;
445
+
446
+ do {
447
+ char errmsg[50];
448
+ const char *tmpstart = inptr;
449
+ outptr = buffer;
450
+ outlen = sizeof(buffer);
451
+
452
+ errmsg[0] = 0;
453
+ error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
454
+
455
+ if (0 <= outlen && outlen <= sizeof(buffer)) {
456
+ outlen = sizeof(buffer) - outlen;
457
+ if (NIL_P(error) || /* something converted */
458
+ outlen > inptr - tmpstart || /* input can't contain output */
459
+ (outlen < inptr - tmpstart && inlen > 0) || /* something skipped */
460
+ memcmp(buffer, tmpstart, outlen)) /* something differs */
461
+ {
462
+ if (NIL_P(str)) {
463
+ ret = rb_str_new(buffer, outlen);
464
+ }
465
+ else {
466
+ if (ret) {
467
+ ret = rb_str_buf_cat(ret, instart, tmpstart - instart);
468
+ }
469
+ else {
470
+ ret = rb_str_new(instart, tmpstart - instart);
471
+ OBJ_INFECT(ret, str);
472
+ }
473
+ ret = rb_str_buf_cat(ret, buffer, outlen);
474
+ instart = inptr;
475
+ }
476
+ }
477
+ else if (!inlen) {
478
+ inptr = tmpstart + outlen;
479
+ }
480
+ }
481
+ else {
482
+ /* Some iconv() have a bug, return *outlen out of range */
483
+ sprintf(errmsg, "bug?(output length = %ld)", (long)(sizeof(buffer) - outlen));
484
+ error = rb_eIconvOutOfRange;
485
+ }
486
+
487
+ if (RTEST(error)) {
488
+ long len = 0;
489
+
490
+ if (!ret)
491
+ ret = rb_str_derive(str, instart, inptr - instart);
492
+ else if (inptr > instart)
493
+ rb_str_cat(ret, instart, inptr - instart);
494
+ str = rb_str_derive(str, inptr, inlen);
495
+ rescue = iconv_fail_retry(error, ret, str, env, errmsg);
496
+ if (TYPE(rescue) == T_ARRAY) {
497
+ if ((len = RARRAY(rescue)->len) > 0)
498
+ rb_str_concat(ret, RARRAY(rescue)->ptr[0]);
499
+ if (len > 1 && !NIL_P(str = RARRAY(rescue)->ptr[1])) {
500
+ StringValue(str);
501
+ inlen = length = RSTRING(str)->len;
502
+ instart = inptr = RSTRING(str)->ptr;
503
+ continue;
504
+ }
505
+ }
506
+ else if (!NIL_P(rescue)) {
507
+ rb_str_concat(ret, rescue);
508
+ }
509
+ break;
510
+ }
511
+ } while (inlen > 0);
512
+
513
+ if (!ret)
514
+ ret = rb_str_derive(str, instart, inptr - instart);
515
+ else if (inptr > instart)
516
+ rb_str_cat(ret, instart, inptr - instart);
517
+ return ret;
518
+ }
519
+
520
+ static VALUE
521
+ iconv_s_allocate
522
+ #ifdef HAVE_PROTOTYPES
523
+ (VALUE klass)
524
+ #else /* HAVE_PROTOTYPES */
525
+ (klass)
526
+ VALUE klass;
527
+ #endif /* HAVE_PROTOTYPES */
528
+ {
529
+ return Data_Wrap_Struct(klass, 0, ICONV_FREE, 0);
530
+ }
531
+
532
+ /*
533
+ * Document-method: new
534
+ * call-seq: Iconv.new(to, from)
535
+ *
536
+ * Creates new code converter from a coding-system designated with +from+
537
+ * to another one designated with +to+.
538
+ *
539
+ * === Parameters
540
+ *
541
+ * +to+:: encoding name for destination
542
+ * +from+:: encoding name for source
543
+ *
544
+ * === Exceptions
545
+ *
546
+ * TypeError:: if +to+ or +from+ aren't String
547
+ * InvalidEncoding:: if the designated converter could not be found
548
+ * SystemCallError:: if <tt>iconv_open(3)</tt> fails
549
+ */
550
+ static VALUE
551
+ iconv_initialize
552
+ #ifdef HAVE_PROTOTYPES
553
+ (VALUE self, VALUE to, VALUE from)
554
+ #else /* HAVE_PROTOTYPES */
555
+ (self, to, from)
556
+ VALUE self;
557
+ VALUE to;
558
+ VALUE from;
559
+ #endif /* HAVE_PROTOTYPES */
560
+ {
561
+ iconv_free(check_iconv(self));
562
+ DATA_PTR(self) = NULL;
563
+ DATA_PTR(self) = (void *)ICONV2VALUE(iconv_create(to, from));
564
+ return self;
565
+ }
566
+
567
+ /*
568
+ * Document-method: open
569
+ * call-seq: Iconv.open(to, from) { |iconv| ... }
570
+ *
571
+ * Equivalent to Iconv.new except that when it is called with a block, it
572
+ * yields the new instance, closes it afterwards, and returns the value
572
+ * returned by the block.
574
+ */
575
+ static VALUE
576
+ iconv_s_open
577
+ #ifdef HAVE_PROTOTYPES
578
+ (VALUE self, VALUE to, VALUE from)
579
+ #else /* HAVE_PROTOTYPES */
580
+ (self, to, from)
581
+ VALUE self;
582
+ VALUE to;
583
+ VALUE from;
584
+ #endif /* HAVE_PROTOTYPES */
585
+ {
586
+ VALUE cd = ICONV2VALUE(iconv_create(to, from));
587
+
588
+ self = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)cd);
589
+ if (rb_block_given_p()) {
590
+ return rb_ensure(rb_yield, self, (VALUE(*)())iconv_finish, self);
591
+ }
592
+ else {
593
+ return self;
594
+ }
595
+ }
596
+
597
+ static VALUE
598
+ iconv_s_convert
599
+ #ifdef HAVE_PROTOTYPES
600
+ (struct iconv_env_t* env)
601
+ #else /* HAVE_PROTOTYPES */
602
+ (env)
603
+ struct iconv_env_t *env;
604
+ #endif /* HAVE_PROTOTYPES */
605
+ {
606
+ VALUE last = 0;
607
+
608
+ for (; env->argc > 0; --env->argc, ++env->argv) {
609
+ VALUE s = iconv_convert(env->cd, last = *(env->argv), 0, -1, env);
610
+ env->append(env->ret, s);
611
+ }
612
+
613
+ if (!NIL_P(last)) {
614
+ VALUE s = iconv_convert(env->cd, Qnil, 0, 0, env);
615
+ if (RSTRING(s)->len)
616
+ env->append(env->ret, s);
617
+ }
618
+
619
+ return env->ret;
620
+ }
621
+
622
+ /*
623
+ * Document-method: Iconv::iconv
624
+ * call-seq: Iconv.iconv(to, from, *strs)
625
+ *
626
+ * Shorthand for
627
+ * Iconv.open(to, from) { |cd|
628
+ * (strs + [nil]).collect { |s| cd.iconv(s) }
629
+ * }
630
+ *
631
+ * === Parameters
632
+ *
633
+ * <tt>to, from</tt>:: see Iconv.new
634
+ * <tt>strs</tt>:: strings to be converted
635
+ *
636
+ * === Exceptions
637
+ *
638
+ * Exceptions thrown by Iconv.new, Iconv.open and Iconv#iconv.
639
+ */
640
+ static VALUE
641
+ iconv_s_iconv
642
+ #ifdef HAVE_PROTOTYPES
643
+ (int argc, VALUE *argv, VALUE self)
644
+ #else /* HAVE_PROTOTYPES */
645
+ (argc, argv, self)
646
+ int argc;
647
+ VALUE *argv;
648
+ VALUE self;
649
+ #endif /* HAVE_PROTOTYPES */
650
+ {
651
+ struct iconv_env_t arg;
652
+
653
+ if (argc < 2) /* needs `to' and `from' arguments at least */
654
+ rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, 2);
655
+
656
+ arg.argc = argc -= 2;
657
+ arg.argv = argv + 2;
658
+ arg.append = rb_ary_push;
659
+ arg.ret = rb_ary_new2(argc);
660
+ arg.cd = iconv_create(argv[0], argv[1]);
661
+ return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd));
662
+ }
663
+
664
+ /*
665
+ * Document-method: Iconv::conv
666
+ * call-seq: Iconv.conv(to, from, str)
667
+ *
668
+ * Shorthand for
669
+ * Iconv.iconv(to, from, str).join
670
+ * See Iconv.iconv.
671
+ */
672
+ static VALUE
673
+ iconv_s_conv
674
+ #ifdef HAVE_PROTOTYPES
675
+ (VALUE self, VALUE to, VALUE from, VALUE str)
676
+ #else /* HAVE_PROTOTYPES */
677
+ (self, to, from, str)
678
+ VALUE self, to, from, str;
679
+ #endif /* HAVE_PROTOTYPES */
680
+ {
681
+ struct iconv_env_t arg;
682
+
683
+ arg.argc = 1;
684
+ arg.argv = &str;
685
+ arg.append = rb_str_append;
686
+ arg.ret = rb_str_new(0, 0);
687
+ arg.cd = iconv_create(to, from);
688
+ return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd));
689
+ }
690
+
691
+ /*
692
+ * Document-method: close
693
+ *
694
+ * Finishes conversion.
695
+ *
696
+ * After calling this, calling Iconv#iconv will cause an exception, but
697
+ * multiple calls of #close are guaranteed to end successfully.
698
+ *
699
+ * Returns a string containing the byte sequence to change the output buffer to
700
+ * its initial shift state.
701
+ */
702
+ static VALUE
703
+ iconv_init_state
704
+ #ifdef HAVE_PROTOTYPES
705
+ (VALUE cd)
706
+ #else /* HAVE_PROTOTYPES */
707
+ (cd)
708
+ VALUE cd;
709
+ #endif /* HAVE_PROTOTYPES */
710
+ {
711
+ return iconv_convert(VALUE2ICONV(cd), Qnil, 0, 0, NULL);
712
+ }
713
+
714
+ static VALUE
715
+ iconv_finish
716
+ #ifdef HAVE_PROTOTYPES
717
+ (VALUE self)
718
+ #else /* HAVE_PROTOTYPES */
719
+ (self)
720
+ VALUE self;
721
+ #endif /* HAVE_PROTOTYPES */
722
+ {
723
+ VALUE cd = check_iconv(self);
724
+
725
+ if (!cd) return Qnil;
726
+ DATA_PTR(self) = NULL;
727
+
728
+ return rb_ensure(iconv_init_state, cd, iconv_free, cd);
729
+ }
730
+
731
+ /*
732
+ * Document-method: Iconv#iconv
733
+ * call-seq: iconv(str, start=0, length=-1)
734
+ *
735
+ * Converts string and returns the result.
736
+ * * If +str+ is a String, converts <tt>str[start, length]</tt> and returns the converted string.
737
+ * * If +str+ is +nil+, places converter itself into initial shift state and
738
+ * just returns a string containing the byte sequence to change the output
739
+ * buffer to its initial shift state.
740
+ * * Otherwise, raises an exception.
741
+ *
742
+ * === Parameters
743
+ *
744
+ * str:: string to be converted, or nil
745
+ * start:: starting offset
746
+ * length:: conversion length; nil or -1 means the whole string from start
747
+ *
748
+ * === Exceptions
749
+ *
750
+ * * IconvIllegalSequence
751
+ * * IconvInvalidCharacter
752
+ * * IconvOutOfRange
753
+ *
754
+ * === Examples
755
+ *
756
+ * See the Iconv documentation.
757
+ */
758
+ static VALUE
759
+ iconv_iconv
760
+ #ifdef HAVE_PROTOTYPES
761
+ (int argc, VALUE *argv, VALUE self)
762
+ #else /* HAVE_PROTOTYPES */
763
+ (argc, argv, self)
764
+ int argc;
765
+ VALUE *argv;
766
+ VALUE self;
767
+ #endif /* HAVE_PROTOTYPES */
768
+ {
769
+ VALUE str, n1, n2;
770
+ VALUE cd = check_iconv(self);
771
+ long start = 0, length = 0, slen = 0;
772
+
773
+ rb_scan_args(argc, argv, "12", &str, &n1, &n2);
774
+ if (!NIL_P(str)) slen = RSTRING_LEN(StringValue(str));
775
+ if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
776
+ if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
777
+ if (NIL_P(n2)) {
778
+ length = -1;
779
+ }
780
+ else if ((length = NUM2LONG(n2)) >= slen - start) {
781
+ length = slen - start;
782
+ }
783
+ }
784
+ }
785
+
786
+ return iconv_convert(VALUE2ICONV(cd), str, start, length, NULL);
787
+ }
788
+
789
+ /*
790
+ * Document-class: Iconv::Failure
791
+ *
792
+ * Base attributes for Iconv exceptions.
793
+ */
794
+
795
+ /*
796
+ * Document-method: success
797
+ * call-seq: success
798
+ *
799
+ * Returns string(s) translated successfully until the exception occurred.
800
+ * * If the failure occurred within Iconv.iconv, the returned value is
801
+ * an array of the strings translated successfully before the failure;
802
+ * its last element is the partially converted string.
803
+ */
804
+ static VALUE
805
+ iconv_failure_success
806
+ #ifdef HAVE_PROTOTYPES
807
+ (VALUE self)
808
+ #else /* HAVE_PROTOTYPES */
809
+ (self)
810
+ VALUE self;
811
+ #endif /* HAVE_PROTOTYPES */
812
+ {
813
+ return rb_attr_get(self, rb_success);
814
+ }
815
+
816
+ /*
817
+ * Document-method: failed
818
+ * call-seq: failed
819
+ *
820
+ * Returns the substring of the original string passed to Iconv that starts at
821
+ * the character that caused the exception.
822
+ */
823
+ static VALUE
824
+ iconv_failure_failed
825
+ #ifdef HAVE_PROTOTYPES
826
+ (VALUE self)
827
+ #else /* HAVE_PROTOTYPES */
828
+ (self)
829
+ VALUE self;
830
+ #endif /* HAVE_PROTOTYPES */
831
+ {
832
+ return rb_attr_get(self, rb_failed);
833
+ }
834
+
835
+ /*
836
+ * Document-method: inspect
837
+ * call-seq: inspect
838
+ *
839
+ * Returns an inspected string of the form: #<_class_: _success_, _failed_>
840
+ */
841
+ static VALUE
842
+ iconv_failure_inspect
843
+ #ifdef HAVE_PROTOTYPES
844
+ (VALUE self)
845
+ #else /* HAVE_PROTOTYPES */
846
+ (self)
847
+ VALUE self;
848
+ #endif /* HAVE_PROTOTYPES */
849
+ {
850
+ const char *cname = rb_class2name(CLASS_OF(self));
851
+ VALUE success = rb_attr_get(self, rb_success);
852
+ VALUE failed = rb_attr_get(self, rb_failed);
853
+ VALUE str = rb_str_buf_cat2(rb_str_new2("#<"), cname);
854
+ str = rb_str_buf_cat(str, ": ", 2);
855
+ str = rb_str_buf_append(str, rb_inspect(success));
856
+ str = rb_str_buf_cat(str, ", ", 2);
857
+ str = rb_str_buf_append(str, rb_inspect(failed));
858
+ return rb_str_buf_cat(str, ">", 1);
859
+ }
860
+
861
+ /*
862
+ * Document-class: Iconv::InvalidEncoding
863
+ *
864
+ * Requested coding-system is not available on this system.
865
+ */
866
+
867
+ /*
868
+ * Document-class: Iconv::IllegalSequence
869
+ *
870
+ * Input conversion stopped due to an input byte that does not belong to
871
+ * the input codeset, or the output codeset does not contain the
872
+ * character.
873
+ */
874
+
875
+ /*
876
+ * Document-class: Iconv::InvalidCharacter
877
+ *
878
+ * Input conversion stopped due to an incomplete character or shift
879
+ * sequence at the end of the input buffer.
880
+ */
881
+
882
+ /*
883
+ * Document-class: Iconv::OutOfRange
884
+ *
885
+ * Iconv library internal error. Must not occur.
886
+ */
887
+
888
+ /*
889
+ * Document-class: Iconv::BrokenLibrary
890
+ *
891
+ * Detected a bug in the underlying iconv(3) library.
892
+ * * returns an error without setting errno properly
893
+ */
894
+
895
+ void
896
+ Init_iconv _((void))
897
+ {
898
+ VALUE rb_cIconv = rb_define_class("Iconv", rb_cData);
899
+
900
+ rb_define_alloc_func(rb_cIconv, iconv_s_allocate);
901
+ rb_define_singleton_method(rb_cIconv, "open", iconv_s_open, 2);
902
+ rb_define_singleton_method(rb_cIconv, "iconv", iconv_s_iconv, -1);
903
+ rb_define_singleton_method(rb_cIconv, "conv", iconv_s_conv, 3);
904
+ rb_define_method(rb_cIconv, "initialize", iconv_initialize, 2);
905
+ rb_define_method(rb_cIconv, "close", iconv_finish, 0);
906
+ rb_define_method(rb_cIconv, "iconv", iconv_iconv, -1);
907
+
908
+ rb_eIconvFailure = rb_define_module_under(rb_cIconv, "Failure");
909
+ rb_define_method(rb_eIconvFailure, "initialize", iconv_failure_initialize, 3);
910
+ rb_define_method(rb_eIconvFailure, "success", iconv_failure_success, 0);
911
+ rb_define_method(rb_eIconvFailure, "failed", iconv_failure_failed, 0);
912
+ rb_define_method(rb_eIconvFailure, "inspect", iconv_failure_inspect, 0);
913
+
914
+ rb_eIconvInvalidEncoding = rb_define_class_under(rb_cIconv, "InvalidEncoding", rb_eArgError);
915
+ rb_eIconvIllegalSeq = rb_define_class_under(rb_cIconv, "IllegalSequence", rb_eArgError);
916
+ rb_eIconvInvalidChar = rb_define_class_under(rb_cIconv, "InvalidCharacter", rb_eArgError);
917
+ rb_eIconvOutOfRange = rb_define_class_under(rb_cIconv, "OutOfRange", rb_eRuntimeError);
918
+ rb_eIconvBrokenLibrary = rb_define_class_under(rb_cIconv, "BrokenLibrary", rb_eRuntimeError);
919
+ rb_include_module(rb_eIconvInvalidEncoding, rb_eIconvFailure);
920
+ rb_include_module(rb_eIconvIllegalSeq, rb_eIconvFailure);
921
+ rb_include_module(rb_eIconvInvalidChar, rb_eIconvFailure);
922
+ rb_include_module(rb_eIconvOutOfRange, rb_eIconvFailure);
923
+ rb_include_module(rb_eIconvBrokenLibrary, rb_eIconvFailure);
924
+
925
+ rb_success = rb_intern("success");
926
+ rb_failed = rb_intern("failed");
927
+
928
+ rb_gc_register_address(&charset_map);
929
+ charset_map = rb_hash_new();
930
+ rb_define_singleton_method(rb_cIconv, "charset_map", charset_map_get, 0);
931
+ }
@@ -0,0 +1,53 @@
1
+ #! /usr/bin/ruby
2
+ require 'rbconfig'
3
+ require 'optparse'
4
+
5
+ # http://www.ctan.org/get/macros/texinfo/texinfo/gnulib/lib/config.charset
6
+ # Tue, 25 Dec 2007 00:00:00 GMT
7
+
8
+ HEADER = <<SRC
9
+ require 'iconv.so'
10
+
11
+ class Iconv
12
+ case RUBY_PLATFORM
13
+ SRC
14
+
15
# Translates the <tt>case "$os" in</tt> statement of a config.charset shell
# script into Ruby source: HEADER, one +when+ clause per case arm (its glob
# patterns turned into regexps), the charset_map assignments of that arm,
# and the closing +end+ lines.
#
# config_charset:: path of the config.charset script
# mapfile::        path of the file to write (binary mode); when nil the
#                  source is printed to standard output
#
# Returns nil without writing anything when the script contains no
# <tt>case "$os" in</tt> line.
def charset_alias(config_charset, mapfile = nil)
  found = nil               # canonical names seen in the current arm
  src = [HEADER]
  # File.open, not Kernel#open: the path comes from the command line and
  # Kernel#open would run a subprocess for a name starting with "|".
  File.open(config_charset) do |input|
    return unless input.find {|line| /^case "\$os" in/ =~ line}
    input.each do |line|
      case line
      when /^\s*([-\w\*]+(?:\s*\|\s*[-\w\*]+)*)(?=\))/
        patterns = $&.split('|').collect do |targ|
          targ = targ.strip
          # A leading/trailing "*" leaves that side unanchored; any other
          # "*" becomes ".*".
          tail = targ.chomp!("*") ? '' : '\z'
          head = targ.slice!(/\A\*/) ? '' : '\A'
          targ.gsub!(/\*/, '.*')
          "/#{head}#{targ}#{tail}/"
        end
        src << " when #{patterns.join(', ')}"
        found = {}
      when /^\s*echo "(?:\$\w+\.)?([-\w*]+)\s+([-\w]+)"/
        # An echo outside of any arm has no "when" to belong to.
        next unless found
        sys, can = $1, $2.downcase
        # The first mapping of a canonical name wins.
        next if found[can]
        # Skip CP437 mappings of en_* locales other than en_US.
        next if /\Aen_(?!US\z)/ =~ sys && /\ACP437\z/i =~ can
        found[can] = true
        src << " charset_map['#{can}'] = '#{sys}'.freeze"
      when /^\s*;;/
        found = nil
      end
    end
  end
  src << " end" << "end"
  if mapfile
    File.open(mapfile, "wb") {|f| f.puts(*src)}
  else
    puts(*src)
  end
end
51
+
52
+ (1..2) === ARGV.size or abort "usage: #{$0} config_charset [mapfile]"
53
+ charset_alias(*ARGV)
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: iconv
3
+ version: !ruby/object:Gem::Version
4
+ hash: 9
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ version: "0.1"
10
+ platform: ruby
11
+ authors:
12
+ - Yukihiro Matsumoto
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-08-19 00:00:00 +04:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description: If you're using bundler, and your OS doesn't have iconv for some reason (silly FreeBSD), you can install it with this gem!
22
+ email:
23
+ executables: []
24
+
25
+ extensions:
26
+ - ext/iconv/extconf.rb
27
+ extra_rdoc_files: []
28
+
29
+ files:
30
+ - ext/iconv/charset_alias.rb
31
+ - ext/iconv/extconf.rb
32
+ - ext/iconv/iconv.c
33
+ - ext/iconv/mkwrapper.rb
34
+ has_rdoc: true
35
+ homepage: http://github.com/ruby/ruby
36
+ licenses: []
37
+
38
+ post_install_message:
39
+ rdoc_options: []
40
+
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ none: false
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ hash: 3
49
+ segments:
50
+ - 0
51
+ version: "0"
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ requirements: []
62
+
63
+ rubyforge_project:
64
+ rubygems_version: 1.3.7
65
+ signing_key:
66
+ specification_version: 3
67
+ summary: iconv extension. The same as you can install with ruby
68
+ test_files: []
69
+