iconv 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,104 @@
1
+ #! /usr/bin/ruby
2
+ # :stopdoc:
3
+ require 'rbconfig'
4
+ require 'optparse'
5
+
6
+ # http://www.ctan.org/get/macros/texinfo/texinfo/gnulib/lib/config.charset
7
+ # Tue, 25 Dec 2007 00:00:00 GMT
8
+
9
+ OS = RbConfig::CONFIG["target_os"]
10
+ SHELL = RbConfig::CONFIG['SHELL']
11
+
12
# Hash that remembers first-insertion order and accumulates every value
# assigned to a key.
#
# Each entry is stored internally as [insertion_index, key, value, ...].
class Hash::Ordered < Hash
  # Returns the most recently assigned value for +key+, or nil/false when
  # the underlying lookup yields nothing.
  def [](key)
    entry = super
    entry && entry.last
  end

  # The first assignment records the insertion index and the key; later
  # assignments to the same key append the value to the existing entry.
  def []=(key, val)
    if key?(key)
      fetch(key) << val
    else
      super(key, [size, key, val])
    end
  end

  # Returns the entries in insertion order as [key, value, ...] arrays
  # (the leading insertion index is stripped).
  def sort
    values.sort.map { |entry| entry[1..-1] }
  end

  # Iterates over the entries in insertion order (see #sort).
  def each(&block)
    sort.each(&block)
  end
end
27
+
28
# Generates the Ruby source that fills Iconv.charset_map for one target OS.
#
# config_charset:: path of gnulib's config.charset shell script
# mapfile::        path of the file to write, or nil to write to STDOUT
# target::         OS name matched against the shell patterns of the
#                  <tt>case "$os" in</tt> statement (defaults to OS)
#
# Only the first case arm whose pattern matches +target+ is used.  When the
# case statement or a matching arm is missing, an empty map is emitted.
def charset_alias(config_charset, mapfile, target = OS)
  map = Hash::Ordered.new
  comments = []
  # File.open rather than Kernel#open: a path that starts with "|" must be
  # treated as a file name, never executed as a command.
  File.open(config_charset) do |input|
    # Skip ahead to the case statement.
    break unless input.find { |line| /^case "\$os" in/ =~ line }
    # Skip ahead to the first arm with a pattern matching the target.
    matching_arm = input.find do |line|
      /^\s*([-\w\*]+(?:\s*\|\s*[-\w\*]+)*)(?=\))/ =~ line &&
        $&.split('|').any? { |pattern| File.fnmatch?(pattern.strip, target) }
    end
    break unless matching_arm
    # Collect "system-name canonical-name" pairs until the arm's ";;".
    input.find do |line|
      case line
      when /^\s*echo "(?:\$\w+\.)?([-\w*]+)\s+([-\w]+)"/
        sys, can = $1, $2
        can.downcase!
        map[can] = sys
        false
      when /^\s*;;/
        true
      else
        false
      end
    end
  end
  case target
  when /linux|-gnu/
    # map.delete('ascii')
  when /cygwin|os2-emx/
    # get rid of tilde/yen problem.
    map['shift_jis'] = 'cp932'
  end
  st = Hash.new(0)
  map = map.sort.collect do |can, *sys|
    # When an en_US locale name is among the candidates, drop the other
    # locales' names that carry the same suffix so that en_US wins.
    en_us = nil
    if sys.any? { |s| en_us = /^en_us(?=.|$)/i.match(s) }
      # The suffix is literal text: quote it so characters such as "*" are
      # not interpreted as regexp syntax, and keep it case-sensitive.
      suffix = Regexp.quote(en_us.post_match)
      noen = /^(?!en_us)\w+_\w+(?-i:#{suffix})$/i
      sys.reject! { |s| noen =~ s }
    end
    sys = sys.first
    st[sys] += 1
    [can, sys]
  end
  # Keep only system names used more than once; they are emitted through a
  # shared local variable (sys1, sys2, ...) in the generated source.
  st.delete_if { |_sys, count| count == 1 }
  st.keys.each { |sys| st[sys] = nil }
  st.default = nil
  writer = proc do |f|
    f.puts("require 'iconv.so'")
    f.puts
    f.puts(comments)
    f.puts("class Iconv")
    i = 0
    map.each do |can, sys|
      if s = st[sys]
        # Shared name already bound to a variable: reuse it.
        sys = s
      elsif st.key?(sys)
        # First use of a shared name: bind it to a fresh variable.
        sys = (st[sys] = "sys#{i+=1}") + " = '#{sys}'.freeze"
      else
        sys = "'#{sys}'.freeze"
      end
      f.puts(" charset_map['#{can}'] = #{sys}")
    end
    f.puts("end")
  end
  if mapfile
    File.open(mapfile, "w", &writer)
  else
    writer[STDOUT]
  end
end
95
+
96
# Command-line driver: charset_alias.rb [--target OS] config.status [map.rb]
target = OS
opt = nil
# ARGV.options yields the option parser and returns the block's value.
# A falsy result (wrong number of arguments, or a parse failure) prints
# the usage text and aborts.
parsed = ARGV.options do |parser|
  opt = parser
  parser.banner << " config.status map.rb"
  parser.on("--target OS") { |t| target = t }
  parser.parse! && (1..2) === ARGV.size
end
abort opt.to_s unless parsed
charset_alias(ARGV[0], ARGV[1], target)
@@ -0,0 +1,51 @@
1
+ require 'mkmf'
2
+
3
dir_config("iconv")

# Generating the charset-alias wrapper is the default only when
# config.charset sits next to the sources; --with-config-charset and
# --enable-config-charset can override that.
default_conf = File.exist?(File.join($srcdir, "config.charset"))
conf = with_config("config-charset", enable_config("config-charset", default_conf))

if have_func("iconv", "iconv.h") || have_library("iconv", "iconv", "iconv.h")
  # Preprocess a marked call to iconv() to learn the real function name
  # (it may be a macro), then look at its prototype to see whether the
  # second parameter is declared const.
  inptr_is_const = checking_for("const of iconv() 2nd argument") do
    create_tmpsrc(cpp_include("iconv.h") + "---> iconv(cd,0,0,0,0) <---")
    src = xpopen(cpp_command("")) { |f| f.read }
    func = src[/^--->\s*(\w+).*\s*<---/, 1]
    if !func
      Logging::message "iconv function name not found"
      false
    else
      second = src[%r"\b#{func}\s*\(.*?,(.*?),.*?\)\s*;"m, 1]
      if !second
        Logging::message "prototype for #{func}() not found"
        false
      else
        # $& still holds the whole prototype matched just above.
        Logging::message $&+"\n"
        /\bconst\b/ =~ second
      end
    end
  end
  $defs.push('-DICONV_INPTR_CONST') if inptr_is_const

  if conf
    prefix = $nmake ? "{$(srcdir)}" : "$(srcdir)/"
    if $extout
      wrapper = "$(RUBYARCHDIR)/iconv.rb"
    else
      wrapper = "./iconv.rb"
      $INSTALLFILES = [[wrapper, "$(RUBYARCHDIR)"]]
    end
    if String === conf
      # An explicit location was given; remember whether it is a URI.
      require 'uri'
      scheme = URI.parse(conf).scheme
    else
      conf = "$(srcdir)/config.charset"
    end
    $cleanfiles << wrapper
  end

  create_makefile("iconv")

  if conf
    # Append the rule that builds the wrapper with charset_alias.rb.  The
    # charset file is listed as a prerequisite only when it is not a URI.
    File.open("Makefile", "a") do |mf|
      mf.print("\nall: #{wrapper}\n\n#{wrapper}: #{prefix}charset_alias.rb")
      mf.print(" ", conf) unless scheme
      mf.print("\n\t$(RUBY) $(srcdir)/charset_alias.rb #{conf} $@\n")
    end
  end
end
@@ -0,0 +1,931 @@
1
+ /* -*- mode:c; c-file-style:"ruby" -*- */
2
+ /**********************************************************************
3
+
4
+ iconv.c -
5
+
6
+ $Author$
7
+ $Date$
8
+ created at: Wed Dec 1 20:28:09 JST 1999
9
+
10
+ All the files in this distribution are covered under the Ruby's
11
+ license (see the file COPYING).
12
+
13
+ Documentation by Yukihiro Matsumoto and Gavin Sinclair.
14
+
15
+ **********************************************************************/
16
+
17
+ #include "ruby.h"
18
+ #include <errno.h>
19
+ #include <iconv.h>
20
+ #include <assert.h>
21
+ #include "st.h"
22
+ #include "intern.h"
23
+
24
+ /*
25
+ * Document-class: Iconv
26
+ *
27
+ * == Summary
28
+ *
29
+ * Ruby extension for charset conversion.
30
+ *
31
+ * == Abstract
32
+ *
33
+ * Iconv is a wrapper class for the UNIX 95 <tt>iconv()</tt> function family,
34
+ * which translates strings between various encoding systems.
35
+ *
36
+ * See Open Group's on-line documents for more details.
37
+ * * <tt>iconv.h</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.h.html
38
+ * * <tt>iconv_open()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_open.html
39
+ * * <tt>iconv()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.html
40
+ * * <tt>iconv_close()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_close.html
41
+ *
42
+ * Which coding systems are available is platform-dependent.
43
+ *
44
+ * == Examples
45
+ *
46
+ * 1. Simple conversion between two charsets.
47
+ *
48
+ * converted_text = Iconv.conv('iso-8859-15', 'utf-8', text)
49
+ *
50
+ * 2. Instantiate a new Iconv and use method Iconv#iconv.
51
+ *
52
+ * cd = Iconv.new(to, from)
53
+ * begin
54
+ * input.each { |s| output << cd.iconv(s) }
55
+ * output << cd.iconv(nil) # Don't forget this!
56
+ * ensure
57
+ * cd.close
58
+ * end
59
+ *
60
+ * 3. Invoke Iconv.open with a block.
61
+ *
62
+ * Iconv.open(to, from) do |cd|
63
+ * input.each { |s| output << cd.iconv(s) }
64
+ * output << cd.iconv(nil)
65
+ * end
66
+ *
67
+ * 4. Shorthand for (3).
68
+ *
69
+ * Iconv.iconv(to, from, *input.to_a)
70
+ */
71
+
72
+ /* Invalid value for iconv_t is -1 but 0 for VALUE, I hope VALUE is
73
+ big enough to keep iconv_t */
74
+ #define VALUE2ICONV(v) ((iconv_t)((VALUE)(v) ^ -1))
75
+ #define ICONV2VALUE(c) ((VALUE)(c) ^ -1)
76
+
77
+ struct iconv_env_t
78
+ {
79
+ iconv_t cd;
80
+ int argc;
81
+ VALUE *argv;
82
+ VALUE ret;
83
+ VALUE (*append)_((VALUE, VALUE));
84
+ };
85
+
86
+ static VALUE rb_eIconvInvalidEncoding;
87
+ static VALUE rb_eIconvFailure;
88
+ static VALUE rb_eIconvIllegalSeq;
89
+ static VALUE rb_eIconvInvalidChar;
90
+ static VALUE rb_eIconvOutOfRange;
91
+ static VALUE rb_eIconvBrokenLibrary;
92
+
93
+ static ID rb_success, rb_failed;
94
+ static VALUE iconv_fail _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg));
95
+ static VALUE iconv_fail_retry _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg));
96
+ static VALUE iconv_failure_initialize _((VALUE error, VALUE mesg, VALUE success, VALUE failed));
97
+ static VALUE iconv_failure_success _((VALUE self));
98
+ static VALUE iconv_failure_failed _((VALUE self));
99
+
100
+ static iconv_t iconv_create _((VALUE to, VALUE from));
101
+ static void iconv_dfree _((void *cd));
102
+ static VALUE iconv_free _((VALUE cd));
103
+ static VALUE iconv_try _((iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen));
104
+ static VALUE rb_str_derive _((VALUE str, const char* ptr, int len));
105
+ static VALUE iconv_convert _((iconv_t cd, VALUE str, long start, long length, struct iconv_env_t* env));
106
+ static VALUE iconv_s_allocate _((VALUE klass));
107
+ static VALUE iconv_initialize _((VALUE self, VALUE to, VALUE from));
108
+ static VALUE iconv_s_open _((VALUE self, VALUE to, VALUE from));
109
+ static VALUE iconv_s_convert _((struct iconv_env_t* env));
110
+ static VALUE iconv_s_iconv _((int argc, VALUE *argv, VALUE self));
111
+ static VALUE iconv_init_state _((VALUE cd));
112
+ static VALUE iconv_finish _((VALUE self));
113
+ static VALUE iconv_iconv _((int argc, VALUE *argv, VALUE self));
114
+
115
+ static VALUE charset_map;
116
+
117
+ /*
118
+ * Document-method: charset_map
119
+ * call-seq: Iconv.charset_map
120
+ *
121
+ * Returns the map from canonical name to system dependent name.
122
+ */
123
+ static VALUE charset_map_get _((void))
124
+ {
125
+ return charset_map;
126
+ }
127
+
128
+ static char *
129
+ map_charset
130
+ #ifdef HAVE_PROTOTYPES
131
+ (VALUE *code)
132
+ #else /* HAVE_PROTOTYPES */
133
+ (code)
134
+ VALUE *code;
135
+ #endif /* HAVE_PROTOTYPES */
136
+ {
137
+ VALUE val = *code;
138
+
139
+ if (RHASH(charset_map)->tbl && RHASH(charset_map)->tbl->num_entries) {
140
+ VALUE key = rb_funcall2(val, rb_intern("downcase"), 0, 0);
141
+ StringValuePtr(key);
142
+ if (st_lookup(RHASH(charset_map)->tbl, key, &val)) {
143
+ *code = val;
144
+ }
145
+ }
146
+ return StringValuePtr(*code);
147
+ }
148
+
149
+ NORETURN(static void rb_iconv_sys_fail(const char *s));
150
+ static void
151
+ rb_iconv_sys_fail(const char *s)
152
+ {
153
+ if (errno == 0) {
154
+ rb_exc_raise(iconv_fail(rb_eIconvBrokenLibrary, Qnil, Qnil, NULL, s));
155
+ }
156
+ rb_sys_fail(s);
157
+ }
158
+
159
+ #define rb_sys_fail(s) rb_iconv_sys_fail(s)
160
+
161
+ static iconv_t
162
+ iconv_create
163
+ #ifdef HAVE_PROTOTYPES
164
+ (VALUE to, VALUE from)
165
+ #else /* HAVE_PROTOTYPES */
166
+ (to, from)
167
+ VALUE to;
168
+ VALUE from;
169
+ #endif /* HAVE_PROTOTYPES */
170
+ {
171
+ const char* tocode = map_charset(&to);
172
+ const char* fromcode = map_charset(&from);
173
+
174
+ iconv_t cd = iconv_open(tocode, fromcode);
175
+
176
+ if (cd == (iconv_t)-1) {
177
+ switch (errno) {
178
+ case EMFILE:
179
+ case ENFILE:
180
+ case ENOMEM:
181
+ rb_gc();
182
+ cd = iconv_open(tocode, fromcode);
183
+ }
184
+ if (cd == (iconv_t)-1) {
185
+ int inval = errno == EINVAL;
186
+ const char *s = inval ? "invalid encoding " : "iconv";
187
+ volatile VALUE msg = rb_str_new(0, strlen(s) + RSTRING(to)->len +
188
+ RSTRING(from)->len + 8);
189
+
190
+ sprintf(RSTRING(msg)->ptr, "%s(\"%s\", \"%s\")",
191
+ s, RSTRING(to)->ptr, RSTRING(from)->ptr);
192
+ s = RSTRING(msg)->ptr;
193
+ RSTRING(msg)->len = strlen(s);
194
+ if (!inval) rb_sys_fail(s);
195
+ rb_exc_raise(iconv_fail(rb_eIconvInvalidEncoding, Qnil,
196
+ rb_ary_new3(2, to, from), NULL, s));
197
+ }
198
+ }
199
+
200
+ return cd;
201
+ }
202
+
203
+ static void
204
+ iconv_dfree
205
+ #ifdef HAVE_PROTOTYPES
206
+ (void *cd)
207
+ #else /* HAVE_PROTOTYPES */
208
+ (cd)
209
+ void *cd;
210
+ #endif /* HAVE_PROTOTYPES */
211
+ {
212
+ iconv_close(VALUE2ICONV(cd));
213
+ }
214
+
215
+ #define ICONV_FREE iconv_dfree
216
+
217
+ static VALUE
218
+ iconv_free
219
+ #ifdef HAVE_PROTOTYPES
220
+ (VALUE cd)
221
+ #else /* HAVE_PROTOTYPES */
222
+ (cd)
223
+ VALUE cd;
224
+ #endif /* HAVE_PROTOTYPES */
225
+ {
226
+ if (cd && iconv_close(VALUE2ICONV(cd)) == -1)
227
+ rb_sys_fail("iconv_close");
228
+ return Qnil;
229
+ }
230
+
231
+ static VALUE
232
+ check_iconv
233
+ #ifdef HAVE_PROTOTYPES
234
+ (VALUE obj)
235
+ #else /* HAVE_PROTOTYPES */
236
+ (obj)
237
+ VALUE obj;
238
+ #endif /* HAVE_PROTOTYPES */
239
+ {
240
+ Check_Type(obj, T_DATA);
241
+ if (RDATA(obj)->dfree != ICONV_FREE) {
242
+ rb_raise(rb_eArgError, "Iconv expected (%s)", rb_class2name(CLASS_OF(obj)));
243
+ }
244
+ return (VALUE)DATA_PTR(obj);
245
+ }
246
+
247
+ static VALUE
248
+ iconv_try
249
+ #ifdef HAVE_PROTOTYPES
250
+ (iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen)
251
+ #else /* HAVE_PROTOTYPES */
252
+ (cd, inptr, inlen, outptr, outlen)
253
+ iconv_t cd;
254
+ const char **inptr;
255
+ size_t *inlen;
256
+ char **outptr;
257
+ size_t *outlen;
258
+ #endif /* HAVE_PROTOTYPES */
259
+ {
260
+ #ifdef ICONV_INPTR_CONST
261
+ #define ICONV_INPTR_CAST
262
+ #else
263
+ #define ICONV_INPTR_CAST (char **)
264
+ #endif
265
+ size_t ret;
266
+
267
+ errno = 0;
268
+ ret = iconv(cd, ICONV_INPTR_CAST inptr, inlen, outptr, outlen);
269
+ if (ret == (size_t)-1) {
270
+ if (!*inlen)
271
+ return Qfalse;
272
+ switch (errno) {
273
+ case E2BIG:
274
+ /* try the left in next loop */
275
+ break;
276
+ case EILSEQ:
277
+ return rb_eIconvIllegalSeq;
278
+ case EINVAL:
279
+ return rb_eIconvInvalidChar;
280
+ case 0:
281
+ return rb_eIconvBrokenLibrary;
282
+ default:
283
+ rb_sys_fail("iconv");
284
+ }
285
+ }
286
+ else if (*inlen > 0) {
287
+ /* something goes wrong */
288
+ return rb_eIconvIllegalSeq;
289
+ }
290
+ else if (ret) {
291
+ return Qnil; /* conversion */
292
+ }
293
+ return Qfalse;
294
+ }
295
+
296
+ #define FAILED_MAXLEN 16
297
+
298
+ static VALUE iconv_failure_initialize
299
+ #ifdef HAVE_PROTOTYPES
300
+ (VALUE error, VALUE mesg, VALUE success, VALUE failed)
301
+ #else /* HAVE_PROTOTYPES */
302
+ (error, mesg, success, failed)
303
+ VALUE error, mesg, success, failed;
304
+ #endif /* HAVE_PROTOTYPES */
305
+ {
306
+ rb_call_super(1, &mesg);
307
+ rb_ivar_set(error, rb_success, success);
308
+ rb_ivar_set(error, rb_failed, failed);
309
+ return error;
310
+ }
311
+
312
+ static VALUE
313
+ iconv_fail
314
+ #ifdef HAVE_PROTOTYPES
315
+ (VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg)
316
+ #else /* HAVE_PROTOTYPES */
317
+ (error, success, failed, env, mesg)
318
+ VALUE error, success, failed;
319
+ struct iconv_env_t *env;
320
+ const char *mesg;
321
+ #endif /* HAVE_PROTOTYPES */
322
+ {
323
+ VALUE args[3];
324
+
325
+ if (mesg && *mesg) {
326
+ args[0] = rb_str_new2(mesg);
327
+ }
328
+ else if (TYPE(failed) != T_STRING || RSTRING(failed)->len < FAILED_MAXLEN) {
329
+ args[0] = rb_inspect(failed);
330
+ }
331
+ else {
332
+ args[0] = rb_inspect(rb_str_substr(failed, 0, FAILED_MAXLEN));
333
+ rb_str_cat2(args[0], "...");
334
+ }
335
+ args[1] = success;
336
+ args[2] = failed;
337
+ if (env) {
338
+ args[1] = env->append(rb_obj_dup(env->ret), success);
339
+ if (env->argc > 0) {
340
+ *(env->argv) = failed;
341
+ args[2] = rb_ary_new4(env->argc, env->argv);
342
+ }
343
+ }
344
+ return rb_class_new_instance(3, args, error);
345
+ }
346
+
347
+ static VALUE
348
+ iconv_fail_retry(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg)
349
+ {
350
+ error = iconv_fail(error, success, failed, env, mesg);
351
+ if (!rb_block_given_p()) rb_exc_raise(error);
352
+ ruby_errinfo = error;
353
+ return rb_yield(failed);
354
+ }
355
+
356
+ static VALUE
357
+ rb_str_derive
358
+ #ifdef HAVE_PROTOTYPES
359
+ (VALUE str, const char* ptr, int len)
360
+ #else /* HAVE_PROTOTYPES */
361
+ (str, ptr, len)
362
+ VALUE str;
363
+ const char *ptr;
364
+ int len;
365
+ #endif /* HAVE_PROTOTYPES */
366
+ {
367
+ VALUE ret;
368
+
369
+ if (NIL_P(str))
370
+ return rb_str_new(ptr, len);
371
+ if (RSTRING(str)->ptr == ptr && RSTRING(str)->len == len)
372
+ return str;
373
+ if (RSTRING(str)->ptr + RSTRING(str)->len == ptr + len)
374
+ ret = rb_str_substr(str, ptr - RSTRING(str)->ptr, len);
375
+ else
376
+ ret = rb_str_new(ptr, len);
377
+ OBJ_INFECT(ret, str);
378
+ return ret;
379
+ }
380
+
381
+ static VALUE
382
+ iconv_convert
383
+ #ifdef HAVE_PROTOTYPES
384
+ (iconv_t cd, VALUE str, long start, long length, struct iconv_env_t* env)
385
+ #else /* HAVE_PROTOTYPES */
386
+ (cd, str, start, length, env)
387
+ iconv_t cd;
388
+ VALUE str;
389
+ long start;
390
+ long length;
391
+ struct iconv_env_t *env;
392
+ #endif /* HAVE_PROTOTYPES */
393
+ {
394
+ VALUE ret = Qfalse;
395
+ VALUE error = Qfalse;
396
+ VALUE rescue;
397
+ const char *inptr, *instart;
398
+ size_t inlen;
399
+ /* I believe ONE CHARACTER never exceed this. */
400
+ char buffer[BUFSIZ];
401
+ char *outptr;
402
+ size_t outlen;
403
+
404
+ if (cd == (iconv_t)-1)
405
+ rb_raise(rb_eArgError, "closed iconv");
406
+
407
+ if (NIL_P(str)) {
408
+ /* Reset output pointer or something. */
409
+ inptr = "";
410
+ inlen = 0;
411
+ outptr = buffer;
412
+ outlen = sizeof(buffer);
413
+ error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
414
+ if (RTEST(error)) {
415
+ unsigned int i;
416
+ rescue = iconv_fail_retry(error, Qnil, Qnil, env, 0);
417
+ if (TYPE(rescue) == T_ARRAY) {
418
+ str = RARRAY(rescue)->len > 0 ? RARRAY(rescue)->ptr[0] : Qnil;
419
+ }
420
+ if (FIXNUM_P(str) && (i = FIX2INT(str)) <= 0xff) {
421
+ char c = i;
422
+ str = rb_str_new(&c, 1);
423
+ }
424
+ else if (!NIL_P(str)) {
425
+ StringValue(str);
426
+ }
427
+ }
428
+
429
+ inptr = NULL;
430
+ length = 0;
431
+ }
432
+ else {
433
+ int slen;
434
+
435
+ StringValue(str);
436
+ slen = RSTRING(str)->len;
437
+ inptr = RSTRING(str)->ptr;
438
+
439
+ inptr += start;
440
+ if (length < 0 || length > start + slen)
441
+ length = slen - start;
442
+ }
443
+ instart = inptr;
444
+ inlen = length;
445
+
446
+ do {
447
+ char errmsg[50];
448
+ const char *tmpstart = inptr;
449
+ outptr = buffer;
450
+ outlen = sizeof(buffer);
451
+
452
+ errmsg[0] = 0;
453
+ error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
454
+
455
+ if (0 <= outlen && outlen <= sizeof(buffer)) {
456
+ outlen = sizeof(buffer) - outlen;
457
+ if (NIL_P(error) || /* something converted */
458
+ outlen > inptr - tmpstart || /* input can't contain output */
459
+ (outlen < inptr - tmpstart && inlen > 0) || /* something skipped */
460
+ memcmp(buffer, tmpstart, outlen)) /* something differs */
461
+ {
462
+ if (NIL_P(str)) {
463
+ ret = rb_str_new(buffer, outlen);
464
+ }
465
+ else {
466
+ if (ret) {
467
+ ret = rb_str_buf_cat(ret, instart, tmpstart - instart);
468
+ }
469
+ else {
470
+ ret = rb_str_new(instart, tmpstart - instart);
471
+ OBJ_INFECT(ret, str);
472
+ }
473
+ ret = rb_str_buf_cat(ret, buffer, outlen);
474
+ instart = inptr;
475
+ }
476
+ }
477
+ else if (!inlen) {
478
+ inptr = tmpstart + outlen;
479
+ }
480
+ }
481
+ else {
482
+ /* Some iconv() have a bug, return *outlen out of range */
483
+ sprintf(errmsg, "bug?(output length = %ld)", (long)(sizeof(buffer) - outlen));
484
+ error = rb_eIconvOutOfRange;
485
+ }
486
+
487
+ if (RTEST(error)) {
488
+ long len = 0;
489
+
490
+ if (!ret)
491
+ ret = rb_str_derive(str, instart, inptr - instart);
492
+ else if (inptr > instart)
493
+ rb_str_cat(ret, instart, inptr - instart);
494
+ str = rb_str_derive(str, inptr, inlen);
495
+ rescue = iconv_fail_retry(error, ret, str, env, errmsg);
496
+ if (TYPE(rescue) == T_ARRAY) {
497
+ if ((len = RARRAY(rescue)->len) > 0)
498
+ rb_str_concat(ret, RARRAY(rescue)->ptr[0]);
499
+ if (len > 1 && !NIL_P(str = RARRAY(rescue)->ptr[1])) {
500
+ StringValue(str);
501
+ inlen = length = RSTRING(str)->len;
502
+ instart = inptr = RSTRING(str)->ptr;
503
+ continue;
504
+ }
505
+ }
506
+ else if (!NIL_P(rescue)) {
507
+ rb_str_concat(ret, rescue);
508
+ }
509
+ break;
510
+ }
511
+ } while (inlen > 0);
512
+
513
+ if (!ret)
514
+ ret = rb_str_derive(str, instart, inptr - instart);
515
+ else if (inptr > instart)
516
+ rb_str_cat(ret, instart, inptr - instart);
517
+ return ret;
518
+ }
519
+
520
+ static VALUE
521
+ iconv_s_allocate
522
+ #ifdef HAVE_PROTOTYPES
523
+ (VALUE klass)
524
+ #else /* HAVE_PROTOTYPES */
525
+ (klass)
526
+ VALUE klass;
527
+ #endif /* HAVE_PROTOTYPES */
528
+ {
529
+ return Data_Wrap_Struct(klass, 0, ICONV_FREE, 0);
530
+ }
531
+
532
+ /*
533
+ * Document-method: new
534
+ * call-seq: Iconv.new(to, from)
535
+ *
536
+ * Creates new code converter from a coding-system designated with +from+
537
+ * to another one designated with +to+.
538
+ *
539
+ * === Parameters
540
+ *
541
+ * +to+:: encoding name for destination
542
+ * +from+:: encoding name for source
543
+ *
544
+ * === Exceptions
545
+ *
546
+ * TypeError:: if +to+ or +from+ aren't String
547
+ * InvalidEncoding:: if the designated converter couldn't be found
548
+ * SystemCallError:: if <tt>iconv_open(3)</tt> fails
549
+ */
550
+ static VALUE
551
+ iconv_initialize
552
+ #ifdef HAVE_PROTOTYPES
553
+ (VALUE self, VALUE to, VALUE from)
554
+ #else /* HAVE_PROTOTYPES */
555
+ (self, to, from)
556
+ VALUE self;
557
+ VALUE to;
558
+ VALUE from;
559
+ #endif /* HAVE_PROTOTYPES */
560
+ {
561
+ iconv_free(check_iconv(self));
562
+ DATA_PTR(self) = NULL;
563
+ DATA_PTR(self) = (void *)ICONV2VALUE(iconv_create(to, from));
564
+ return self;
565
+ }
566
+
567
+ /*
568
+ * Document-method: open
569
+ * call-seq: Iconv.open(to, from) { |iconv| ... }
570
+ *
571
+ * Equivalent to Iconv.new except that when it is called with a block, it
572
+ * yields with the new instance and closes it, and returns the result which
573
+ * returned from the block.
574
+ */
575
+ static VALUE
576
+ iconv_s_open
577
+ #ifdef HAVE_PROTOTYPES
578
+ (VALUE self, VALUE to, VALUE from)
579
+ #else /* HAVE_PROTOTYPES */
580
+ (self, to, from)
581
+ VALUE self;
582
+ VALUE to;
583
+ VALUE from;
584
+ #endif /* HAVE_PROTOTYPES */
585
+ {
586
+ VALUE cd = ICONV2VALUE(iconv_create(to, from));
587
+
588
+ self = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)cd);
589
+ if (rb_block_given_p()) {
590
+ return rb_ensure(rb_yield, self, (VALUE(*)())iconv_finish, self);
591
+ }
592
+ else {
593
+ return self;
594
+ }
595
+ }
596
+
597
+ static VALUE
598
+ iconv_s_convert
599
+ #ifdef HAVE_PROTOTYPES
600
+ (struct iconv_env_t* env)
601
+ #else /* HAVE_PROTOTYPES */
602
+ (env)
603
+ struct iconv_env_t *env;
604
+ #endif /* HAVE_PROTOTYPES */
605
+ {
606
+ VALUE last = 0;
607
+
608
+ for (; env->argc > 0; --env->argc, ++env->argv) {
609
+ VALUE s = iconv_convert(env->cd, last = *(env->argv), 0, -1, env);
610
+ env->append(env->ret, s);
611
+ }
612
+
613
+ if (!NIL_P(last)) {
614
+ VALUE s = iconv_convert(env->cd, Qnil, 0, 0, env);
615
+ if (RSTRING(s)->len)
616
+ env->append(env->ret, s);
617
+ }
618
+
619
+ return env->ret;
620
+ }
621
+
622
+ /*
623
+ * Document-method: Iconv::iconv
624
+ * call-seq: Iconv.iconv(to, from, *strs)
625
+ *
626
+ * Shorthand for
627
+ * Iconv.open(to, from) { |cd|
628
+ * (strs + [nil]).collect { |s| cd.iconv(s) }
629
+ * }
630
+ *
631
+ * === Parameters
632
+ *
633
+ * <tt>to, from</tt>:: see Iconv.new
634
+ * <tt>strs</tt>:: strings to be converted
635
+ *
636
+ * === Exceptions
637
+ *
638
+ * Exceptions thrown by Iconv.new, Iconv.open and Iconv#iconv.
639
+ */
640
+ static VALUE
641
+ iconv_s_iconv
642
+ #ifdef HAVE_PROTOTYPES
643
+ (int argc, VALUE *argv, VALUE self)
644
+ #else /* HAVE_PROTOTYPES */
645
+ (argc, argv, self)
646
+ int argc;
647
+ VALUE *argv;
648
+ VALUE self;
649
+ #endif /* HAVE_PROTOTYPES */
650
+ {
651
+ struct iconv_env_t arg;
652
+
653
+ if (argc < 2) /* needs `to' and `from' arguments at least */
654
+ rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, 2);
655
+
656
+ arg.argc = argc -= 2;
657
+ arg.argv = argv + 2;
658
+ arg.append = rb_ary_push;
659
+ arg.ret = rb_ary_new2(argc);
660
+ arg.cd = iconv_create(argv[0], argv[1]);
661
+ return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd));
662
+ }
663
+
664
+ /*
665
+ * Document-method: Iconv::conv
666
+ * call-seq: Iconv.conv(to, from, str)
667
+ *
668
+ * Shorthand for
669
+ * Iconv.iconv(to, from, str).join
670
+ * See Iconv.iconv.
671
+ */
672
+ static VALUE
673
+ iconv_s_conv
674
+ #ifdef HAVE_PROTOTYPES
675
+ (VALUE self, VALUE to, VALUE from, VALUE str)
676
+ #else /* HAVE_PROTOTYPES */
677
+ (self, to, from, str)
678
+ VALUE self, to, from, str;
679
+ #endif /* HAVE_PROTOTYPES */
680
+ {
681
+ struct iconv_env_t arg;
682
+
683
+ arg.argc = 1;
684
+ arg.argv = &str;
685
+ arg.append = rb_str_append;
686
+ arg.ret = rb_str_new(0, 0);
687
+ arg.cd = iconv_create(to, from);
688
+ return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd));
689
+ }
690
+
691
+ /*
692
+ * Document-method: close
693
+ *
694
+ * Finishes conversion.
695
+ *
696
+ * After calling this, calling Iconv#iconv will cause an exception, but
697
+ * multiple calls of #close are guaranteed to end successfully.
698
+ *
699
+ * Returns a string containing the byte sequence to change the output buffer to
700
+ * its initial shift state.
701
+ */
702
+ static VALUE
703
+ iconv_init_state
704
+ #ifdef HAVE_PROTOTYPES
705
+ (VALUE cd)
706
+ #else /* HAVE_PROTOTYPES */
707
+ (cd)
708
+ VALUE cd;
709
+ #endif /* HAVE_PROTOTYPES */
710
+ {
711
+ return iconv_convert(VALUE2ICONV(cd), Qnil, 0, 0, NULL);
712
+ }
713
+
714
+ static VALUE
715
+ iconv_finish
716
+ #ifdef HAVE_PROTOTYPES
717
+ (VALUE self)
718
+ #else /* HAVE_PROTOTYPES */
719
+ (self)
720
+ VALUE self;
721
+ #endif /* HAVE_PROTOTYPES */
722
+ {
723
+ VALUE cd = check_iconv(self);
724
+
725
+ if (!cd) return Qnil;
726
+ DATA_PTR(self) = NULL;
727
+
728
+ return rb_ensure(iconv_init_state, cd, iconv_free, cd);
729
+ }
730
+
731
+ /*
732
+ * Document-method: Iconv#iconv
733
+ * call-seq: iconv(str, start=0, length=-1)
734
+ *
735
+ * Converts string and returns the result.
736
+ * * If +str+ is a String, converts <tt>str[start, length]</tt> and returns the converted string.
737
+ * * If +str+ is +nil+, places converter itself into initial shift state and
738
+ * just returns a string containing the byte sequence to change the output
739
+ * buffer to its initial shift state.
740
+ * * Otherwise, raises an exception.
741
+ *
742
+ * === Parameters
743
+ *
744
+ * str:: string to be converted, or nil
745
+ * start:: starting offset
746
+ * length:: conversion length; nil or -1 means the whole string from start
747
+ *
748
+ * === Exceptions
749
+ *
750
+ * * IconvIllegalSequence
751
+ * * IconvInvalidCharacter
752
+ * * IconvOutOfRange
753
+ *
754
+ * === Examples
755
+ *
756
+ * See the Iconv documentation.
757
+ */
758
+ static VALUE
759
+ iconv_iconv
760
+ #ifdef HAVE_PROTOTYPES
761
+ (int argc, VALUE *argv, VALUE self)
762
+ #else /* HAVE_PROTOTYPES */
763
+ (argc, argv, self)
764
+ int argc;
765
+ VALUE *argv;
766
+ VALUE self;
767
+ #endif /* HAVE_PROTOTYPES */
768
+ {
769
+ VALUE str, n1, n2;
770
+ VALUE cd = check_iconv(self);
771
+ long start = 0, length = 0, slen = 0;
772
+
773
+ rb_scan_args(argc, argv, "12", &str, &n1, &n2);
774
+ if (!NIL_P(str)) slen = RSTRING_LEN(StringValue(str));
775
+ if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
776
+ if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
777
+ if (NIL_P(n2)) {
778
+ length = -1;
779
+ }
780
+ else if ((length = NUM2LONG(n2)) >= slen - start) {
781
+ length = slen - start;
782
+ }
783
+ }
784
+ }
785
+
786
+ return iconv_convert(VALUE2ICONV(cd), str, start, length, NULL);
787
+ }
788
+
789
+ /*
790
+ * Document-class: Iconv::Failure
791
+ *
792
+ * Base attributes for Iconv exceptions.
793
+ */
794
+
795
+ /*
796
+ * Document-method: success
797
+ * call-seq: success
798
+ *
799
+ * Returns string(s) translated successfully until the exception occurred.
800
+ * * In the case of failure occurred within Iconv.iconv, returned
801
+ * value is an array of strings translated successfully preceding
802
+ * failure and the last element is string on the way.
803
+ */
804
+ static VALUE
805
+ iconv_failure_success
806
+ #ifdef HAVE_PROTOTYPES
807
+ (VALUE self)
808
+ #else /* HAVE_PROTOTYPES */
809
+ (self)
810
+ VALUE self;
811
+ #endif /* HAVE_PROTOTYPES */
812
+ {
813
+ return rb_attr_get(self, rb_success);
814
+ }
815
+
816
+ /*
817
+ * Document-method: failed
818
+ * call-seq: failed
819
+ *
820
+ * Returns substring of the original string passed to Iconv that starts at the
821
+ * character that caused the exception.
822
+ */
823
+ static VALUE
824
+ iconv_failure_failed
825
+ #ifdef HAVE_PROTOTYPES
826
+ (VALUE self)
827
+ #else /* HAVE_PROTOTYPES */
828
+ (self)
829
+ VALUE self;
830
+ #endif /* HAVE_PROTOTYPES */
831
+ {
832
+ return rb_attr_get(self, rb_failed);
833
+ }
834
+
835
+ /*
836
+ * Document-method: inspect
837
+ * call-seq: inspect
838
+ *
839
+ * Returns an inspected string of the form: #<_class_: _success_, _failed_>
840
+ */
841
+ static VALUE
842
+ iconv_failure_inspect
843
+ #ifdef HAVE_PROTOTYPES
844
+ (VALUE self)
845
+ #else /* HAVE_PROTOTYPES */
846
+ (self)
847
+ VALUE self;
848
+ #endif /* HAVE_PROTOTYPES */
849
+ {
850
+ const char *cname = rb_class2name(CLASS_OF(self));
851
+ VALUE success = rb_attr_get(self, rb_success);
852
+ VALUE failed = rb_attr_get(self, rb_failed);
853
+ VALUE str = rb_str_buf_cat2(rb_str_new2("#<"), cname);
854
+ str = rb_str_buf_cat(str, ": ", 2);
855
+ str = rb_str_buf_append(str, rb_inspect(success));
856
+ str = rb_str_buf_cat(str, ", ", 2);
857
+ str = rb_str_buf_append(str, rb_inspect(failed));
858
+ return rb_str_buf_cat(str, ">", 1);
859
+ }
860
+
861
+ /*
862
+ * Document-class: Iconv::InvalidEncoding
863
+ *
864
+ * Requested coding-system is not available on this system.
865
+ */
866
+
867
+ /*
868
+ * Document-class: Iconv::IllegalSequence
869
+ *
870
+ * Input conversion stopped due to an input byte that does not belong to
871
+ * the input codeset, or the output codeset does not contain the
872
+ * character.
873
+ */
874
+
875
+ /*
876
+ * Document-class: Iconv::InvalidCharacter
877
+ *
878
+ * Input conversion stopped due to an incomplete character or shift
879
+ * sequence at the end of the input buffer.
880
+ */
881
+
882
+ /*
883
+ * Document-class: Iconv::OutOfRange
884
+ *
885
+ * Iconv library internal error. Must not occur.
886
+ */
887
+
888
+ /*
889
+ * Document-class: Iconv::BrokenLibrary
890
+ *
891
+ * Detected a bug in the underlying iconv(3) library.
892
+ * * returns an error without setting errno properly
893
+ */
894
+
895
+ void
896
+ Init_iconv _((void))
897
+ {
898
+ VALUE rb_cIconv = rb_define_class("Iconv", rb_cData);
899
+
900
+ rb_define_alloc_func(rb_cIconv, iconv_s_allocate);
901
+ rb_define_singleton_method(rb_cIconv, "open", iconv_s_open, 2);
902
+ rb_define_singleton_method(rb_cIconv, "iconv", iconv_s_iconv, -1);
903
+ rb_define_singleton_method(rb_cIconv, "conv", iconv_s_conv, 3);
904
+ rb_define_method(rb_cIconv, "initialize", iconv_initialize, 2);
905
+ rb_define_method(rb_cIconv, "close", iconv_finish, 0);
906
+ rb_define_method(rb_cIconv, "iconv", iconv_iconv, -1);
907
+
908
+ rb_eIconvFailure = rb_define_module_under(rb_cIconv, "Failure");
909
+ rb_define_method(rb_eIconvFailure, "initialize", iconv_failure_initialize, 3);
910
+ rb_define_method(rb_eIconvFailure, "success", iconv_failure_success, 0);
911
+ rb_define_method(rb_eIconvFailure, "failed", iconv_failure_failed, 0);
912
+ rb_define_method(rb_eIconvFailure, "inspect", iconv_failure_inspect, 0);
913
+
914
+ rb_eIconvInvalidEncoding = rb_define_class_under(rb_cIconv, "InvalidEncoding", rb_eArgError);
915
+ rb_eIconvIllegalSeq = rb_define_class_under(rb_cIconv, "IllegalSequence", rb_eArgError);
916
+ rb_eIconvInvalidChar = rb_define_class_under(rb_cIconv, "InvalidCharacter", rb_eArgError);
917
+ rb_eIconvOutOfRange = rb_define_class_under(rb_cIconv, "OutOfRange", rb_eRuntimeError);
918
+ rb_eIconvBrokenLibrary = rb_define_class_under(rb_cIconv, "BrokenLibrary", rb_eRuntimeError);
919
+ rb_include_module(rb_eIconvInvalidEncoding, rb_eIconvFailure);
920
+ rb_include_module(rb_eIconvIllegalSeq, rb_eIconvFailure);
921
+ rb_include_module(rb_eIconvInvalidChar, rb_eIconvFailure);
922
+ rb_include_module(rb_eIconvOutOfRange, rb_eIconvFailure);
923
+ rb_include_module(rb_eIconvBrokenLibrary, rb_eIconvFailure);
924
+
925
+ rb_success = rb_intern("success");
926
+ rb_failed = rb_intern("failed");
927
+
928
+ rb_gc_register_address(&charset_map);
929
+ charset_map = rb_hash_new();
930
+ rb_define_singleton_method(rb_cIconv, "charset_map", charset_map_get, 0);
931
+ }
@@ -0,0 +1,53 @@
1
+ #! /usr/bin/ruby
2
+ require 'rbconfig'
3
+ require 'optparse'
4
+
5
+ # http://www.ctan.org/get/macros/texinfo/texinfo/gnulib/lib/config.charset
6
+ # Tue, 25 Dec 2007 00:00:00 GMT
7
+
8
+ HEADER = <<SRC
9
+ require 'iconv.so'
10
+
11
+ class Iconv
12
+ case RUBY_PLATFORM
13
+ SRC
14
+
15
# Translates the <tt>case "$os" in</tt> statement of gnulib's
# config.charset into Ruby source that fills Iconv.charset_map according
# to RUBY_PLATFORM.
#
# config_charset:: path of the config.charset shell script
# mapfile::        path of the file to write; nil writes to standard output
#
# Returns nil without writing anything when the case statement is not
# found in +config_charset+.
def charset_alias(config_charset, mapfile = nil)
  # Canonical names already emitted for the current case arm; nil while
  # outside of any arm.
  found = nil
  src = [HEADER]
  # File.open rather than Kernel#open: a path that starts with "|" must be
  # treated as a file name, never executed as a command.
  File.open(config_charset) do |input|
    return unless input.find { |line| /^case "\$os" in/ =~ line }
    input.each do |line|
      case line
      when /^\s*([-\w\*]+(?:\s*\|\s*[-\w\*]+)*)(?=\))/
        # Shell patterns of a case arm: turn each glob into a regexp.
        regexps = $&.split('|').collect do |targ|
          targ.strip!
          tail = targ.chomp!("*") ? '' : '\z'
          head = targ.slice!(/\A\*/) ? '' : '\A'
          targ.gsub!(/\*/, '.*')
          "/#{head}#{targ}#{tail}/"
        end
        src << (" when " + regexps.join(", "))
        found = {}
      when /^\s*echo "(?:\$\w+\.)?([-\w*]+)\s+([-\w]+)"/
        # An echo outside of a case arm has no "when" to belong to;
        # skip it instead of failing on the nil +found+.
        next unless found
        sys, can = $1, $2
        can.downcase!
        unless found[can] or (/\Aen_(?!US\z)/ =~ sys && /\ACP437\z/i =~ can)
          found[can] = true
          src << " charset_map['#{can}'] = '#{sys}'.freeze"
        end
      when /^\s*;;/
        found = nil
      end
    end
  end
  src << " end" << "end"
  if mapfile
    File.open(mapfile, "wb") { |f| f.puts(*src) }
  else
    puts(*src)
  end
end
51
+
52
# Command-line driver: requires the config.charset path and accepts an
# optional output file.
abort "usage: #{$0} config_charset [mapfile]" unless (1..2) === ARGV.size
charset_alias(*ARGV)
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: iconv
3
+ version: !ruby/object:Gem::Version
4
+ hash: 9
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ version: "0.1"
10
+ platform: ruby
11
+ authors:
12
+ - Yukihiro Matsumoto
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-08-19 00:00:00 +04:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description: If you're using bundler, and your OS doesn't have iconv for some reason (silly FreeBSD), you can install it with this gem!
22
+ email:
23
+ executables: []
24
+
25
+ extensions:
26
+ - ext/iconv/extconf.rb
27
+ extra_rdoc_files: []
28
+
29
+ files:
30
+ - ext/iconv/charset_alias.rb
31
+ - ext/iconv/extconf.rb
32
+ - ext/iconv/iconv.c
33
+ - ext/iconv/mkwrapper.rb
34
+ has_rdoc: true
35
+ homepage: http://github.com/ruby/ruby
36
+ licenses: []
37
+
38
+ post_install_message:
39
+ rdoc_options: []
40
+
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ none: false
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ hash: 3
49
+ segments:
50
+ - 0
51
+ version: "0"
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ requirements: []
62
+
63
+ rubyforge_project:
64
+ rubygems_version: 1.3.7
65
+ signing_key:
66
+ specification_version: 3
67
+ summary: iconv extension. The same as you can install with ruby
68
+ test_files: []
69
+