rubysl-nkf 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,34 @@
1
+ #ifndef _UTF8TBL_H_ /* include guard; this header only declares tables, it defines none */
2
+ #define _UTF8TBL_H_
3
+
4
+ #ifdef UTF8_OUTPUT_ENABLE /* declarations below are visible only when this macro is defined */
5
+ extern const unsigned short euc_to_utf8_1byte[];
6
+ extern const unsigned short *const euc_to_utf8_2bytes[]; /* array of pointers to rows of unsigned short */
7
+ extern const unsigned short *const euc_to_utf8_2bytes_ms[];
8
+ extern const unsigned short *const x0212_to_utf8_2bytes[];
9
+ #endif /* UTF8_OUTPUT_ENABLE */
10
+
11
+ #ifdef UTF8_INPUT_ENABLE /* declarations below are visible only when this macro is defined */
12
+ extern const unsigned short *const utf8_to_euc_2bytes[];
13
+ extern const unsigned short *const utf8_to_euc_2bytes_ms[];
14
+ extern const unsigned short *const utf8_to_euc_2bytes_932[];
15
+ extern const unsigned short *const *const utf8_to_euc_3bytes[]; /* one more level of indirection than the 2bytes tables */
16
+ extern const unsigned short *const *const utf8_to_euc_3bytes_ms[];
17
+ extern const unsigned short *const *const utf8_to_euc_3bytes_932[];
18
+ #endif /* UTF8_INPUT_ENABLE */
19
+
20
+ #ifdef UNICODE_NORMALIZATION
21
+ extern const struct normalization_pair normalization_table[]; /* struct normalization_pair is not declared in this header; it must be visible before inclusion */
22
+ #endif /* UNICODE_NORMALIZATION */
23
+
24
+ #ifdef SHIFTJIS_CP932
25
+ extern const unsigned short shiftjis_cp932[3][189]; /* fixed 3 x 189 table */
26
+ extern const unsigned short cp932inv[2][189]; /* fixed 2 x 189 table */
27
+ #endif /* SHIFTJIS_CP932 */
28
+
29
+ #ifdef X0212_ENABLE
30
+ extern const unsigned short shiftjis_x0212[3][189]; /* fixed 3 x 189 table */
31
+ extern const unsigned short *const x0212_shiftjis[];
32
+ #endif /* X0212_ENABLE */
33
+
34
+ #endif /* _UTF8TBL_H_ */
@@ -0,0 +1,654 @@
1
+ /*
2
+ * NKF - Ruby extension for Network Kanji Filter
3
+ *
4
+ * original nkf2.x is maintained at http://sourceforge.jp/projects/nkf/
5
+ *
6
+ * $Id: nkf.c 11905 2007-02-27 10:38:32Z knu $
7
+ *
8
+ */
9
+
10
+ #define RUBY_NKF_REVISION "$Revision: 11905 $"
11
+ #define RUBY_NKF_VERSION NKF_VERSION " (" NKF_RELEASE_DATE ")"
12
+
13
+ #include "ruby.h"
14
+
15
+ /* Encoding Constants */
16
+ #define _AUTO 0
17
+ #define _JIS 1
18
+ #define _EUC 2
19
+ #define _SJIS 3
20
+ #define _BINARY 4
21
+ #define _NOCONV 4
22
+ #define _ASCII 5
23
+ /* 0b011x is reserved for UTF-8 Family */
24
+ #define _UTF8 6
25
+ /* 0b10xx is reserved for UTF-16 Family */
26
+ #define _UTF16 8
27
+ /* 0b11xx is reserved for UTF-32 Family */
28
+ #define _UTF32 12
29
+ #define _OTHER 16
30
+ #define _UNKNOWN _AUTO
31
+
32
+ /* Replace nkf's getchar/putchar for variable modification */
33
+ /* we never use getc, ungetc */
34
+
35
+ #undef getc
36
+ #undef ungetc
37
+ #define getc(f) (input_ctr>=i_len?-1:input[input_ctr++])
38
+ #define ungetc(c,f) input_ctr--
39
+
40
+ #define INCSIZE 32
41
+ #undef putchar
42
+ #undef TRUE
43
+ #undef FALSE
44
+ #define putchar(c) rb_nkf_putchar(c)
45
+
46
+ /* Input/Output pointers */
47
+
48
+ static unsigned char *output;
49
+ static unsigned char *input;
50
+ static int input_ctr;
51
+ static int i_len;
52
+ static int output_ctr;
53
+ static int o_len;
54
+ static int incsize;
55
+
56
+ static VALUE result;
57
+
58
+ static int
59
+ rb_nkf_putchar(c)
60
+ unsigned int c;
61
+ {
62
+ if (output_ctr >= o_len) {
63
+ o_len += incsize;
64
+ rb_str_resize(result, o_len);
65
+ incsize *= 2;
66
+ output = (unsigned char *)RSTRING(result)->ptr;
67
+ }
68
+ output[output_ctr++] = c;
69
+
70
+ return c;
71
+ }
72
+
73
+ /* Include kanji filter main part */
74
+ /* getchar and putchar will be replaced during inclusion */
75
+
76
+ #define PERL_XS 1
77
+ #include "nkf-utf8/config.h"
78
+ #include "nkf-utf8/utf8tbl.c"
79
+ #include "nkf-utf8/nkf.c"
80
+
81
+ int nkf_split_options(arg)
82
+ const char* arg;
83
+ {
84
+ int count = 0;
85
+ char option[256];
86
+ int i = 0, j = 0;
87
+ int is_escaped = FALSE;
88
+ int is_single_quoted = FALSE;
89
+ int is_double_quoted = FALSE;
90
+ for(i = 0; arg[i]; i++){
91
+ if(j == 255){
92
+ return -1;
93
+ }else if(is_single_quoted){
94
+ if(arg[i] == '\''){
95
+ is_single_quoted = FALSE;
96
+ }else{
97
+ option[j++] = arg[i];
98
+ }
99
+ }else if(is_escaped){
100
+ is_escaped = FALSE;
101
+ option[j++] = arg[i];
102
+ }else if(arg[i] == '\\'){
103
+ is_escaped = TRUE;
104
+ }else if(is_double_quoted){
105
+ if(arg[i] == '"'){
106
+ is_double_quoted = FALSE;
107
+ }else{
108
+ option[j++] = arg[i];
109
+ }
110
+ }else if(arg[i] == '\''){
111
+ is_single_quoted = TRUE;
112
+ }else if(arg[i] == '"'){
113
+ is_double_quoted = TRUE;
114
+ }else if(arg[i] == ' '){
115
+ option[j] = '\0';
116
+ options((unsigned char*)option);
117
+ j = 0;
118
+ }else{
119
+ option[j++] = arg[i];
120
+ }
121
+ }
122
+ if(j){
123
+ option[j] = '\0';
124
+ options((unsigned char*)option);
125
+ }
126
+ return count;
127
+ }
128
+
129
+ /*
130
+ * call-seq:
131
+ * NKF.nkf(opt, str) -> string
132
+ *
133
+ * Convert _str_ and return converted result.
134
+ * Conversion details are specified by _opt_ as String.
135
+ *
136
+ * require 'nkf'
137
+ * output = NKF.nkf("-s", input)
138
+ *
139
+ * *Note*
140
+ * By default, nkf decodes MIME encoded string.
141
+ * If you want not to decode input, use NKF.nkf with <b>-m0</b> flag.
142
+ */
143
+
144
+ static VALUE
145
+ rb_nkf_kconv(obj, opt, src)
146
+ VALUE obj, opt, src;
147
+ {
148
+ char *opt_ptr, *opt_end;
149
+ volatile VALUE v;
150
+
151
+ reinit();
152
+ StringValue(opt);
153
+ opt_ptr = RSTRING(opt)->ptr;
154
+ opt_end = opt_ptr + RSTRING(opt)->len;
155
+ nkf_split_options(opt_ptr);
156
+
157
+ incsize = INCSIZE;
158
+
159
+ input_ctr = 0;
160
+ StringValue(src);
161
+ input = (unsigned char *)RSTRING(src)->ptr;
162
+ i_len = RSTRING(src)->len;
163
+ result = rb_str_new(0, i_len*3 + 10);
164
+ v = result;
165
+
166
+ output_ctr = 0;
167
+ output = (unsigned char *)RSTRING(result)->ptr;
168
+ o_len = RSTRING(result)->len;
169
+ *output = '\0';
170
+
171
+ if(x0201_f == WISH_TRUE)
172
+ x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
173
+
174
+ kanji_convert(NULL);
175
+ RSTRING(result)->ptr[output_ctr] = '\0';
176
+ RSTRING(result)->len = output_ctr;
177
+ OBJ_INFECT(result, src);
178
+
179
+ return result;
180
+ }
181
+
182
+
183
+ /*
184
+ * call-seq:
185
+ * NKF.guess1(str) -> integer
186
+ *
187
+ * Returns guessed encoding of _str_ as integer.
188
+ *
189
+ * Algorithm described in:
190
+ * Ken Lunde. `Understanding Japanese Information Processing'
191
+ * Sebastopol, CA: O'Reilly & Associates.
192
+ *
193
+ * case NKF.guess1(input)
194
+ * when NKF::JIS
195
+ * "ISO-2022-JP"
196
+ * when NKF::SJIS
197
+ * "Shift_JIS"
198
+ * when NKF::EUC
199
+ * "EUC-JP"
200
+ * when NKF::UNKNOWN
201
+ * "UNKNOWN(ASCII)"
202
+ * when NKF::BINARY
203
+ * "BINARY"
204
+ * end
205
+ */
206
+
207
+ static VALUE
208
+ rb_nkf_guess1(obj, src)
209
+ VALUE obj, src;
210
+ {
211
+ unsigned char *p;
212
+ unsigned char *pend;
213
+ int sequence_counter = 0;
214
+
215
+ StringValue(src);
216
+ p = (unsigned char *)RSTRING(src)->ptr;
217
+ pend = p + RSTRING(src)->len;
218
+ if (p == pend) return INT2FIX(_UNKNOWN);
219
+
220
+ #define INCR do {\
221
+ p++;\
222
+ if (p==pend) return INT2FIX(_UNKNOWN);\
223
+ sequence_counter++;\
224
+ if (sequence_counter % 2 == 1 && *p != 0xa4)\
225
+ sequence_counter = 0;\
226
+ if (6 <= sequence_counter) {\
227
+ sequence_counter = 0;\
228
+ return INT2FIX(_EUC);\
229
+ }\
230
+ } while (0)
231
+
232
+ if (*p == 0xa4)
233
+ sequence_counter = 1;
234
+
235
+ while (p<pend) {
236
+ if (*p == '\033') {
237
+ return INT2FIX(_JIS);
238
+ }
239
+ if (*p < '\006' || *p == 0x7f || *p == 0xff) {
240
+ return INT2FIX(_BINARY);
241
+ }
242
+ if (0x81 <= *p && *p <= 0x8d) {
243
+ return INT2FIX(_SJIS);
244
+ }
245
+ if (0x8f <= *p && *p <= 0x9f) {
246
+ return INT2FIX(_SJIS);
247
+ }
248
+ if (*p == 0x8e) { /* SS2 */
249
+ INCR;
250
+ if ((0x40 <= *p && *p <= 0x7e) ||
251
+ (0x80 <= *p && *p <= 0xa0) ||
252
+ (0xe0 <= *p && *p <= 0xfc))
253
+ return INT2FIX(_SJIS);
254
+ }
255
+ else if (0xa1 <= *p && *p <= 0xdf) {
256
+ INCR;
257
+ if (0xf0 <= *p && *p <= 0xfe)
258
+ return INT2FIX(_EUC);
259
+ if (0xe0 <= *p && *p <= 0xef) {
260
+ while (p < pend && *p >= 0x40) {
261
+ if (*p >= 0x81) {
262
+ if (*p <= 0x8d || (0x8f <= *p && *p <= 0x9f)) {
263
+ return INT2FIX(_SJIS);
264
+ }
265
+ else if (0xfd <= *p && *p <= 0xfe) {
266
+ return INT2FIX(_EUC);
267
+ }
268
+ }
269
+ INCR;
270
+ }
271
+ }
272
+ else if (*p <= 0x9f) {
273
+ return INT2FIX(_SJIS);
274
+ }
275
+ }
276
+ else if (0xf0 <= *p && *p <= 0xfe) {
277
+ return INT2FIX(_EUC);
278
+ }
279
+ else if (0xe0 <= *p && *p <= 0xef) {
280
+ INCR;
281
+ if ((0x40 <= *p && *p <= 0x7e) ||
282
+ (0x80 <= *p && *p <= 0xa0)) {
283
+ return INT2FIX(_SJIS);
284
+ }
285
+ if (0xfd <= *p && *p <= 0xfe) {
286
+ return INT2FIX(_EUC);
287
+ }
288
+ }
289
+ INCR;
290
+ }
291
+ return INT2FIX(_UNKNOWN);
292
+ }
293
+
294
+
295
+ /*
296
+ * call-seq:
297
+ * NKF.guess2(str) -> integer
298
+ *
299
+ * Returns guessed encoding of _str_ as integer by nkf routine.
300
+ *
301
+ * case NKF.guess(input)
302
+ * when NKF::ASCII
303
+ * "ASCII"
304
+ * when NKF::JIS
305
+ * "ISO-2022-JP"
306
+ * when NKF::SJIS
307
+ * "Shift_JIS"
308
+ * when NKF::EUC
309
+ * "EUC-JP"
310
+ * when NKF::UTF8
311
+ * "UTF-8"
312
+ * when NKF::UTF16
313
+ * "UTF-16"
314
+ * when NKF::UNKNOWN
315
+ * "UNKNOWN"
316
+ * when NKF::BINARY
317
+ * "BINARY"
318
+ * end
319
+ */
320
+
321
+ static VALUE
322
+ rb_nkf_guess2(obj, src)
323
+ VALUE obj, src;
324
+ {
325
+ int code = _BINARY;
326
+
327
+ reinit();
328
+
329
+ input_ctr = 0;
330
+ StringValue(src);
331
+ input = (unsigned char *)RSTRING(src)->ptr;
332
+ i_len = RSTRING(src)->len;
333
+
334
+ if(x0201_f == WISH_TRUE)
335
+ x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
336
+
337
+ guess_f = TRUE;
338
+ kanji_convert( NULL );
339
+ guess_f = FALSE;
340
+
341
+ if (!is_inputcode_mixed) {
342
+ if (strcmp(input_codename, "") == 0) {
343
+ code = _ASCII;
344
+ } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
345
+ code = _JIS;
346
+ } else if (strcmp(input_codename, "EUC-JP") == 0) {
347
+ code = _EUC;
348
+ } else if (strcmp(input_codename, "Shift_JIS") == 0) {
349
+ code = _SJIS;
350
+ } else if (strcmp(input_codename, "UTF-8") == 0) {
351
+ code = _UTF8;
352
+ } else if (strcmp(input_codename, "UTF-16") == 0) {
353
+ code = _UTF16;
354
+ } else if (strlen(input_codename) > 0) {
355
+ code = _UNKNOWN;
356
+ }
357
+ }
358
+
359
+ return INT2FIX( code );
360
+ }
361
+
362
+
363
+ /*
364
+ * NKF - Ruby extension for Network Kanji Filter
365
+ *
366
+ * == Description
367
+ *
368
+ * This is a Ruby Extension version of nkf (Network Kanji Filter).
369
+ * It converts the first argument and return converted result. Conversion
370
+ * details are specified by flags as the first argument.
371
+ *
372
+ * *Nkf* is yet another kanji code converter among networks, hosts and terminals.
373
+ * It converts input kanji code to designated kanji code
374
+ * such as ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8 or UTF-16.
375
+ *
376
+ * One of the most distinctive features of *nkf* is that it guesses the input kanji encoding.
377
+ * It currently recognizes ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8 and UTF-16.
378
+ * So users needn't set the input kanji code explicitly.
379
+ *
380
+ * By default, X0201 kana is converted into X0208 kana.
381
+ * For X0201 kana, SO/SI, SSO and ESC-(-I methods are supported.
382
+ * For automatic code detection, nkf assumes no X0201 kana in Shift_JIS.
383
+ * To accept X0201 in Shift_JIS, use <b>-X</b>, <b>-x</b> or <b>-S</b>.
384
+ *
385
+ * == Flags
386
+ *
387
+ * === -b -u
388
+ *
389
+ * Output is buffered (DEFAULT), Output is unbuffered.
390
+ *
391
+ * === -j -s -e -w -w16
392
+ *
393
+ * Output code is ISO-2022-JP (7bit JIS), Shift_JIS, EUC-JP,
394
+ * UTF-8N, UTF-16BE.
395
+ * Without this option and compile option, ISO-2022-JP is assumed.
396
+ *
397
+ * === -J -S -E -W -W16
398
+ *
399
+ * Input assumption is JIS 7 bit, Shift_JIS, EUC-JP,
400
+ * UTF-8, UTF-16LE.
401
+ *
402
+ * ==== -J
403
+ *
404
+ * Assume JIS input. It also accepts EUC-JP.
405
+ * This is the default. This flag does not exclude Shift_JIS.
406
+ *
407
+ * ==== -S
408
+ *
409
+ * Assume Shift_JIS and X0201 kana input. It also accepts JIS.
410
+ * EUC-JP is recognized as X0201 kana. Without <b>-x</b> flag,
411
+ * X0201 kana (halfwidth kana) is converted into X0208.
412
+ *
413
+ * ==== -E
414
+ *
415
+ * Assume EUC-JP input. It also accepts JIS.
416
+ * Same as -J.
417
+ *
418
+ * === -t
419
+ *
420
+ * No conversion.
421
+ *
422
+ * === -i_
423
+ *
424
+ * Output sequence to designate JIS-kanji. (DEFAULT B)
425
+ *
426
+ * === -o_
427
+ *
428
+ * Output sequence to designate ASCII. (DEFAULT B)
429
+ *
430
+ * === -r
431
+ *
432
+ * {de/en}crypt ROT13/47
433
+ *
434
+ * === -h[123] --hiragana --katakana --katakana-hiragana
435
+ *
436
+ * [-h1 --hiragana] Katakana to Hiragana conversion.
437
+ *
438
+ * [-h2 --katakana] Hiragana to Katakana conversion.
439
+ *
440
+ * [-h3 --katakana-hiragana] Katakana to Hiragana and Hiragana to Katakana conversion.
441
+ *
442
+ * === -T
443
+ *
444
+ * Text mode output (MS-DOS)
445
+ *
446
+ * === -l
447
+ *
448
+ * ISO8859-1 (Latin-1) support
449
+ *
450
+ * === -f[<code>m</code> [- <code>n</code>]]
451
+ *
452
+ * Folding on <code>m</code> length with <code>n</code> margin in a line.
453
+ * Without this option, fold length is 60 and fold margin is 10.
454
+ *
455
+ * === -F
456
+ *
457
+ * New line preserving line folding.
458
+ *
459
+ * === -Z[0-3]
460
+ *
461
+ * Convert X0208 alphabet (Fullwidth Alphabets) to ASCII.
462
+ *
463
+ * [-Z -Z0] Convert X0208 alphabet to ASCII.
464
+ *
465
+ * [-Z1] Converts X0208 kankaku to single ASCII space.
466
+ *
467
+ * [-Z2] Converts X0208 kankaku to double ASCII spaces.
468
+ *
469
+ * [-Z3] Replacing Fullwidth >, <, ", & into '&gt;', '&lt;', '&quot;', '&amp;' as in HTML.
470
+ *
471
+ * === -X -x
472
+ *
473
+ * Assume X0201 kana in MS-Kanji.
474
+ * With <b>-X</b> or without this option, X0201 is converted into X0208 Kana.
475
+ * With <b>-x</b>, try to preserve X0208 kana and do not convert X0201 kana to X0208.
476
+ * In JIS output, ESC-(-I is used. In EUC output, SSO is used.
477
+ *
478
+ * === -B[0-2]
479
+ *
480
+ * Assume broken JIS-Kanji input, which lost ESC.
481
+ * Useful when your site is using old B-News Nihongo patch.
482
+ *
483
+ * [-B1] allows any char after ESC-( or ESC-$.
484
+ *
485
+ * [-B2] forces ASCII after NL.
486
+ *
487
+ * === -I
488
+ *
489
+ * Replacing non iso-2022-jp char into a geta character
490
+ * (substitute character in Japanese).
491
+ *
492
+ * === -d -c
493
+ *
494
+ * Delete \r in line feed, Add \r in line feed.
495
+ *
496
+ * === -m[BQN0]
497
+ *
498
+ * MIME ISO-2022-JP/ISO8859-1 decode. (DEFAULT)
499
+ * To see ISO8859-1 (Latin-1) -l is necessary.
500
+ *
501
+ * [-mB] Decode MIME base64 encoded stream. Remove header or other part before
502
+ * conversion.
503
+ *
504
+ * [-mQ] Decode MIME quoted stream. '_' in quoted stream is converted to space.
505
+ *
506
+ * [-mN] Non-strict decoding.
507
+ * It allows line break in the middle of the base64 encoding.
508
+ *
509
+ * [-m0] No MIME decode.
510
+ *
511
+ * === -M
512
+ *
513
+ * MIME encode. Header style. All ASCII code and control characters are intact.
514
+ * Kanji conversion is performed before encoding, so this cannot be used as a picture encoder.
515
+ *
516
+ * [-MB] MIME encode Base64 stream.
517
+ *
518
+ * [-MQ] Perform quoted encoding.
519
+ *
520
+ * === -l
521
+ *
522
+ * Input and output code is ISO8859-1 (Latin-1) and ISO-2022-JP.
523
+ * <b>-s</b>, <b>-e</b> and <b>-x</b> are not compatible with this option.
524
+ *
525
+ * === -L[uwm]
526
+ *
527
+ * new line mode
528
+ * Without this option, nkf doesn't convert line breaks.
529
+ *
530
+ * [-Lu] unix (LF)
531
+ *
532
+ * [-Lw] windows (CRLF)
533
+ *
534
+ * [-Lm] mac (CR)
535
+ *
536
+ * === --fj --unix --mac --msdos --windows
537
+ *
538
+ * convert for these system
539
+ *
540
+ * === --jis --euc --sjis --mime --base64
541
+ *
542
+ * convert for named code
543
+ *
544
+ * === --jis-input --euc-input --sjis-input --mime-input --base64-input
545
+ *
546
+ * assume input system
547
+ *
548
+ * === --ic=<code>input codeset</code> --oc=<code>output codeset</code>
549
+ *
550
+ * Set the input or output codeset.
551
+ * NKF supports the following codesets; codeset names are case insensitive.
552
+ *
553
+ * [ISO-2022-JP] a.k.a. RFC1468, 7bit JIS, JUNET
554
+ *
555
+ * [EUC-JP (eucJP-nkf)] a.k.a. AT&T JIS, Japanese EUC, UJIS
556
+ *
557
+ * [eucJP-ascii] a.k.a. x-eucjp-open-19970715-ascii
558
+ *
559
+ * [eucJP-ms] a.k.a. x-eucjp-open-19970715-ms
560
+ *
561
+ * [CP51932] Microsoft Version of EUC-JP.
562
+ *
563
+ * [Shift_JIS] SJIS, MS-Kanji
564
+ *
565
+ * [CP932] a.k.a. Windows-31J
566
+ *
567
+ * [UTF-8] same as UTF-8N
568
+ *
569
+ * [UTF-8N] UTF-8 without BOM
570
+ *
571
+ * [UTF-8-BOM] UTF-8 with BOM
572
+ *
573
+ * [UTF-16] same as UTF-16BE
574
+ *
575
+ * [UTF-16BE] UTF-16 Big Endian without BOM
576
+ *
577
+ * [UTF-16BE-BOM] UTF-16 Big Endian with BOM
578
+ *
579
+ * [UTF-16LE] UTF-16 Little Endian without BOM
580
+ *
581
+ * [UTF-16LE-BOM] UTF-16 Little Endian with BOM
582
+ *
583
+ * [UTF8-MAC] NFDed UTF-8, a.k.a. UTF8-NFD (input only)
584
+ *
585
+ * === --fb-{skip, html, xml, perl, java, subchar}
586
+ *
587
+ * Specify the way that nkf handles unassigned characters.
588
+ * Without this option, --fb-skip is assumed.
589
+ *
590
+ * === --prefix= <code>escape character</code> <code>target character</code> ..
591
+ *
592
+ * When nkf converts to Shift_JIS,
593
+ * nkf adds a specified escape character to specified 2nd byte of Shift_JIS characters.
594
+ * 1st byte of argument is the escape character and following bytes are target characters.
595
+ *
596
+ * === --disable-cp932ext
597
+ *
598
+ * Handle the characters extended in CP932 as unassigned characters.
599
+ *
600
+ * === --cap-input
601
+ *
602
+ * Decode hex encoded characters.
603
+ *
604
+ * === --url-input
605
+ *
606
+ * Unescape percent escaped characters.
607
+ *
608
+ * === --
609
+ *
610
+ * Ignore rest of -option.
611
+ */
612
+
613
+ void
614
+ Init_nkf()
615
+ {
616
+ /* hoge */
617
+ VALUE mKconv = rb_define_module("NKF");
618
+ /* hoge */
619
+
620
+ rb_define_module_function(mKconv, "nkf", rb_nkf_kconv, 2);
621
+ rb_define_module_function(mKconv, "guess1", rb_nkf_guess1, 1);
622
+ rb_define_module_function(mKconv, "guess2", rb_nkf_guess2, 1);
623
+ rb_define_alias(mKconv, "guess", "guess2");
624
+ rb_define_alias(rb_singleton_class(mKconv), "guess", "guess2");
625
+
626
+ /* Auto-Detect */
627
+ rb_define_const(mKconv, "AUTO", INT2FIX(_AUTO));
628
+ /* ISO-2022-JP */
629
+ rb_define_const(mKconv, "JIS", INT2FIX(_JIS));
630
+ /* EUC-JP */
631
+ rb_define_const(mKconv, "EUC", INT2FIX(_EUC));
632
+ /* Shift_JIS */
633
+ rb_define_const(mKconv, "SJIS", INT2FIX(_SJIS));
634
+ /* BINARY */
635
+ rb_define_const(mKconv, "BINARY", INT2FIX(_BINARY));
636
+ /* No conversion */
637
+ rb_define_const(mKconv, "NOCONV", INT2FIX(_NOCONV));
638
+ /* ASCII */
639
+ rb_define_const(mKconv, "ASCII", INT2FIX(_ASCII));
640
+ /* UTF-8 */
641
+ rb_define_const(mKconv, "UTF8", INT2FIX(_UTF8));
642
+ /* UTF-16 */
643
+ rb_define_const(mKconv, "UTF16", INT2FIX(_UTF16));
644
+ /* UTF-32 */
645
+ rb_define_const(mKconv, "UTF32", INT2FIX(_UTF32));
646
+ /* UNKNOWN */
647
+ rb_define_const(mKconv, "UNKNOWN", INT2FIX(_UNKNOWN));
648
+ /* Full version string of nkf */
649
+ rb_define_const(mKconv, "VERSION", rb_str_new2(RUBY_NKF_VERSION));
650
+ /* Version of nkf */
651
+ rb_define_const(mKconv, "NKF_VERSION", rb_str_new2(NKF_VERSION));
652
+ /* Release date of nkf */
653
+ rb_define_const(mKconv, "NKF_RELEASE_DATE", rb_str_new2(NKF_RELEASE_DATE));
654
+ }