nkf 0.2.0-java

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,601 @@
1
+ /***** BEGIN LICENSE BLOCK *****
2
+ * Version: EPL 2.0/LGPL 2.1
3
+ *
4
+ * The contents of this file are subject to the Eclipse Public
5
+ * License Version 2.0 (the "License"); you may not use this file
6
+ * except in compliance with the License. You may obtain a copy of
7
+ * the License at http://www.eclipse.org/legal/epl-v20.html
8
+ *
9
+ * Software distributed under the License is distributed on an "AS
10
+ * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
11
+ * implied. See the License for the specific language governing
12
+ * rights and limitations under the License.
13
+ *
14
+ * Copyright (C) 2007-2011 Koichiro Ohba <koichiro@meadowy.org>
15
+ *
16
+ * Alternatively, the contents of this file may be used under the terms of
17
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
18
+ * in which case the provisions of the LGPL are applicable instead
19
+ * of those above. If you wish to allow use of your version of this file only
20
+ * under the terms of either the LGPL, and not to allow others to
21
+ * use your version of this file under the terms of the EPL, indicate your
22
+ * decision by deleting the provisions above and replace them with the notice
23
+ * and other provisions required by the LGPL. If you do not delete
24
+ * the provisions above, a recipient may use your version of this file under
25
+ * the terms of any one of the EPL, the LGPL.
26
+ ***** END LICENSE BLOCK *****/
27
+
28
+ package org.jruby.ext.nkf;
29
+
30
+ import java.nio.ByteBuffer;
31
+ import java.nio.CharBuffer;
32
+ import java.nio.charset.CharacterCodingException;
33
+ import java.nio.charset.Charset;
34
+ import java.nio.charset.CharsetDecoder;
35
+ import java.nio.charset.CharsetEncoder;
36
+ import java.nio.charset.UnsupportedCharsetException;
37
+ import java.util.ArrayList;
38
+ import java.util.Map;
39
+ import java.util.HashMap;
40
+
41
+ import org.jcodings.Encoding;
42
+ import org.jcodings.specific.ASCIIEncoding;
43
+ import org.jcodings.specific.UTF8Encoding;
44
+ import org.jcodings.transcode.EConv;
45
+ import org.jcodings.transcode.EConvFlags;
46
+ import org.jruby.Ruby;
47
+ import org.jruby.RubyArray;
48
+ import org.jruby.RubyModule;
49
+ import org.jruby.RubyString;
50
+
51
+ import org.jruby.anno.JRubyMethod;
52
+ import org.jruby.anno.JRubyModule;
53
+ import org.jruby.runtime.Helpers;
54
+ import org.jruby.runtime.ThreadContext;
55
+ import org.jruby.runtime.builtin.IRubyObject;
56
+ import org.jruby.util.ByteList;
57
+ import org.jruby.util.KCode;
58
+ import org.jruby.util.Pack;
59
+ import org.jruby.util.io.EncodingUtils;
60
+
61
+ @JRubyModule(name="NKF")
62
+ public class RubyNKF {
63
/**
 * Character sets and transfer encodings known to the NKF module.
 *
 * Each constant carries a numeric code and the charset name used when
 * converting, or {@code null} when no charset name applies. Several
 * constants intentionally share a numeric code (AUTO/UNKNOWN, BINARY/NOCONV).
 */
public enum NKFCharset {
    AUTO(0, "x-JISAutoDetect"),
    // no ISO-2022-JP in jcodings
    JIS(1, "ISO-2022-JP"),
    EUC(2, "EUC-JP"),
    SJIS(3, "Shift_JIS"),
    BINARY(4, null),
    NOCONV(4, null),
    UNKNOWN(0, null),
    ASCII(5, "iso-8859-1"),
    UTF8(6, "UTF-8"),
    UTF16(8, "UTF-16"),
    UTF32(12, "UTF-32"),
    OTHER(16, null),
    BASE64(20, "base64"),
    QENCODE(21, "qencode"),
    MIME_DETECT(22, "MimeAutoDetect");

    /** Numeric code of this constant. */
    private final int value;
    /** Charset name, or {@code null} when none applies. */
    private final String charset;

    NKFCharset(int code, String charsetName) {
        value = code;
        charset = charsetName;
    }

    /** @return the numeric code of this constant */
    public int getValue() {
        return this.value;
    }

    /** @return the charset name, or {@code null} when none applies */
    public String getCharset() {
        return this.charset;
    }
}
97
+
98
+ private static final ByteList BEGIN_MIME_STRING = new ByteList(ByteList.plain("=?"));
99
+ private static final ByteList END_MIME_STRING = new ByteList(ByteList.plain("?="));
100
+ private static final ByteList PACK_BASE64 = new ByteList(ByteList.plain("m"));
101
+ private static final ByteList PACK_QENCODE = new ByteList(ByteList.plain("M"));
102
+
103
+ public static final Map<Integer, String> NKFCharsetMap = new HashMap<Integer, String>(20, 1);
104
+
105
+ public static void load(Ruby runtime) {
106
+ createNKF(runtime);
107
+ }
108
+
109
+ public static void createNKF(Ruby runtime) {
110
+ final RubyModule NKF = runtime.defineModule("NKF");
111
+ final String version = "2.1.2";
112
+ final String relDate = "2011-09-08";
113
+
114
+ NKF.defineConstant("NKF_VERSION", runtime.newString(version));
115
+ NKF.defineConstant("NKF_RELEASE_DATE", runtime.newString(relDate));
116
+ NKF.defineConstant("VERSION", runtime.newString(version + ' ' + '(' + "JRuby" + '_' + relDate + ')'));
117
+
118
+ for ( NKFCharset charset : NKFCharset.values() ) {
119
+ NKFCharsetMap.put(charset.value, charset.name());
120
+
121
+ if (charset.value > 12 ) continue;
122
+ NKF.defineConstant(charset.name(), charsetMappedValue(runtime, charset));
123
+ }
124
+
125
+ NKF.defineAnnotatedMethods(RubyNKF.class);
126
+ }
127
+
128
+ @JRubyMethod(name = "guess", module = true)
129
+ public static IRubyObject guess(ThreadContext context, IRubyObject recv, IRubyObject s) {
130
+ return charsetMappedValue(context.runtime, guess(context, s));
131
+ }
132
+
133
+ public static NKFCharset guess(ThreadContext context, IRubyObject s) {
134
+ // TODO: Fix charset usage for JRUBY-4553
135
+ Ruby runtime = context.runtime;
136
+ if (!s.respondsTo("to_str")) {
137
+ throw runtime.newTypeError("can't convert " + s.getMetaClass() + " into String");
138
+ }
139
+ ByteList bytes = s.convertToString().getByteList();
140
+ ByteBuffer buf = ByteBuffer.wrap(bytes.getUnsafeBytes(), bytes.begin(), bytes.length());
141
+ CharsetDecoder decoder;
142
+ try {
143
+ decoder = Charset.forName("x-JISAutoDetect").newDecoder();
144
+ } catch (UnsupportedCharsetException e) {
145
+ throw runtime.newStandardError("charsets.jar is required to use NKF#guess. Please install JRE which supports m17n.");
146
+ }
147
+ try {
148
+ decoder.decode(buf);
149
+
150
+ if ( ! decoder.isCharsetDetected() ) {
151
+ return NKFCharset.UNKNOWN;
152
+ }
153
+ Charset charset = decoder.detectedCharset();
154
+ String name = charset.name();
155
+ if ("Shift_JIS".equals(name)) {
156
+ return NKFCharset.SJIS;
157
+ }
158
+ if ("Windows-31j".equalsIgnoreCase(name)) {
159
+ return NKFCharset.JIS;
160
+ }
161
+ if ("EUC-JP".equals(name)) {
162
+ return NKFCharset.EUC;
163
+ }
164
+ if ("ISO-2022-JP".equals(name)) {
165
+ return NKFCharset.JIS;
166
+ }
167
+ }
168
+ catch (CharacterCodingException e) {
169
+ // fall through and try direct encoding
170
+ }
171
+
172
+ if (bytes.getEncoding() == UTF8Encoding.INSTANCE) {
173
+ return NKFCharset.UTF8;
174
+ }
175
+ if (bytes.getEncoding().toString().startsWith("UTF-16")) {
176
+ return NKFCharset.UTF16;
177
+ }
178
+ if (bytes.getEncoding().toString().startsWith("UTF-32")) {
179
+ return NKFCharset.UTF32;
180
+ }
181
+ return NKFCharset.UNKNOWN;
182
+ }
183
+
184
+ private static IRubyObject charsetMappedValue(final Ruby runtime, final NKFCharset charset) {
185
+ final Encoding encoding;
186
+ switch (charset) {
187
+ case AUTO: case NOCONV: case UNKNOWN: return runtime.getNil();
188
+ case BINARY:
189
+ encoding = runtime.getEncodingService().getAscii8bitEncoding();
190
+ return runtime.getEncodingService().convertEncodingToRubyEncoding(encoding);
191
+ }
192
+
193
+ encoding = runtime.getEncodingService().getEncodingFromString(charset.getCharset());
194
+ return runtime.getEncodingService().convertEncodingToRubyEncoding(encoding);
195
+ }
196
+
197
+ @JRubyMethod(name = "guess1", module = true)
198
+ public static IRubyObject guess1(ThreadContext context, IRubyObject recv, IRubyObject str) {
199
+ return guess(context, recv, str);
200
+ }
201
+
202
+ @JRubyMethod(name = "guess2", module = true)
203
+ public static IRubyObject guess2(ThreadContext context, IRubyObject recv, IRubyObject str) {
204
+ return guess(context, recv, str);
205
+ }
206
+
207
+ @JRubyMethod(name = "nkf", module = true)
208
+ public static IRubyObject nkf(ThreadContext context, IRubyObject recv, IRubyObject opt, IRubyObject str) {
209
+ Ruby runtime = context.runtime;
210
+
211
+ if (!opt.respondsTo("to_str")) {
212
+ throw runtime.newTypeError("can't convert " + opt.getMetaClass() + " into String");
213
+ }
214
+
215
+ if (!str.respondsTo("to_str")) {
216
+ throw runtime.newTypeError("can't convert " + str.getMetaClass() + " into String");
217
+ }
218
+
219
+ Map<String, NKFCharset> options = parseOpt(opt.convertToString().toString());
220
+
221
+ if (options.get("input").getValue() == NKFCharset.AUTO.getValue()) {
222
+ options.put("input", guess(context, str));
223
+ }
224
+
225
+ ByteList bstr = str.convertToString().getByteList();
226
+ final Converter converter;
227
+ if (Converter.isMimeText(bstr, options)) {
228
+ converter = new MimeConverter(context, options);
229
+ } else {
230
+ converter = new DefaultConverter(context, options);
231
+ }
232
+
233
+ RubyString result = converter.convert(bstr);
234
+
235
+ if (options.get("mime-encode") == NKFCharset.BASE64) {
236
+ result = Converter.encodeMimeString(runtime, result, PACK_BASE64);
237
+ } else if (options.get("mime-encode") == NKFCharset.QENCODE) {
238
+ result = Converter.encodeMimeString(runtime, result, PACK_QENCODE);
239
+ }
240
+
241
+ return result;
242
+ }
243
+
244
+ public static Command parseOption(String s) {
245
+ Options options = new Options();
246
+ options.addOption("b");
247
+ options.addOption("u");
248
+ options.addOption("j", "jis");
249
+ options.addOption("s", "sjis");
250
+ options.addOption("e", "euc");
251
+ options.addOption("w", null, "[0-9][0-9]");
252
+ options.addOption("J", "jis-input");
253
+ options.addOption("S", "sjis-input");
254
+ options.addOption("E", "euc-input");
255
+ options.addOption("W", null, "[0-9][0-9]");
256
+ options.addOption("t");
257
+ options.addOption("i_");
258
+ options.addOption("o_");
259
+ options.addOption("r");
260
+ options.addOption("h1", "hiragana");
261
+ options.addOption("h2", "katakana");
262
+ options.addOption("h3", "katakana-hiragana");
263
+ options.addOption("T");
264
+ options.addOption("l");
265
+ options.addOption("f", null, "[0-9]+-[0-9]*");
266
+ options.addOption("F");
267
+ options.addOption("Z", null, "[0-3]");
268
+ options.addOption("X");
269
+ options.addOption("x");
270
+ options.addOption("B", null, "[0-2]");
271
+ options.addOption("I");
272
+ options.addOption("L", null, "[uwm]");
273
+ options.addOption("d");
274
+ options.addOption("c");
275
+ options.addOption("m", null, "[BQN0]");
276
+ options.addOption("M", null, "[BQ]");
277
+ options.addOption(null, "fj");
278
+ options.addOption(null, "unix");
279
+ options.addOption(null, "mac");
280
+ options.addOption(null, "msdos");
281
+ options.addOption(null, "windows");
282
+ options.addOption(null, "mime");
283
+ options.addOption(null, "base64");
284
+ options.addOption(null, "mime-input");
285
+ options.addOption(null, "base64-input");
286
+ options.addOption(null, "ic", "ic=(.*)");
287
+ options.addOption(null, "oc", "oc=(.*)");
288
+ options.addOption(null, "fb-skip");
289
+ options.addOption(null, "fb-html");
290
+ options.addOption(null, "fb-xml");
291
+ options.addOption(null, "fb-perl");
292
+ options.addOption(null, "fb-java");
293
+ options.addOption(null, "fb-subchar", "fb-subchar=(.*)");
294
+ options.addOption(null, "no-cp932ext");
295
+ options.addOption(null, "cap-input");
296
+ options.addOption(null, "url-input");
297
+ options.addOption(null, "numchar-input");
298
+ options.addOption(null, "no-best-fit-chars");
299
+
300
+ CommandParser parser = new CommandParser();
301
+ Command cmd = parser.parse(options, s);
302
+ return cmd;
303
+ }
304
+
305
+ private static Map<String, NKFCharset> parseOpt(String s) {
306
+ Map<String, NKFCharset> options = new HashMap<String, NKFCharset>();
307
+
308
+ // default options
309
+ options.put("input", NKFCharset.AUTO);
310
+ options.put("output", NKFCharset.JIS);
311
+ options.put("mime-decode", NKFCharset.MIME_DETECT);
312
+ options.put("mime-encode", NKFCharset.NOCONV);
313
+
314
+ Command cmd = parseOption(s);
315
+ if (cmd.hasOption("j")) {
316
+ options.put("output", NKFCharset.JIS);
317
+ }
318
+ if (cmd.hasOption("s")) {
319
+ options.put("output", NKFCharset.SJIS);
320
+ }
321
+ if (cmd.hasOption("e")) {
322
+ options.put("output", NKFCharset.EUC);
323
+ }
324
+ if (cmd.hasOption("w")) {
325
+ Option opt = cmd.getOption("w");
326
+ if ("32".equals(opt.getValue())) {
327
+ options.put("output", NKFCharset.UTF32);
328
+ } else if("16".equals(opt.getValue())) {
329
+ options.put("output", NKFCharset.UTF16);
330
+ } else {
331
+ options.put("output", NKFCharset.UTF8);
332
+ }
333
+ }
334
+ if (cmd.hasOption("J")) {
335
+ options.put("input", NKFCharset.JIS);
336
+ }
337
+ if (cmd.hasOption("S")) {
338
+ options.put("input", NKFCharset.SJIS);
339
+ }
340
+ if (cmd.hasOption("E")) {
341
+ options.put("input", NKFCharset.EUC);
342
+ }
343
+ if (cmd.hasOption("W")) {
344
+ Option opt = cmd.getOption("W");
345
+ if ("32".equals(opt.getValue())) {
346
+ options.put("input", NKFCharset.UTF32);
347
+ } else if("16".equals(opt.getValue())) {
348
+ options.put("input", NKFCharset.UTF16);
349
+ } else {
350
+ options.put("input", NKFCharset.UTF8);
351
+ }
352
+ }
353
+ if (cmd.hasOption("m")) {
354
+ Option opt = cmd.getOption("m");
355
+ if (opt.getValue() == null) {
356
+ options.put("mime-decode", NKFCharset.MIME_DETECT);
357
+ } else if ("B".equals(opt.getValue())) {
358
+ options.put("mime-decode", NKFCharset.BASE64);
359
+ } else if ("Q".equals(opt.getValue())) {
360
+ options.put("mime-decode", NKFCharset.QENCODE);
361
+ } else if ("N".equals(opt.getValue())) {
362
+ // TODO: non-strict option
363
+ } else if ("0".equals(opt.getValue())) {
364
+ options.put("mime-decode", NKFCharset.NOCONV);
365
+ }
366
+ }
367
+ if (cmd.hasOption("M")) {
368
+ Option opt = cmd.getOption("M");
369
+ if (opt.getValue() == null) {
370
+ options.put("mime-encode", NKFCharset.NOCONV);
371
+ } else if ("B".equals(opt.getValue())) {
372
+ options.put("mime-encode", NKFCharset.BASE64);
373
+ } else if ("Q".equals(opt.getValue())) {
374
+ options.put("mime-encode", NKFCharset.QENCODE);
375
+ }
376
+ }
377
+ if (cmd.hasOption("base64")) {
378
+ options.put("mime-encode", NKFCharset.BASE64);
379
+ }
380
+ if (cmd.hasOption("oc")) {
381
+ Option opt = cmd.getOption("oc");
382
+ if ("ISO-2022-JP".compareToIgnoreCase(opt.getValue()) == 0) {
383
+ options.put("output", NKFCharset.JIS);
384
+ } else if ("EUC-JP".compareToIgnoreCase(opt.getValue()) == 0) {
385
+ options.put("output", NKFCharset.EUC);
386
+ } else if ("CP932".compareToIgnoreCase(opt.getValue()) == 0) {
387
+ options.put("output", NKFCharset.SJIS);
388
+ } else if ("Shift_JIS".compareToIgnoreCase(opt.getValue()) == 0) {
389
+ options.put("output", NKFCharset.SJIS);
390
+ } else if ("Windows-31J".compareToIgnoreCase(opt.getValue()) == 0) {
391
+ options.put("output", NKFCharset.JIS);
392
+ } else if ("UTF-8".compareToIgnoreCase(opt.getValue()) == 0) {
393
+ options.put("output", NKFCharset.UTF8);
394
+ } else if ("UTF-8N".compareToIgnoreCase(opt.getValue()) == 0) {
395
+ options.put("output", NKFCharset.UTF8);
396
+ } else if ("UTF-16".compareToIgnoreCase(opt.getValue()) == 0) {
397
+ options.put("output", NKFCharset.UTF16);
398
+ } else if ("UTF-16BE-BOM".compareToIgnoreCase(opt.getValue()) == 0) {
399
+ options.put("output", NKFCharset.UTF16);
400
+ } else if ("UTF-32".compareToIgnoreCase(opt.getValue()) == 0) {
401
+ options.put("output", NKFCharset.UTF32);
402
+ } else if ("UTF-32BE-BOM".compareToIgnoreCase(opt.getValue()) == 0) {
403
+ options.put("output", NKFCharset.UTF32);
404
+ }
405
+ }
406
+ if (cmd.hasOption("ic")) {
407
+ Option opt = cmd.getOption("ic");
408
+ if ("ISO-2022-JP".compareToIgnoreCase(opt.getValue()) == 0) {
409
+ options.put("input", NKFCharset.JIS);
410
+ } else if ("EUC-JP".compareToIgnoreCase(opt.getValue()) == 0) {
411
+ options.put("input", NKFCharset.EUC);
412
+ } else if ("CP932".compareToIgnoreCase(opt.getValue()) == 0) {
413
+ options.put("input", NKFCharset.SJIS);
414
+ } else if ("Shift_JIS".compareToIgnoreCase(opt.getValue()) == 0) {
415
+ options.put("input", NKFCharset.SJIS);
416
+ } else if ("Windows-31J".compareToIgnoreCase(opt.getValue()) == 0) {
417
+ options.put("input", NKFCharset.SJIS);
418
+ } else if ("UTF-8".compareToIgnoreCase(opt.getValue()) == 0) {
419
+ options.put("input", NKFCharset.UTF8);
420
+ } else if ("UTF-8N".compareToIgnoreCase(opt.getValue()) == 0) {
421
+ options.put("input", NKFCharset.UTF8);
422
+ } else if ("UTF-16".compareToIgnoreCase(opt.getValue()) == 0) {
423
+ options.put("input", NKFCharset.UTF16);
424
+ } else if ("UTF-16BE-BOM".compareToIgnoreCase(opt.getValue()) == 0) {
425
+ options.put("input", NKFCharset.UTF16);
426
+ } else if ("UTF-32".compareToIgnoreCase(opt.getValue()) == 0) {
427
+ options.put("input", NKFCharset.UTF32);
428
+ } else if ("UTF-32BE-BOM".compareToIgnoreCase(opt.getValue()) == 0) {
429
+ options.put("input", NKFCharset.UTF32);
430
+ }
431
+ }
432
+
433
+ return options;
434
+ }
435
+
436
+ static abstract class Converter {
437
+
438
+ protected final ThreadContext context;
439
+ protected final Map<String, NKFCharset> options;
440
+
441
+ public Converter(ThreadContext ctx, Map<String, NKFCharset> opt) {
442
+ context = ctx;
443
+ options = opt;
444
+ }
445
+
446
+ static boolean isMimeText(ByteList str, Map<String, NKFCharset> options) {
447
+ if (str.length() <= 6) {
448
+ return false;
449
+ }
450
+ if (options.get("mime-decode") == NKFCharset.NOCONV) {
451
+ return false;
452
+ }
453
+ if (str.indexOf(BEGIN_MIME_STRING) < 0) {
454
+ return false;
455
+ }
456
+ if (str.lastIndexOf(END_MIME_STRING) < 0) {
457
+ return false;
458
+ }
459
+ return true;
460
+ }
461
+
462
+ private static RubyString encodeMimeString(Ruby runtime, RubyString str, ByteList format) {
463
+ RubyArray array = RubyArray.newArray(runtime, str);
464
+ return Pack.pack(runtime, array, format).chomp(runtime.getCurrentContext());
465
+ }
466
+
467
+ abstract RubyString convert(ByteList str);
468
+
469
+ ByteList convert_byte(ByteList str, String inputCharset, NKFCharset output) {
470
+ String outputCharset = output.getCharset();
471
+
472
+ if (inputCharset == null) {
473
+ inputCharset = str.getEncoding().toString();
474
+ }
475
+
476
+ if (outputCharset.equals(inputCharset)) {
477
+ return str.dup();
478
+ }
479
+
480
+ byte[] outCharsetBytes = outputCharset.getBytes();
481
+
482
+ EConv ec = EncodingUtils.econvOpenOpts(context, inputCharset.getBytes(), outCharsetBytes, 0, context.nil);
483
+
484
+ if (ec == null) {
485
+ throw context.runtime.newArgumentError("invalid encoding pair: " + inputCharset + " to " + outputCharset);
486
+ }
487
+
488
+ ByteList converted = EncodingUtils.econvStrConvert(context, ec, str, EConvFlags.INVALID_REPLACE);
489
+
490
+ converted.setEncoding(context.runtime.getEncodingService().findEncodingOrAliasEntry(outCharsetBytes).getEncoding());
491
+
492
+ return converted;
493
+ }
494
+ }
495
+
496
+ static class DefaultConverter extends Converter {
497
+
498
+ public DefaultConverter(ThreadContext ctx, Map<String, NKFCharset> opt) {
499
+ super(ctx, opt);
500
+ }
501
+
502
+ RubyString convert(ByteList str) {
503
+ NKFCharset input = options.get("input");
504
+ NKFCharset output = options.get("output");
505
+ ByteList b = convert_byte(str,
506
+ input.getCharset(),
507
+ output);
508
+ return context.runtime.newString(b);
509
+ }
510
+ }
511
+
512
+ static class MimeConverter extends Converter {
513
+
514
+ public MimeConverter(ThreadContext ctx, Map<String, NKFCharset> opt) {
515
+ super(ctx, opt);
516
+ }
517
+
518
+ private String detectCharset(String charset) {
519
+ if (charset.compareToIgnoreCase(NKFCharset.UTF8.getCharset()) == 0) {
520
+ return NKFCharset.UTF8.getCharset();
521
+ } else if (charset.compareToIgnoreCase(NKFCharset.JIS.getCharset()) == 0) {
522
+ return NKFCharset.JIS.getCharset();
523
+ } else if (charset.compareToIgnoreCase(NKFCharset.EUC.getCharset()) == 0) {
524
+ return NKFCharset.EUC.getCharset();
525
+ } else {
526
+ return NKFCharset.ASCII.getCharset();
527
+ }
528
+ }
529
+
530
+ private ByteList decodeMimeString(String str) {
531
+ String[] mime = str.split("^=\\?|\\?|\\?=$");
532
+ String charset = detectCharset(mime[1]);
533
+ int encode = mime[2].charAt(0);
534
+ RubyString body = EncodingUtils.newExternalStringWithEncoding(context.runtime, mime[3], ASCIIEncoding.INSTANCE);
535
+
536
+ final RubyArray<?> array;
537
+ if ('B' == encode || 'b' == encode) { // BASE64
538
+ array = Pack.unpack(context, body, PACK_BASE64);
539
+ } else { // Qencode
540
+ array = Pack.unpack(context, body, PACK_QENCODE);
541
+ }
542
+ RubyString s = (RubyString) array.entry(0);
543
+ ByteList decodeStr = s.asString().getByteList();
544
+
545
+ return convert_byte(decodeStr, charset, options.get("output"));
546
+ }
547
+
548
+ RubyString makeRubyString(ArrayList<ByteList> list) {
549
+ ByteList r = new ByteList();
550
+ for (ByteList l : list) {
551
+ r.append(l);
552
+ }
553
+ return context.runtime.newString(r);
554
+ }
555
+
556
+ RubyString convert(ByteList str) {
557
+ String s = Helpers.decodeByteList(context.runtime, str);
558
+ String[] token = s.split("\\s");
559
+ ArrayList<ByteList> raw_data = new ArrayList<ByteList>();
560
+
561
+ for (int i = 0; i < token.length; i++) {
562
+ raw_data.add(decodeMimeString(token[i]));
563
+ }
564
+
565
+ return makeRubyString(raw_data);
566
+ }
567
+
568
+ }
569
+
570
+ @Deprecated
571
+ public static final NKFCharset AUTO = NKFCharset.AUTO;
572
+ // no ISO-2022-JP in jcodings
573
+ @Deprecated
574
+ public static final NKFCharset JIS = NKFCharset.JIS;
575
+ @Deprecated
576
+ public static final NKFCharset EUC = NKFCharset.EUC;
577
+ @Deprecated
578
+ public static final NKFCharset SJIS = NKFCharset.SJIS;
579
+ @Deprecated
580
+ public static final NKFCharset BINARY = NKFCharset.BINARY;
581
+ @Deprecated
582
+ public static final NKFCharset NOCONV = NKFCharset.NOCONV;
583
+ @Deprecated
584
+ public static final NKFCharset UNKNOWN = NKFCharset.UNKNOWN;
585
+ @Deprecated
586
+ public static final NKFCharset ASCII = NKFCharset.ASCII;
587
+ @Deprecated
588
+ public static final NKFCharset UTF8 = NKFCharset.UTF8;
589
+ @Deprecated
590
+ public static final NKFCharset UTF16 = NKFCharset.UTF16;
591
+ @Deprecated
592
+ public static final NKFCharset UTF32 = NKFCharset.UTF32;
593
+ @Deprecated
594
+ public static final NKFCharset OTHER = NKFCharset.OTHER;
595
+ @Deprecated
596
+ public static final NKFCharset BASE64 = NKFCharset.BASE64;
597
+ @Deprecated
598
+ public static final NKFCharset QENCODE = NKFCharset.QENCODE;
599
+ @Deprecated
600
+ public static final NKFCharset MIME_DETECT = NKFCharset.MIME_DETECT;
601
+ }
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: false
2
+ require 'mkmf'
3
+ create_makefile('nkf')
@@ -0,0 +1,51 @@
1
+ #ifndef _CONFIG_H_
2
+ #define _CONFIG_H_
3
+
4
+ /* UTF8 input and output */
5
+ #define UTF8_INPUT_ENABLE
6
+ #define UTF8_OUTPUT_ENABLE
7
+
8
+ /* convert characters invalid in Shift_JIS to CP932 */
9
+ #define SHIFTJIS_CP932
10
+
11
+ /* fix input encoding when given by option */
12
+ #define INPUT_CODE_FIX
13
+
14
+ /* --overwrite option */
15
+ /* by Satoru Takabayashi <ccsatoru@vega.aichi-u.ac.jp> */
16
+ #define OVERWRITE
17
+
18
+ /* --cap-input, --url-input option */
19
+ #define INPUT_OPTION
20
+
21
+ /* --numchar-input option */
22
+ #define NUMCHAR_OPTION
23
+
24
+ /* --debug, --no-output option */
25
+ #define CHECK_OPTION
26
+
27
+ /* JIS X0212 */
28
+ #define X0212_ENABLE
29
+
30
+ /* --exec-in, --exec-out option
31
+ * require pipe, fork, execvp and so on.
32
+ * please undef this on MS-DOS, MinGW
33
+ * this is still buggy around child process
34
+ */
35
+ /* #define EXEC_IO */
36
+
37
+ /* Unicode Normalization */
38
+ #define UNICODE_NORMALIZATION
39
+
40
+ /*
41
+ * Select Default Output Encoding
42
+ *
43
+ */
44
+
45
+ /* #define DEFAULT_CODE_JIS */
46
+ /* #define DEFAULT_CODE_SJIS */
47
+ /* #define DEFAULT_CODE_WINDOWS_31J */
48
+ /* #define DEFAULT_CODE_EUC */
49
+ /* #define DEFAULT_CODE_UTF8 */
50
+
51
+ #endif /* _CONFIG_H_ */