nkf 0.2.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,601 @@
1
+ /***** BEGIN LICENSE BLOCK *****
2
+ * Version: EPL 2.0/LGPL 2.1
3
+ *
4
+ * The contents of this file are subject to the Eclipse Public
5
+ * License Version 2.0 (the "License"); you may not use this file
6
+ * except in compliance with the License. You may obtain a copy of
7
+ * the License at http://www.eclipse.org/legal/epl-v20.html
8
+ *
9
+ * Software distributed under the License is distributed on an "AS
10
+ * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
11
+ * implied. See the License for the specific language governing
12
+ * rights and limitations under the License.
13
+ *
14
+ * Copyright (C) 2007-2011 Koichiro Ohba <koichiro@meadowy.org>
15
+ *
16
+ * Alternatively, the contents of this file may be used under the terms of
17
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
18
+ * in which case the provisions of the LGPL are applicable instead
19
+ * of those above. If you wish to allow use of your version of this file only
20
+ * under the terms of either the LGPL, and not to allow others to
21
+ * use your version of this file under the terms of the EPL, indicate your
22
+ * decision by deleting the provisions above and replace them with the notice
23
+ * and other provisions required by the LGPL. If you do not delete
24
+ * the provisions above, a recipient may use your version of this file under
25
+ * the terms of any one of the EPL, the LGPL.
26
+ ***** END LICENSE BLOCK *****/
27
+
28
+ package org.jruby.ext.nkf;
29
+
30
+ import java.nio.ByteBuffer;
31
+ import java.nio.CharBuffer;
32
+ import java.nio.charset.CharacterCodingException;
33
+ import java.nio.charset.Charset;
34
+ import java.nio.charset.CharsetDecoder;
35
+ import java.nio.charset.CharsetEncoder;
36
+ import java.nio.charset.UnsupportedCharsetException;
37
+ import java.util.ArrayList;
38
+ import java.util.Map;
39
+ import java.util.HashMap;
40
+
41
+ import org.jcodings.Encoding;
42
+ import org.jcodings.specific.ASCIIEncoding;
43
+ import org.jcodings.specific.UTF8Encoding;
44
+ import org.jcodings.transcode.EConv;
45
+ import org.jcodings.transcode.EConvFlags;
46
+ import org.jruby.Ruby;
47
+ import org.jruby.RubyArray;
48
+ import org.jruby.RubyModule;
49
+ import org.jruby.RubyString;
50
+
51
+ import org.jruby.anno.JRubyMethod;
52
+ import org.jruby.anno.JRubyModule;
53
+ import org.jruby.runtime.Helpers;
54
+ import org.jruby.runtime.ThreadContext;
55
+ import org.jruby.runtime.builtin.IRubyObject;
56
+ import org.jruby.util.ByteList;
57
+ import org.jruby.util.KCode;
58
+ import org.jruby.util.Pack;
59
+ import org.jruby.util.io.EncodingUtils;
60
+
61
+ @JRubyModule(name="NKF")
62
+ public class RubyNKF {
63
/**
 * Character sets and MIME transfer encodings known to this NKF port.
 *
 * <p>Each constant carries a numeric code and the charset name used when an
 * encoding has to be looked up; the name is {@code null} for constants that
 * do not correspond to a concrete charset. Note that some codes are shared:
 * {@code AUTO}/{@code UNKNOWN} both use 0 and {@code BINARY}/{@code NOCONV}
 * both use 4.
 */
public enum NKFCharset {
    AUTO(0, "x-JISAutoDetect"),
    // no ISO-2022-JP in jcodings
    JIS(1, "ISO-2022-JP"),
    EUC(2, "EUC-JP"),
    SJIS(3, "Shift_JIS"),
    BINARY(4, null),
    NOCONV(4, null),
    UNKNOWN(0, null),
    ASCII(5, "iso-8859-1"),
    UTF8(6, "UTF-8"),
    UTF16(8, "UTF-16"),
    UTF32(12, "UTF-32"),
    OTHER(16, null),
    BASE64(20, "base64"),
    QENCODE(21, "qencode"),
    MIME_DETECT(22, "MimeAutoDetect");

    /** Numeric code of this constant (not unique across constants). */
    private final int value;

    /** Charset name associated with this constant, or {@code null}. */
    private final String charset;

    NKFCharset(int value, String charset) {
        this.value = value;
        this.charset = charset;
    }

    /** @return the numeric code of this constant */
    public int getValue() {
        return this.value;
    }

    /** @return the charset name of this constant, or {@code null} if it has none */
    public String getCharset() {
        return this.charset;
    }
}
97
+
98
+ private static final ByteList BEGIN_MIME_STRING = new ByteList(ByteList.plain("=?")); // opening marker that Converter.isMimeText searches for
99
+ private static final ByteList END_MIME_STRING = new ByteList(ByteList.plain("?=")); // closing marker that Converter.isMimeText searches for
100
+ private static final ByteList PACK_BASE64 = new ByteList(ByteList.plain("m")); // Pack format handed to Pack.pack/unpack for the BASE64 case
101
+ private static final ByteList PACK_QENCODE = new ByteList(ByteList.plain("M")); // Pack format handed to Pack.pack/unpack for the QENCODE case
102
+
103
+ public static final Map<Integer, String> NKFCharsetMap = new HashMap<Integer, String>(20, 1); // filled by createNKF: NKFCharset numeric value -> enum constant name (constants sharing a value overwrite each other)
104
+
105
+ public static void load(Ruby runtime) {
106
+ createNKF(runtime);
107
+ }
108
+
109
+ public static void createNKF(Ruby runtime) {
110
+ final RubyModule NKF = runtime.defineModule("NKF");
111
+ final String version = "2.1.2";
112
+ final String relDate = "2011-09-08";
113
+
114
+ NKF.defineConstant("NKF_VERSION", runtime.newString(version));
115
+ NKF.defineConstant("NKF_RELEASE_DATE", runtime.newString(relDate));
116
+ NKF.defineConstant("VERSION", runtime.newString(version + ' ' + '(' + "JRuby" + '_' + relDate + ')'));
117
+
118
+ for ( NKFCharset charset : NKFCharset.values() ) {
119
+ NKFCharsetMap.put(charset.value, charset.name());
120
+
121
+ if (charset.value > 12 ) continue;
122
+ NKF.defineConstant(charset.name(), charsetMappedValue(runtime, charset));
123
+ }
124
+
125
+ NKF.defineAnnotatedMethods(RubyNKF.class);
126
+ }
127
+
128
+ @JRubyMethod(name = "guess", module = true)
129
+ public static IRubyObject guess(ThreadContext context, IRubyObject recv, IRubyObject s) {
130
+ return charsetMappedValue(context.runtime, guess(context, s));
131
+ }
132
+
133
+ public static NKFCharset guess(ThreadContext context, IRubyObject s) {
134
+ // TODO: Fix charset usage for JRUBY-4553
135
+ Ruby runtime = context.runtime;
136
+ if (!s.respondsTo("to_str")) {
137
+ throw runtime.newTypeError("can't convert " + s.getMetaClass() + " into String");
138
+ }
139
+ ByteList bytes = s.convertToString().getByteList();
140
+ ByteBuffer buf = ByteBuffer.wrap(bytes.getUnsafeBytes(), bytes.begin(), bytes.length());
141
+ CharsetDecoder decoder;
142
+ try {
143
+ decoder = Charset.forName("x-JISAutoDetect").newDecoder();
144
+ } catch (UnsupportedCharsetException e) {
145
+ throw runtime.newStandardError("charsets.jar is required to use NKF#guess. Please install JRE which supports m17n.");
146
+ }
147
+ try {
148
+ decoder.decode(buf);
149
+
150
+ if ( ! decoder.isCharsetDetected() ) {
151
+ return NKFCharset.UNKNOWN;
152
+ }
153
+ Charset charset = decoder.detectedCharset();
154
+ String name = charset.name();
155
+ if ("Shift_JIS".equals(name)) {
156
+ return NKFCharset.SJIS;
157
+ }
158
+ if ("Windows-31j".equalsIgnoreCase(name)) {
159
+ return NKFCharset.JIS;
160
+ }
161
+ if ("EUC-JP".equals(name)) {
162
+ return NKFCharset.EUC;
163
+ }
164
+ if ("ISO-2022-JP".equals(name)) {
165
+ return NKFCharset.JIS;
166
+ }
167
+ }
168
+ catch (CharacterCodingException e) {
169
+ // fall through and try direct encoding
170
+ }
171
+
172
+ if (bytes.getEncoding() == UTF8Encoding.INSTANCE) {
173
+ return NKFCharset.UTF8;
174
+ }
175
+ if (bytes.getEncoding().toString().startsWith("UTF-16")) {
176
+ return NKFCharset.UTF16;
177
+ }
178
+ if (bytes.getEncoding().toString().startsWith("UTF-32")) {
179
+ return NKFCharset.UTF32;
180
+ }
181
+ return NKFCharset.UNKNOWN;
182
+ }
183
+
184
+ private static IRubyObject charsetMappedValue(final Ruby runtime, final NKFCharset charset) {
185
+ final Encoding encoding;
186
+ switch (charset) {
187
+ case AUTO: case NOCONV: case UNKNOWN: return runtime.getNil();
188
+ case BINARY:
189
+ encoding = runtime.getEncodingService().getAscii8bitEncoding();
190
+ return runtime.getEncodingService().convertEncodingToRubyEncoding(encoding);
191
+ }
192
+
193
+ encoding = runtime.getEncodingService().getEncodingFromString(charset.getCharset());
194
+ return runtime.getEncodingService().convertEncodingToRubyEncoding(encoding);
195
+ }
196
+
197
+ @JRubyMethod(name = "guess1", module = true)
198
+ public static IRubyObject guess1(ThreadContext context, IRubyObject recv, IRubyObject str) {
199
+ return guess(context, recv, str);
200
+ }
201
+
202
+ @JRubyMethod(name = "guess2", module = true)
203
+ public static IRubyObject guess2(ThreadContext context, IRubyObject recv, IRubyObject str) {
204
+ return guess(context, recv, str);
205
+ }
206
+
207
+ @JRubyMethod(name = "nkf", module = true)
208
+ public static IRubyObject nkf(ThreadContext context, IRubyObject recv, IRubyObject opt, IRubyObject str) {
209
+ Ruby runtime = context.runtime;
210
+
211
+ if (!opt.respondsTo("to_str")) {
212
+ throw runtime.newTypeError("can't convert " + opt.getMetaClass() + " into String");
213
+ }
214
+
215
+ if (!str.respondsTo("to_str")) {
216
+ throw runtime.newTypeError("can't convert " + str.getMetaClass() + " into String");
217
+ }
218
+
219
+ Map<String, NKFCharset> options = parseOpt(opt.convertToString().toString());
220
+
221
+ if (options.get("input").getValue() == NKFCharset.AUTO.getValue()) {
222
+ options.put("input", guess(context, str));
223
+ }
224
+
225
+ ByteList bstr = str.convertToString().getByteList();
226
+ final Converter converter;
227
+ if (Converter.isMimeText(bstr, options)) {
228
+ converter = new MimeConverter(context, options);
229
+ } else {
230
+ converter = new DefaultConverter(context, options);
231
+ }
232
+
233
+ RubyString result = converter.convert(bstr);
234
+
235
+ if (options.get("mime-encode") == NKFCharset.BASE64) {
236
+ result = Converter.encodeMimeString(runtime, result, PACK_BASE64);
237
+ } else if (options.get("mime-encode") == NKFCharset.QENCODE) {
238
+ result = Converter.encodeMimeString(runtime, result, PACK_QENCODE);
239
+ }
240
+
241
+ return result;
242
+ }
243
+
244
+ public static Command parseOption(String s) {
245
+ Options options = new Options();
246
+ options.addOption("b");
247
+ options.addOption("u");
248
+ options.addOption("j", "jis");
249
+ options.addOption("s", "sjis");
250
+ options.addOption("e", "euc");
251
+ options.addOption("w", null, "[0-9][0-9]");
252
+ options.addOption("J", "jis-input");
253
+ options.addOption("S", "sjis-input");
254
+ options.addOption("E", "euc-input");
255
+ options.addOption("W", null, "[0-9][0-9]");
256
+ options.addOption("t");
257
+ options.addOption("i_");
258
+ options.addOption("o_");
259
+ options.addOption("r");
260
+ options.addOption("h1", "hiragana");
261
+ options.addOption("h2", "katakana");
262
+ options.addOption("h3", "katakana-hiragana");
263
+ options.addOption("T");
264
+ options.addOption("l");
265
+ options.addOption("f", null, "[0-9]+-[0-9]*");
266
+ options.addOption("F");
267
+ options.addOption("Z", null, "[0-3]");
268
+ options.addOption("X");
269
+ options.addOption("x");
270
+ options.addOption("B", null, "[0-2]");
271
+ options.addOption("I");
272
+ options.addOption("L", null, "[uwm]");
273
+ options.addOption("d");
274
+ options.addOption("c");
275
+ options.addOption("m", null, "[BQN0]");
276
+ options.addOption("M", null, "[BQ]");
277
+ options.addOption(null, "fj");
278
+ options.addOption(null, "unix");
279
+ options.addOption(null, "mac");
280
+ options.addOption(null, "msdos");
281
+ options.addOption(null, "windows");
282
+ options.addOption(null, "mime");
283
+ options.addOption(null, "base64");
284
+ options.addOption(null, "mime-input");
285
+ options.addOption(null, "base64-input");
286
+ options.addOption(null, "ic", "ic=(.*)");
287
+ options.addOption(null, "oc", "oc=(.*)");
288
+ options.addOption(null, "fb-skip");
289
+ options.addOption(null, "fb-html");
290
+ options.addOption(null, "fb-xml");
291
+ options.addOption(null, "fb-perl");
292
+ options.addOption(null, "fb-java");
293
+ options.addOption(null, "fb-subchar", "fb-subchar=(.*)");
294
+ options.addOption(null, "no-cp932ext");
295
+ options.addOption(null, "cap-input");
296
+ options.addOption(null, "url-input");
297
+ options.addOption(null, "numchar-input");
298
+ options.addOption(null, "no-best-fit-chars");
299
+
300
+ CommandParser parser = new CommandParser();
301
+ Command cmd = parser.parse(options, s);
302
+ return cmd;
303
+ }
304
+
305
+ private static Map<String, NKFCharset> parseOpt(String s) {
306
+ Map<String, NKFCharset> options = new HashMap<String, NKFCharset>();
307
+
308
+ // default options
309
+ options.put("input", NKFCharset.AUTO);
310
+ options.put("output", NKFCharset.JIS);
311
+ options.put("mime-decode", NKFCharset.MIME_DETECT);
312
+ options.put("mime-encode", NKFCharset.NOCONV);
313
+
314
+ Command cmd = parseOption(s);
315
+ if (cmd.hasOption("j")) {
316
+ options.put("output", NKFCharset.JIS);
317
+ }
318
+ if (cmd.hasOption("s")) {
319
+ options.put("output", NKFCharset.SJIS);
320
+ }
321
+ if (cmd.hasOption("e")) {
322
+ options.put("output", NKFCharset.EUC);
323
+ }
324
+ if (cmd.hasOption("w")) {
325
+ Option opt = cmd.getOption("w");
326
+ if ("32".equals(opt.getValue())) {
327
+ options.put("output", NKFCharset.UTF32);
328
+ } else if("16".equals(opt.getValue())) {
329
+ options.put("output", NKFCharset.UTF16);
330
+ } else {
331
+ options.put("output", NKFCharset.UTF8);
332
+ }
333
+ }
334
+ if (cmd.hasOption("J")) {
335
+ options.put("input", NKFCharset.JIS);
336
+ }
337
+ if (cmd.hasOption("S")) {
338
+ options.put("input", NKFCharset.SJIS);
339
+ }
340
+ if (cmd.hasOption("E")) {
341
+ options.put("input", NKFCharset.EUC);
342
+ }
343
+ if (cmd.hasOption("W")) {
344
+ Option opt = cmd.getOption("W");
345
+ if ("32".equals(opt.getValue())) {
346
+ options.put("input", NKFCharset.UTF32);
347
+ } else if("16".equals(opt.getValue())) {
348
+ options.put("input", NKFCharset.UTF16);
349
+ } else {
350
+ options.put("input", NKFCharset.UTF8);
351
+ }
352
+ }
353
+ if (cmd.hasOption("m")) {
354
+ Option opt = cmd.getOption("m");
355
+ if (opt.getValue() == null) {
356
+ options.put("mime-decode", NKFCharset.MIME_DETECT);
357
+ } else if ("B".equals(opt.getValue())) {
358
+ options.put("mime-decode", NKFCharset.BASE64);
359
+ } else if ("Q".equals(opt.getValue())) {
360
+ options.put("mime-decode", NKFCharset.QENCODE);
361
+ } else if ("N".equals(opt.getValue())) {
362
+ // TODO: non-strict option
363
+ } else if ("0".equals(opt.getValue())) {
364
+ options.put("mime-decode", NKFCharset.NOCONV);
365
+ }
366
+ }
367
+ if (cmd.hasOption("M")) {
368
+ Option opt = cmd.getOption("M");
369
+ if (opt.getValue() == null) {
370
+ options.put("mime-encode", NKFCharset.NOCONV);
371
+ } else if ("B".equals(opt.getValue())) {
372
+ options.put("mime-encode", NKFCharset.BASE64);
373
+ } else if ("Q".equals(opt.getValue())) {
374
+ options.put("mime-encode", NKFCharset.QENCODE);
375
+ }
376
+ }
377
+ if (cmd.hasOption("base64")) {
378
+ options.put("mime-encode", NKFCharset.BASE64);
379
+ }
380
+ if (cmd.hasOption("oc")) {
381
+ Option opt = cmd.getOption("oc");
382
+ if ("ISO-2022-JP".compareToIgnoreCase(opt.getValue()) == 0) {
383
+ options.put("output", NKFCharset.JIS);
384
+ } else if ("EUC-JP".compareToIgnoreCase(opt.getValue()) == 0) {
385
+ options.put("output", NKFCharset.EUC);
386
+ } else if ("CP932".compareToIgnoreCase(opt.getValue()) == 0) {
387
+ options.put("output", NKFCharset.SJIS);
388
+ } else if ("Shift_JIS".compareToIgnoreCase(opt.getValue()) == 0) {
389
+ options.put("output", NKFCharset.SJIS);
390
+ } else if ("Windows-31J".compareToIgnoreCase(opt.getValue()) == 0) {
391
+ options.put("output", NKFCharset.JIS);
392
+ } else if ("UTF-8".compareToIgnoreCase(opt.getValue()) == 0) {
393
+ options.put("output", NKFCharset.UTF8);
394
+ } else if ("UTF-8N".compareToIgnoreCase(opt.getValue()) == 0) {
395
+ options.put("output", NKFCharset.UTF8);
396
+ } else if ("UTF-16".compareToIgnoreCase(opt.getValue()) == 0) {
397
+ options.put("output", NKFCharset.UTF16);
398
+ } else if ("UTF-16BE-BOM".compareToIgnoreCase(opt.getValue()) == 0) {
399
+ options.put("output", NKFCharset.UTF16);
400
+ } else if ("UTF-32".compareToIgnoreCase(opt.getValue()) == 0) {
401
+ options.put("output", NKFCharset.UTF32);
402
+ } else if ("UTF-32BE-BOM".compareToIgnoreCase(opt.getValue()) == 0) {
403
+ options.put("output", NKFCharset.UTF32);
404
+ }
405
+ }
406
+ if (cmd.hasOption("ic")) {
407
+ Option opt = cmd.getOption("ic");
408
+ if ("ISO-2022-JP".compareToIgnoreCase(opt.getValue()) == 0) {
409
+ options.put("input", NKFCharset.JIS);
410
+ } else if ("EUC-JP".compareToIgnoreCase(opt.getValue()) == 0) {
411
+ options.put("input", NKFCharset.EUC);
412
+ } else if ("CP932".compareToIgnoreCase(opt.getValue()) == 0) {
413
+ options.put("input", NKFCharset.SJIS);
414
+ } else if ("Shift_JIS".compareToIgnoreCase(opt.getValue()) == 0) {
415
+ options.put("input", NKFCharset.SJIS);
416
+ } else if ("Windows-31J".compareToIgnoreCase(opt.getValue()) == 0) {
417
+ options.put("input", NKFCharset.SJIS);
418
+ } else if ("UTF-8".compareToIgnoreCase(opt.getValue()) == 0) {
419
+ options.put("input", NKFCharset.UTF8);
420
+ } else if ("UTF-8N".compareToIgnoreCase(opt.getValue()) == 0) {
421
+ options.put("input", NKFCharset.UTF8);
422
+ } else if ("UTF-16".compareToIgnoreCase(opt.getValue()) == 0) {
423
+ options.put("input", NKFCharset.UTF16);
424
+ } else if ("UTF-16BE-BOM".compareToIgnoreCase(opt.getValue()) == 0) {
425
+ options.put("input", NKFCharset.UTF16);
426
+ } else if ("UTF-32".compareToIgnoreCase(opt.getValue()) == 0) {
427
+ options.put("input", NKFCharset.UTF32);
428
+ } else if ("UTF-32BE-BOM".compareToIgnoreCase(opt.getValue()) == 0) {
429
+ options.put("input", NKFCharset.UTF32);
430
+ }
431
+ }
432
+
433
+ return options;
434
+ }
435
+
436
+ static abstract class Converter {
437
+
438
+ protected final ThreadContext context;
439
+ protected final Map<String, NKFCharset> options;
440
+
441
+ public Converter(ThreadContext ctx, Map<String, NKFCharset> opt) {
442
+ context = ctx;
443
+ options = opt;
444
+ }
445
+
446
+ static boolean isMimeText(ByteList str, Map<String, NKFCharset> options) {
447
+ if (str.length() <= 6) {
448
+ return false;
449
+ }
450
+ if (options.get("mime-decode") == NKFCharset.NOCONV) {
451
+ return false;
452
+ }
453
+ if (str.indexOf(BEGIN_MIME_STRING) < 0) {
454
+ return false;
455
+ }
456
+ if (str.lastIndexOf(END_MIME_STRING) < 0) {
457
+ return false;
458
+ }
459
+ return true;
460
+ }
461
+
462
+ private static RubyString encodeMimeString(Ruby runtime, RubyString str, ByteList format) {
463
+ RubyArray array = RubyArray.newArray(runtime, str);
464
+ return Pack.pack(runtime, array, format).chomp(runtime.getCurrentContext());
465
+ }
466
+
467
+ abstract RubyString convert(ByteList str);
468
+
469
+ ByteList convert_byte(ByteList str, String inputCharset, NKFCharset output) {
470
+ String outputCharset = output.getCharset();
471
+
472
+ if (inputCharset == null) {
473
+ inputCharset = str.getEncoding().toString();
474
+ }
475
+
476
+ if (outputCharset.equals(inputCharset)) {
477
+ return str.dup();
478
+ }
479
+
480
+ byte[] outCharsetBytes = outputCharset.getBytes();
481
+
482
+ EConv ec = EncodingUtils.econvOpenOpts(context, inputCharset.getBytes(), outCharsetBytes, 0, context.nil);
483
+
484
+ if (ec == null) {
485
+ throw context.runtime.newArgumentError("invalid encoding pair: " + inputCharset + " to " + outputCharset);
486
+ }
487
+
488
+ ByteList converted = EncodingUtils.econvStrConvert(context, ec, str, EConvFlags.INVALID_REPLACE);
489
+
490
+ converted.setEncoding(context.runtime.getEncodingService().findEncodingOrAliasEntry(outCharsetBytes).getEncoding());
491
+
492
+ return converted;
493
+ }
494
+ }
495
+
496
+ static class DefaultConverter extends Converter {
497
+
498
+ public DefaultConverter(ThreadContext ctx, Map<String, NKFCharset> opt) {
499
+ super(ctx, opt);
500
+ }
501
+
502
+ RubyString convert(ByteList str) {
503
+ NKFCharset input = options.get("input");
504
+ NKFCharset output = options.get("output");
505
+ ByteList b = convert_byte(str,
506
+ input.getCharset(),
507
+ output);
508
+ return context.runtime.newString(b);
509
+ }
510
+ }
511
+
512
+ static class MimeConverter extends Converter {
513
+
514
+ public MimeConverter(ThreadContext ctx, Map<String, NKFCharset> opt) {
515
+ super(ctx, opt);
516
+ }
517
+
518
+ private String detectCharset(String charset) {
519
+ if (charset.compareToIgnoreCase(NKFCharset.UTF8.getCharset()) == 0) {
520
+ return NKFCharset.UTF8.getCharset();
521
+ } else if (charset.compareToIgnoreCase(NKFCharset.JIS.getCharset()) == 0) {
522
+ return NKFCharset.JIS.getCharset();
523
+ } else if (charset.compareToIgnoreCase(NKFCharset.EUC.getCharset()) == 0) {
524
+ return NKFCharset.EUC.getCharset();
525
+ } else {
526
+ return NKFCharset.ASCII.getCharset();
527
+ }
528
+ }
529
+
530
+ private ByteList decodeMimeString(String str) {
531
+ String[] mime = str.split("^=\\?|\\?|\\?=$");
532
+ String charset = detectCharset(mime[1]);
533
+ int encode = mime[2].charAt(0);
534
+ RubyString body = EncodingUtils.newExternalStringWithEncoding(context.runtime, mime[3], ASCIIEncoding.INSTANCE);
535
+
536
+ final RubyArray<?> array;
537
+ if ('B' == encode || 'b' == encode) { // BASE64
538
+ array = Pack.unpack(context, body, PACK_BASE64);
539
+ } else { // Qencode
540
+ array = Pack.unpack(context, body, PACK_QENCODE);
541
+ }
542
+ RubyString s = (RubyString) array.entry(0);
543
+ ByteList decodeStr = s.asString().getByteList();
544
+
545
+ return convert_byte(decodeStr, charset, options.get("output"));
546
+ }
547
+
548
+ RubyString makeRubyString(ArrayList<ByteList> list) {
549
+ ByteList r = new ByteList();
550
+ for (ByteList l : list) {
551
+ r.append(l);
552
+ }
553
+ return context.runtime.newString(r);
554
+ }
555
+
556
+ RubyString convert(ByteList str) {
557
+ String s = Helpers.decodeByteList(context.runtime, str);
558
+ String[] token = s.split("\\s");
559
+ ArrayList<ByteList> raw_data = new ArrayList<ByteList>();
560
+
561
+ for (int i = 0; i < token.length; i++) {
562
+ raw_data.add(decodeMimeString(token[i]));
563
+ }
564
+
565
+ return makeRubyString(raw_data);
566
+ }
567
+
568
+ }
569
+
570
+ @Deprecated // The fields below re-export each NKFCharset constant on RubyNKF itself; all are marked deprecated.
571
+ public static final NKFCharset AUTO = NKFCharset.AUTO; // alias of NKFCharset.AUTO
572
+ // no ISO-2022-JP in jcodings
573
+ @Deprecated
574
+ public static final NKFCharset JIS = NKFCharset.JIS;
575
+ @Deprecated
576
+ public static final NKFCharset EUC = NKFCharset.EUC;
577
+ @Deprecated
578
+ public static final NKFCharset SJIS = NKFCharset.SJIS;
579
+ @Deprecated
580
+ public static final NKFCharset BINARY = NKFCharset.BINARY;
581
+ @Deprecated
582
+ public static final NKFCharset NOCONV = NKFCharset.NOCONV;
583
+ @Deprecated
584
+ public static final NKFCharset UNKNOWN = NKFCharset.UNKNOWN;
585
+ @Deprecated
586
+ public static final NKFCharset ASCII = NKFCharset.ASCII;
587
+ @Deprecated
588
+ public static final NKFCharset UTF8 = NKFCharset.UTF8;
589
+ @Deprecated
590
+ public static final NKFCharset UTF16 = NKFCharset.UTF16;
591
+ @Deprecated
592
+ public static final NKFCharset UTF32 = NKFCharset.UTF32;
593
+ @Deprecated
594
+ public static final NKFCharset OTHER = NKFCharset.OTHER;
595
+ @Deprecated
596
+ public static final NKFCharset BASE64 = NKFCharset.BASE64;
597
+ @Deprecated
598
+ public static final NKFCharset QENCODE = NKFCharset.QENCODE;
599
+ @Deprecated
600
+ public static final NKFCharset MIME_DETECT = NKFCharset.MIME_DETECT; // NOTE(review): presumably kept for callers that still reference RubyNKF.X -- confirm before removal
601
+ }
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: false
2
+ require 'mkmf'
3
+ create_makefile('nkf')
@@ -0,0 +1,51 @@
1
+ #ifndef _CONFIG_H_
2
+ #define _CONFIG_H_
3
+
4
+ /* UTF8 input and output */
5
+ #define UTF8_INPUT_ENABLE
6
+ #define UTF8_OUTPUT_ENABLE
7
+
8
+ /* convert characters invalid in Shift_JIS to CP932 */
9
+ #define SHIFTJIS_CP932
10
+
11
+ /* fix input encoding when given by option */
12
+ #define INPUT_CODE_FIX
13
+
14
+ /* --overwrite option */
15
+ /* by Satoru Takabayashi <ccsatoru@vega.aichi-u.ac.jp> */
16
+ #define OVERWRITE
17
+
18
+ /* --cap-input, --url-input option */
19
+ #define INPUT_OPTION
20
+
21
+ /* --numchar-input option */
22
+ #define NUMCHAR_OPTION
23
+
24
+ /* --debug, --no-output option */
25
+ #define CHECK_OPTION
26
+
27
+ /* JIS X0212 */
28
+ #define X0212_ENABLE
29
+
30
+ /* --exec-in, --exec-out option
31
+ * require pipe, fork, execvp and so on.
32
+ * please undef this on MS-DOS, MinGW
33
+ * this is still buggy around child process
34
+ */
35
+ /* #define EXEC_IO */
36
+
37
+ /* Unicode Normalization */
38
+ #define UNICODE_NORMALIZATION
39
+
40
+ /*
41
+ * Select Default Output Encoding
42
+ *
43
+ */
44
+
45
+ /* #define DEFAULT_CODE_JIS */
46
+ /* #define DEFAULT_CODE_SJIS */
47
+ /* #define DEFAULT_CODE_WINDOWS_31J */
48
+ /* #define DEFAULT_CODE_EUC */
49
+ /* #define DEFAULT_CODE_UTF8 */
50
+
51
+ #endif /* _CONFIG_H_ */