nkf 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +9 -2
- data/.gitignore +3 -0
- data/Rakefile +14 -2
- data/ext/java/org/jruby/ext/nkf/Command.java +58 -0
- data/ext/java/org/jruby/ext/nkf/CommandParser.java +70 -0
- data/ext/java/org/jruby/ext/nkf/NKFLibrary.java +13 -0
- data/ext/java/org/jruby/ext/nkf/Option.java +80 -0
- data/ext/java/org/jruby/ext/nkf/Options.java +109 -0
- data/ext/java/org/jruby/ext/nkf/RubyNKF.java +601 -0
- data/ext/nkf/nkf.c +3 -0
- data/lib/nkf.rb +6 -0
- data/nkf.gemspec +22 -3
- metadata +14 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f82f8f21210cbbd491307967db9c664a2afc2707edadb6a93151391c273d83d9
|
4
|
+
data.tar.gz: 91c3a5ef30e7cc7d6f11d4fad788f16ceec857dc2f988fd7a0c70b36ca322d38
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6c50ad8960ac5ac3f16b2956a30f483753ab0bc394e507b335ea0b85b44ee306a23842789457a5c220431667ddca34b9c121b74cd238acc84a7c1964d6ef59aa
|
7
|
+
data.tar.gz: c63500824d58fa3a27c119bc20dc49237e3be1c5f1d531606af5514d6d79f860f99ca44d8b12388773024f79374994fb2b1bcd96cdbb543c549dda21edf4a796
|
data/.github/workflows/test.yml
CHANGED
@@ -3,15 +3,22 @@ name: build
|
|
3
3
|
on: [push, pull_request]
|
4
4
|
|
5
5
|
jobs:
|
6
|
+
ruby-versions:
|
7
|
+
uses: ruby/actions/.github/workflows/ruby_versions.yml@master
|
8
|
+
with:
|
9
|
+
engine: cruby-jruby
|
10
|
+
min_version: 2.5
|
11
|
+
|
6
12
|
build:
|
13
|
+
needs: ruby-versions
|
7
14
|
name: build (${{ matrix.ruby }} / ${{ matrix.os }})
|
8
15
|
strategy:
|
9
16
|
matrix:
|
10
|
-
ruby:
|
17
|
+
ruby: ${{ fromJson(needs.ruby-versions.outputs.versions) }}
|
11
18
|
os: [ ubuntu-latest, macos-latest ]
|
12
19
|
runs-on: ${{ matrix.os }}
|
13
20
|
steps:
|
14
|
-
- uses: actions/checkout@
|
21
|
+
- uses: actions/checkout@v4
|
15
22
|
- name: Set up Ruby
|
16
23
|
uses: ruby/setup-ruby@v1
|
17
24
|
with:
|
data/.gitignore
CHANGED
data/Rakefile
CHANGED
@@ -7,6 +7,18 @@ Rake::TestTask.new(:test) do |t|
|
|
7
7
|
t.test_files = FileList["test/**/test_*.rb"]
|
8
8
|
end
|
9
9
|
|
10
|
-
|
11
|
-
|
10
|
+
# Register the extension build task that matches the running Ruby engine.
case RUBY_ENGINE
when "jruby"
  # JRuby: build the Java sources found under ext/java.
  require "rake/javaextensiontask"

  Rake::JavaExtensionTask.new("nkf") do |java_ext|
    java_ext.source_version = "1.8"
    java_ext.target_version = "1.8"
    java_ext.ext_dir = "ext/java"
  end

  # Only on JRuby does :build depend on :compile.
  task build: :compile
else
  # Every other engine uses the regular extension task.
  require 'rake/extensiontask'
  Rake::ExtensionTask.new("nkf")
end
|
23
|
+
|
12
24
|
task :default => :test
|
@@ -0,0 +1,58 @@
|
|
1
|
+
/***** BEGIN LICENSE BLOCK *****
|
2
|
+
* Version: EPL 2.0/LGPL 2.1
|
3
|
+
*
|
4
|
+
* The contents of this file are subject to the Eclipse Public
|
5
|
+
* License Version 2.0 (the "License"); you may not use this file
|
6
|
+
* except in compliance with the License. You may obtain a copy of
|
7
|
+
* the License at http://www.eclipse.org/legal/epl-v20.html
|
8
|
+
*
|
9
|
+
* Software distributed under the License is distributed on an "AS
|
10
|
+
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
11
|
+
* implied. See the License for the specific language governing
|
12
|
+
* rights and limitations under the License.
|
13
|
+
*
|
14
|
+
* Copyright (C) 2011 Koichiro Ohba <koichiro@meadowy.org>
|
15
|
+
*
|
16
|
+
* Alternatively, the contents of this file may be used under the terms of
|
17
|
+
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
18
|
+
* in which case the provisions of the LGPL are applicable instead
|
19
|
+
* of those above. If you wish to allow use of your version of this file only
|
20
|
+
* under the terms of either the LGPL, and not to allow others to
|
21
|
+
* use your version of this file under the terms of the EPL, indicate your
|
22
|
+
* decision by deleting the provisions above and replace them with the notice
|
23
|
+
* and other provisions required by the LGPL. If you do not delete
|
24
|
+
* the provisions above, a recipient may use your version of this file under
|
25
|
+
* the terms of any one of the EPL, the LGPL.
|
26
|
+
***** END LICENSE BLOCK *****/
|
27
|
+
|
28
|
+
package org.jruby.ext.nkf;
|
29
|
+
|
30
|
+
import java.util.List;
|
31
|
+
import java.util.ArrayList;
|
32
|
+
|
33
|
+
public class Command {
|
34
|
+
private final List<Option> options = new ArrayList<Option>();
|
35
|
+
public boolean hasOption(String opt) {
|
36
|
+
for (Option option : options) {
|
37
|
+
if (opt.equals(option.getOpt())) return true;
|
38
|
+
if (opt.equals(option.getLongOpt())) return true;
|
39
|
+
}
|
40
|
+
return false;
|
41
|
+
}
|
42
|
+
public void addOption(Option opt) {
|
43
|
+
options.add(opt);
|
44
|
+
}
|
45
|
+
public Option getOption(String opt) {
|
46
|
+
for (Option option : options) {
|
47
|
+
if (opt.equals(option.getOpt())) return option;
|
48
|
+
if (opt.equals(option.getLongOpt())) return option;
|
49
|
+
}
|
50
|
+
return null;
|
51
|
+
}
|
52
|
+
public String getOptionValue(String opt) {
|
53
|
+
return getOption(opt).getValue();
|
54
|
+
}
|
55
|
+
public String toString() {
|
56
|
+
return options.toString();
|
57
|
+
}
|
58
|
+
}
|
@@ -0,0 +1,70 @@
|
|
1
|
+
/***** BEGIN LICENSE BLOCK *****
|
2
|
+
* Version: EPL 2.0/LGPL 2.1
|
3
|
+
*
|
4
|
+
* The contents of this file are subject to the Eclipse Public
|
5
|
+
* License Version 2.0 (the "License"); you may not use this file
|
6
|
+
* except in compliance with the License. You may obtain a copy of
|
7
|
+
* the License at http://www.eclipse.org/legal/epl-v20.html
|
8
|
+
*
|
9
|
+
* Software distributed under the License is distributed on an "AS
|
10
|
+
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
11
|
+
* implied. See the License for the specific language governing
|
12
|
+
* rights and limitations under the License.
|
13
|
+
*
|
14
|
+
* Copyright (C) 2011 Koichiro Ohba <koichiro@meadowy.org>
|
15
|
+
*
|
16
|
+
* Alternatively, the contents of this file may be used under the terms of
|
17
|
+
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
18
|
+
* in which case the provisions of the LGPL are applicable instead
|
19
|
+
* of those above. If you wish to allow use of your version of this file only
|
20
|
+
* under the terms of either the LGPL, and not to allow others to
|
21
|
+
* use your version of this file under the terms of the EPL, indicate your
|
22
|
+
* decision by deleting the provisions above and replace them with the notice
|
23
|
+
* and other provisions required by the LGPL. If you do not delete
|
24
|
+
* the provisions above, a recipient may use your version of this file under
|
25
|
+
* the terms of any one of the EPL, the LGPL.
|
26
|
+
***** END LICENSE BLOCK *****/
|
27
|
+
|
28
|
+
package org.jruby.ext.nkf;
|
29
|
+
|
30
|
+
public class CommandParser {
|
31
|
+
public Command parse(Options opt, String args) {
|
32
|
+
Command cc = new Command();
|
33
|
+
String[] tokens = args.split("\\s");
|
34
|
+
for (int i = 0; i < tokens.length; i++) {
|
35
|
+
// long option
|
36
|
+
if (tokens[i].startsWith("--")) {
|
37
|
+
String s = stripDash(tokens[i]);
|
38
|
+
if (opt.hasLongOption(s)) {
|
39
|
+
cc.addOption(opt.matchLongOption(s));
|
40
|
+
}
|
41
|
+
} else {
|
42
|
+
// short option
|
43
|
+
String s = stripDash(tokens[i]);
|
44
|
+
int max = s.length();
|
45
|
+
for (int j = 0; j < max; j++) {
|
46
|
+
if (opt.hasShortOption(s)) {
|
47
|
+
Option cmd = opt.matchShortOption(s);
|
48
|
+
if (cmd.getValue() != null) {
|
49
|
+
int op_len = cmd.getValue().length();
|
50
|
+
s = s.substring(op_len);
|
51
|
+
j = j + op_len;
|
52
|
+
}
|
53
|
+
cc.addOption(cmd);
|
54
|
+
}
|
55
|
+
s = s.substring(1);
|
56
|
+
}
|
57
|
+
}
|
58
|
+
}
|
59
|
+
return cc;
|
60
|
+
}
|
61
|
+
private String stripDash(String s) {
|
62
|
+
if (s.startsWith("--")) {
|
63
|
+
return s.substring(2, s.length());
|
64
|
+
} else if (s.startsWith("-")) {
|
65
|
+
return s.substring(1, s.length());
|
66
|
+
} else {
|
67
|
+
return s;
|
68
|
+
}
|
69
|
+
}
|
70
|
+
}
|
@@ -0,0 +1,13 @@
|
|
1
|
+
package org.jruby.ext.nkf;
|
2
|
+
|
3
|
+
import org.jruby.Ruby;
|
4
|
+
import org.jruby.runtime.load.Library;
|
5
|
+
|
6
|
+
import java.io.IOException;
|
7
|
+
|
8
|
+
public class NKFLibrary implements Library {
|
9
|
+
@Override
|
10
|
+
public void load(Ruby ruby, boolean b) throws IOException {
|
11
|
+
RubyNKF.load(ruby);
|
12
|
+
}
|
13
|
+
}
|
@@ -0,0 +1,80 @@
|
|
1
|
+
/***** BEGIN LICENSE BLOCK *****
|
2
|
+
* Version: EPL 2.0/LGPL 2.1
|
3
|
+
*
|
4
|
+
* The contents of this file are subject to the Eclipse Public
|
5
|
+
* License Version 2.0 (the "License"); you may not use this file
|
6
|
+
* except in compliance with the License. You may obtain a copy of
|
7
|
+
* the License at http://www.eclipse.org/legal/epl-v20.html
|
8
|
+
*
|
9
|
+
* Software distributed under the License is distributed on an "AS
|
10
|
+
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
11
|
+
* implied. See the License for the specific language governing
|
12
|
+
* rights and limitations under the License.
|
13
|
+
*
|
14
|
+
* Copyright (C) 2011 Koichiro Ohba <koichiro@meadowy.org>
|
15
|
+
*
|
16
|
+
* Alternatively, the contents of this file may be used under the terms of
|
17
|
+
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
18
|
+
* in which case the provisions of the LGPL are applicable instead
|
19
|
+
* of those above. If you wish to allow use of your version of this file only
|
20
|
+
* under the terms of either the LGPL, and not to allow others to
|
21
|
+
* use your version of this file under the terms of the EPL, indicate your
|
22
|
+
* decision by deleting the provisions above and replace them with the notice
|
23
|
+
* and other provisions required by the LGPL. If you do not delete
|
24
|
+
* the provisions above, a recipient may use your version of this file under
|
25
|
+
* the terms of any one of the EPL, the LGPL.
|
26
|
+
***** END LICENSE BLOCK *****/
|
27
|
+
|
28
|
+
package org.jruby.ext.nkf;
|
29
|
+
|
30
|
+
import java.util.regex.Pattern;
|
31
|
+
|
32
|
+
public class Option {
|
33
|
+
private final String opt;
|
34
|
+
private final String longOpt;
|
35
|
+
private boolean hasArg = false;
|
36
|
+
private String value = null;
|
37
|
+
private Pattern pattern;
|
38
|
+
|
39
|
+
public Option(String opt, String longOpt, String pattern) {
|
40
|
+
this.opt = opt;
|
41
|
+
this.longOpt = longOpt;
|
42
|
+
if (pattern != null) {
|
43
|
+
this.hasArg = true;
|
44
|
+
this.pattern = Pattern.compile(pattern);
|
45
|
+
}
|
46
|
+
}
|
47
|
+
String getOpt() { return opt; }
|
48
|
+
String getLongOpt() { return longOpt; }
|
49
|
+
boolean hasShortOpt() {
|
50
|
+
return opt != null;
|
51
|
+
}
|
52
|
+
boolean hasLongOpt() {
|
53
|
+
return longOpt != null;
|
54
|
+
}
|
55
|
+
boolean hasArg() {
|
56
|
+
return hasArg;
|
57
|
+
}
|
58
|
+
public String getValue() {
|
59
|
+
return value;
|
60
|
+
}
|
61
|
+
void setValue(String v) {
|
62
|
+
value = v;
|
63
|
+
}
|
64
|
+
String getKey() {
|
65
|
+
if (opt == null)
|
66
|
+
return longOpt;
|
67
|
+
else
|
68
|
+
return opt;
|
69
|
+
}
|
70
|
+
Pattern pattern() {
|
71
|
+
return pattern;
|
72
|
+
}
|
73
|
+
public String toString() {
|
74
|
+
return "[opt: " + opt
|
75
|
+
+ " longOpt: " + longOpt
|
76
|
+
+ " hasArg: " + hasArg
|
77
|
+
+ " pattern: " + pattern
|
78
|
+
+ " value: " + value + "]";
|
79
|
+
}
|
80
|
+
}
|
@@ -0,0 +1,109 @@
|
|
1
|
+
/***** BEGIN LICENSE BLOCK *****
|
2
|
+
* Version: EPL 2.0/LGPL 2.1
|
3
|
+
*
|
4
|
+
* The contents of this file are subject to the Eclipse Public
|
5
|
+
* License Version 2.0 (the "License"); you may not use this file
|
6
|
+
* except in compliance with the License. You may obtain a copy of
|
7
|
+
* the License at http://www.eclipse.org/legal/epl-v20.html
|
8
|
+
*
|
9
|
+
* Software distributed under the License is distributed on an "AS
|
10
|
+
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
11
|
+
* implied. See the License for the specific language governing
|
12
|
+
* rights and limitations under the License.
|
13
|
+
*
|
14
|
+
* Copyright (C) 2011 Koichiro Ohba <koichiro@meadowy.org>
|
15
|
+
*
|
16
|
+
* Alternatively, the contents of this file may be used under the terms of
|
17
|
+
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
18
|
+
* in which case the provisions of the LGPL are applicable instead
|
19
|
+
* of those above. If you wish to allow use of your version of this file only
|
20
|
+
* under the terms of either the LGPL, and not to allow others to
|
21
|
+
* use your version of this file under the terms of the EPL, indicate your
|
22
|
+
* decision by deleting the provisions above and replace them with the notice
|
23
|
+
* and other provisions required by the LGPL. If you do not delete
|
24
|
+
* the provisions above, a recipient may use your version of this file under
|
25
|
+
* the terms of any one of the EPL, the LGPL.
|
26
|
+
***** END LICENSE BLOCK *****/
|
27
|
+
|
28
|
+
package org.jruby.ext.nkf;
|
29
|
+
|
30
|
+
import java.util.Map;
|
31
|
+
import java.util.LinkedHashMap;
|
32
|
+
import java.util.regex.Matcher;
|
33
|
+
|
34
|
+
public class Options {
|
35
|
+
private final Map<String, Option> shortOpts = new LinkedHashMap<String, Option>();
|
36
|
+
private final Map<String, Option> longOpts = new LinkedHashMap<String, Option>();
|
37
|
+
|
38
|
+
public Options addOption(String opt) {
|
39
|
+
return addOption(opt, null);
|
40
|
+
}
|
41
|
+
public Options addOption(String opt, String longOpt) {
|
42
|
+
return addOption(opt, longOpt, null);
|
43
|
+
}
|
44
|
+
public Options addOption(String opt, String longOpt, String pattern) {
|
45
|
+
return addOption(new Option(opt, longOpt, pattern));
|
46
|
+
}
|
47
|
+
public Options addOption(Option opt) {
|
48
|
+
if (opt.hasLongOpt()) {
|
49
|
+
longOpts.put(opt.getLongOpt(), opt);
|
50
|
+
}
|
51
|
+
if (opt.hasShortOpt()) {
|
52
|
+
shortOpts.put(opt.getOpt(), opt);
|
53
|
+
}
|
54
|
+
return this;
|
55
|
+
}
|
56
|
+
boolean hasShortOption(String opt) {
|
57
|
+
for (Map.Entry<String , Option> e : shortOpts.entrySet()) {
|
58
|
+
if (opt.startsWith(e.getKey())) {
|
59
|
+
return true;
|
60
|
+
}
|
61
|
+
}
|
62
|
+
return false;
|
63
|
+
}
|
64
|
+
public Option matchShortOption(String opt) {
|
65
|
+
// independent of opt length
|
66
|
+
for (Map.Entry<String , Option> e : shortOpts.entrySet()) {
|
67
|
+
//System.out.println(opt + " = " + e.getKey());
|
68
|
+
if (opt.startsWith(e.getKey())) {
|
69
|
+
//System.out.println("match[" + e.getKey() + "]");
|
70
|
+
Option cmd = e.getValue();
|
71
|
+
if (cmd.hasArg()) {
|
72
|
+
Matcher m = cmd.pattern().matcher(opt);
|
73
|
+
if (m.find()) {
|
74
|
+
//System.out.println("regix[" + m.group() + "]");
|
75
|
+
cmd.setValue(m.group());
|
76
|
+
}
|
77
|
+
}
|
78
|
+
return cmd;
|
79
|
+
}
|
80
|
+
}
|
81
|
+
return null;
|
82
|
+
}
|
83
|
+
boolean hasLongOption(String opt) {
|
84
|
+
for (Map.Entry<String , Option> e : longOpts.entrySet()) {
|
85
|
+
if (opt.startsWith(e.getKey())) {
|
86
|
+
return true;
|
87
|
+
}
|
88
|
+
}
|
89
|
+
return false;
|
90
|
+
}
|
91
|
+
Option matchLongOption(String opt) {
|
92
|
+
for (Map.Entry<String , Option> e : longOpts.entrySet()) {
|
93
|
+
//System.out.println(opt + " = " + e.getKey());
|
94
|
+
if (opt.startsWith(e.getKey())) {
|
95
|
+
//System.out.println("match[" + e.getKey() + "]");
|
96
|
+
Option cmd = e.getValue();
|
97
|
+
if (cmd.hasArg()) {
|
98
|
+
Matcher m = cmd.pattern().matcher(opt);
|
99
|
+
if (m.find()) {
|
100
|
+
//System.out.println("regix[" + m.group() + "]");
|
101
|
+
cmd.setValue(m.group(1));
|
102
|
+
}
|
103
|
+
}
|
104
|
+
return cmd;
|
105
|
+
}
|
106
|
+
}
|
107
|
+
return null;
|
108
|
+
}
|
109
|
+
}
|
@@ -0,0 +1,601 @@
|
|
1
|
+
/***** BEGIN LICENSE BLOCK *****
|
2
|
+
* Version: EPL 2.0/LGPL 2.1
|
3
|
+
*
|
4
|
+
* The contents of this file are subject to the Eclipse Public
|
5
|
+
* License Version 2.0 (the "License"); you may not use this file
|
6
|
+
* except in compliance with the License. You may obtain a copy of
|
7
|
+
* the License at http://www.eclipse.org/legal/epl-v20.html
|
8
|
+
*
|
9
|
+
* Software distributed under the License is distributed on an "AS
|
10
|
+
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
11
|
+
* implied. See the License for the specific language governing
|
12
|
+
* rights and limitations under the License.
|
13
|
+
*
|
14
|
+
* Copyright (C) 2007-2011 Koichiro Ohba <koichiro@meadowy.org>
|
15
|
+
*
|
16
|
+
* Alternatively, the contents of this file may be used under the terms of
|
17
|
+
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
18
|
+
* in which case the provisions of the LGPL are applicable instead
|
19
|
+
* of those above. If you wish to allow use of your version of this file only
|
20
|
+
* under the terms of either the LGPL, and not to allow others to
|
21
|
+
* use your version of this file under the terms of the EPL, indicate your
|
22
|
+
* decision by deleting the provisions above and replace them with the notice
|
23
|
+
* and other provisions required by the LGPL. If you do not delete
|
24
|
+
* the provisions above, a recipient may use your version of this file under
|
25
|
+
* the terms of any one of the EPL, the LGPL.
|
26
|
+
***** END LICENSE BLOCK *****/
|
27
|
+
|
28
|
+
package org.jruby.ext.nkf;
|
29
|
+
|
30
|
+
import java.nio.ByteBuffer;
|
31
|
+
import java.nio.CharBuffer;
|
32
|
+
import java.nio.charset.CharacterCodingException;
|
33
|
+
import java.nio.charset.Charset;
|
34
|
+
import java.nio.charset.CharsetDecoder;
|
35
|
+
import java.nio.charset.CharsetEncoder;
|
36
|
+
import java.nio.charset.UnsupportedCharsetException;
|
37
|
+
import java.util.ArrayList;
|
38
|
+
import java.util.Map;
|
39
|
+
import java.util.HashMap;
|
40
|
+
|
41
|
+
import org.jcodings.Encoding;
|
42
|
+
import org.jcodings.specific.ASCIIEncoding;
|
43
|
+
import org.jcodings.specific.UTF8Encoding;
|
44
|
+
import org.jcodings.transcode.EConv;
|
45
|
+
import org.jcodings.transcode.EConvFlags;
|
46
|
+
import org.jruby.Ruby;
|
47
|
+
import org.jruby.RubyArray;
|
48
|
+
import org.jruby.RubyModule;
|
49
|
+
import org.jruby.RubyString;
|
50
|
+
|
51
|
+
import org.jruby.anno.JRubyMethod;
|
52
|
+
import org.jruby.anno.JRubyModule;
|
53
|
+
import org.jruby.runtime.Helpers;
|
54
|
+
import org.jruby.runtime.ThreadContext;
|
55
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
56
|
+
import org.jruby.util.ByteList;
|
57
|
+
import org.jruby.util.KCode;
|
58
|
+
import org.jruby.util.Pack;
|
59
|
+
import org.jruby.util.io.EncodingUtils;
|
60
|
+
|
61
|
+
@JRubyModule(name="NKF")
|
62
|
+
public class RubyNKF {
|
63
|
+
/**
 * Character sets and transfer encodings known to this NKF implementation.
 * Each constant pairs a numeric value with a charset name (null where the
 * constant has none). Note that some constants share a numeric value.
 */
public enum NKFCharset { // a nested enum is implicitly static
    AUTO(0, "x-JISAutoDetect"),
    // no ISO-2022-JP in jcodings
    JIS(1, "ISO-2022-JP"),
    EUC(2, "EUC-JP"),
    SJIS(3, "Shift_JIS"),
    BINARY(4, null),
    NOCONV(4, null),
    UNKNOWN(0, null),
    ASCII(5, "iso-8859-1"),
    UTF8(6, "UTF-8"),
    UTF16(8, "UTF-16"),
    UTF32(12, "UTF-32"),
    OTHER(16, null),
    BASE64(20, "base64"),
    QENCODE(21, "qencode"),
    MIME_DETECT(22, "MimeAutoDetect");

    private final int value;
    private final String charset;

    NKFCharset(final int numericValue, final String charsetName) {
        this.value = numericValue;
        this.charset = charsetName;
    }

    /** The numeric value given to the constant. */
    public int getValue() {
        return this.value;
    }

    /** The charset name given to the constant; may be null. */
    public String getCharset() {
        return this.charset;
    }
}
|
97
|
+
|
98
|
+
private static final ByteList BEGIN_MIME_STRING = new ByteList(ByteList.plain("=?"));
|
99
|
+
private static final ByteList END_MIME_STRING = new ByteList(ByteList.plain("?="));
|
100
|
+
private static final ByteList PACK_BASE64 = new ByteList(ByteList.plain("m"));
|
101
|
+
private static final ByteList PACK_QENCODE = new ByteList(ByteList.plain("M"));
|
102
|
+
|
103
|
+
public static final Map<Integer, String> NKFCharsetMap = new HashMap<Integer, String>(20, 1);
|
104
|
+
|
105
|
+
public static void load(Ruby runtime) {
|
106
|
+
createNKF(runtime);
|
107
|
+
}
|
108
|
+
|
109
|
+
public static void createNKF(Ruby runtime) {
|
110
|
+
final RubyModule NKF = runtime.defineModule("NKF");
|
111
|
+
final String version = "2.1.2";
|
112
|
+
final String relDate = "2011-09-08";
|
113
|
+
|
114
|
+
NKF.defineConstant("NKF_VERSION", runtime.newString(version));
|
115
|
+
NKF.defineConstant("NKF_RELEASE_DATE", runtime.newString(relDate));
|
116
|
+
NKF.defineConstant("VERSION", runtime.newString(version + ' ' + '(' + "JRuby" + '_' + relDate + ')'));
|
117
|
+
|
118
|
+
for ( NKFCharset charset : NKFCharset.values() ) {
|
119
|
+
NKFCharsetMap.put(charset.value, charset.name());
|
120
|
+
|
121
|
+
if (charset.value > 12 ) continue;
|
122
|
+
NKF.defineConstant(charset.name(), charsetMappedValue(runtime, charset));
|
123
|
+
}
|
124
|
+
|
125
|
+
NKF.defineAnnotatedMethods(RubyNKF.class);
|
126
|
+
}
|
127
|
+
|
128
|
+
@JRubyMethod(name = "guess", module = true)
|
129
|
+
public static IRubyObject guess(ThreadContext context, IRubyObject recv, IRubyObject s) {
|
130
|
+
return charsetMappedValue(context.runtime, guess(context, s));
|
131
|
+
}
|
132
|
+
|
133
|
+
public static NKFCharset guess(ThreadContext context, IRubyObject s) {
|
134
|
+
// TODO: Fix charset usage for JRUBY-4553
|
135
|
+
Ruby runtime = context.runtime;
|
136
|
+
if (!s.respondsTo("to_str")) {
|
137
|
+
throw runtime.newTypeError("can't convert " + s.getMetaClass() + " into String");
|
138
|
+
}
|
139
|
+
ByteList bytes = s.convertToString().getByteList();
|
140
|
+
ByteBuffer buf = ByteBuffer.wrap(bytes.getUnsafeBytes(), bytes.begin(), bytes.length());
|
141
|
+
CharsetDecoder decoder;
|
142
|
+
try {
|
143
|
+
decoder = Charset.forName("x-JISAutoDetect").newDecoder();
|
144
|
+
} catch (UnsupportedCharsetException e) {
|
145
|
+
throw runtime.newStandardError("charsets.jar is required to use NKF#guess. Please install JRE which supports m17n.");
|
146
|
+
}
|
147
|
+
try {
|
148
|
+
decoder.decode(buf);
|
149
|
+
|
150
|
+
if ( ! decoder.isCharsetDetected() ) {
|
151
|
+
return NKFCharset.UNKNOWN;
|
152
|
+
}
|
153
|
+
Charset charset = decoder.detectedCharset();
|
154
|
+
String name = charset.name();
|
155
|
+
if ("Shift_JIS".equals(name)) {
|
156
|
+
return NKFCharset.SJIS;
|
157
|
+
}
|
158
|
+
if ("Windows-31j".equalsIgnoreCase(name)) {
|
159
|
+
return NKFCharset.JIS;
|
160
|
+
}
|
161
|
+
if ("EUC-JP".equals(name)) {
|
162
|
+
return NKFCharset.EUC;
|
163
|
+
}
|
164
|
+
if ("ISO-2022-JP".equals(name)) {
|
165
|
+
return NKFCharset.JIS;
|
166
|
+
}
|
167
|
+
}
|
168
|
+
catch (CharacterCodingException e) {
|
169
|
+
// fall through and try direct encoding
|
170
|
+
}
|
171
|
+
|
172
|
+
if (bytes.getEncoding() == UTF8Encoding.INSTANCE) {
|
173
|
+
return NKFCharset.UTF8;
|
174
|
+
}
|
175
|
+
if (bytes.getEncoding().toString().startsWith("UTF-16")) {
|
176
|
+
return NKFCharset.UTF16;
|
177
|
+
}
|
178
|
+
if (bytes.getEncoding().toString().startsWith("UTF-32")) {
|
179
|
+
return NKFCharset.UTF32;
|
180
|
+
}
|
181
|
+
return NKFCharset.UNKNOWN;
|
182
|
+
}
|
183
|
+
|
184
|
+
private static IRubyObject charsetMappedValue(final Ruby runtime, final NKFCharset charset) {
|
185
|
+
final Encoding encoding;
|
186
|
+
switch (charset) {
|
187
|
+
case AUTO: case NOCONV: case UNKNOWN: return runtime.getNil();
|
188
|
+
case BINARY:
|
189
|
+
encoding = runtime.getEncodingService().getAscii8bitEncoding();
|
190
|
+
return runtime.getEncodingService().convertEncodingToRubyEncoding(encoding);
|
191
|
+
}
|
192
|
+
|
193
|
+
encoding = runtime.getEncodingService().getEncodingFromString(charset.getCharset());
|
194
|
+
return runtime.getEncodingService().convertEncodingToRubyEncoding(encoding);
|
195
|
+
}
|
196
|
+
|
197
|
+
@JRubyMethod(name = "guess1", module = true)
|
198
|
+
public static IRubyObject guess1(ThreadContext context, IRubyObject recv, IRubyObject str) {
|
199
|
+
return guess(context, recv, str);
|
200
|
+
}
|
201
|
+
|
202
|
+
@JRubyMethod(name = "guess2", module = true)
|
203
|
+
public static IRubyObject guess2(ThreadContext context, IRubyObject recv, IRubyObject str) {
|
204
|
+
return guess(context, recv, str);
|
205
|
+
}
|
206
|
+
|
207
|
+
@JRubyMethod(name = "nkf", module = true)
|
208
|
+
public static IRubyObject nkf(ThreadContext context, IRubyObject recv, IRubyObject opt, IRubyObject str) {
|
209
|
+
Ruby runtime = context.runtime;
|
210
|
+
|
211
|
+
if (!opt.respondsTo("to_str")) {
|
212
|
+
throw runtime.newTypeError("can't convert " + opt.getMetaClass() + " into String");
|
213
|
+
}
|
214
|
+
|
215
|
+
if (!str.respondsTo("to_str")) {
|
216
|
+
throw runtime.newTypeError("can't convert " + str.getMetaClass() + " into String");
|
217
|
+
}
|
218
|
+
|
219
|
+
Map<String, NKFCharset> options = parseOpt(opt.convertToString().toString());
|
220
|
+
|
221
|
+
if (options.get("input").getValue() == NKFCharset.AUTO.getValue()) {
|
222
|
+
options.put("input", guess(context, str));
|
223
|
+
}
|
224
|
+
|
225
|
+
ByteList bstr = str.convertToString().getByteList();
|
226
|
+
final Converter converter;
|
227
|
+
if (Converter.isMimeText(bstr, options)) {
|
228
|
+
converter = new MimeConverter(context, options);
|
229
|
+
} else {
|
230
|
+
converter = new DefaultConverter(context, options);
|
231
|
+
}
|
232
|
+
|
233
|
+
RubyString result = converter.convert(bstr);
|
234
|
+
|
235
|
+
if (options.get("mime-encode") == NKFCharset.BASE64) {
|
236
|
+
result = Converter.encodeMimeString(runtime, result, PACK_BASE64);
|
237
|
+
} else if (options.get("mime-encode") == NKFCharset.QENCODE) {
|
238
|
+
result = Converter.encodeMimeString(runtime, result, PACK_QENCODE);
|
239
|
+
}
|
240
|
+
|
241
|
+
return result;
|
242
|
+
}
|
243
|
+
|
244
|
+
public static Command parseOption(String s) {
|
245
|
+
Options options = new Options();
|
246
|
+
options.addOption("b");
|
247
|
+
options.addOption("u");
|
248
|
+
options.addOption("j", "jis");
|
249
|
+
options.addOption("s", "sjis");
|
250
|
+
options.addOption("e", "euc");
|
251
|
+
options.addOption("w", null, "[0-9][0-9]");
|
252
|
+
options.addOption("J", "jis-input");
|
253
|
+
options.addOption("S", "sjis-input");
|
254
|
+
options.addOption("E", "euc-input");
|
255
|
+
options.addOption("W", null, "[0-9][0-9]");
|
256
|
+
options.addOption("t");
|
257
|
+
options.addOption("i_");
|
258
|
+
options.addOption("o_");
|
259
|
+
options.addOption("r");
|
260
|
+
options.addOption("h1", "hiragana");
|
261
|
+
options.addOption("h2", "katakana");
|
262
|
+
options.addOption("h3", "katakana-hiragana");
|
263
|
+
options.addOption("T");
|
264
|
+
options.addOption("l");
|
265
|
+
options.addOption("f", null, "[0-9]+-[0-9]*");
|
266
|
+
options.addOption("F");
|
267
|
+
options.addOption("Z", null, "[0-3]");
|
268
|
+
options.addOption("X");
|
269
|
+
options.addOption("x");
|
270
|
+
options.addOption("B", null, "[0-2]");
|
271
|
+
options.addOption("I");
|
272
|
+
options.addOption("L", null, "[uwm]");
|
273
|
+
options.addOption("d");
|
274
|
+
options.addOption("c");
|
275
|
+
options.addOption("m", null, "[BQN0]");
|
276
|
+
options.addOption("M", null, "[BQ]");
|
277
|
+
options.addOption(null, "fj");
|
278
|
+
options.addOption(null, "unix");
|
279
|
+
options.addOption(null, "mac");
|
280
|
+
options.addOption(null, "msdos");
|
281
|
+
options.addOption(null, "windows");
|
282
|
+
options.addOption(null, "mime");
|
283
|
+
options.addOption(null, "base64");
|
284
|
+
options.addOption(null, "mime-input");
|
285
|
+
options.addOption(null, "base64-input");
|
286
|
+
options.addOption(null, "ic", "ic=(.*)");
|
287
|
+
options.addOption(null, "oc", "oc=(.*)");
|
288
|
+
options.addOption(null, "fb-skip");
|
289
|
+
options.addOption(null, "fb-html");
|
290
|
+
options.addOption(null, "fb-xml");
|
291
|
+
options.addOption(null, "fb-perl");
|
292
|
+
options.addOption(null, "fb-java");
|
293
|
+
options.addOption(null, "fb-subchar", "fb-subchar=(.*)");
|
294
|
+
options.addOption(null, "no-cp932ext");
|
295
|
+
options.addOption(null, "cap-input");
|
296
|
+
options.addOption(null, "url-input");
|
297
|
+
options.addOption(null, "numchar-input");
|
298
|
+
options.addOption(null, "no-best-fit-chars");
|
299
|
+
|
300
|
+
CommandParser parser = new CommandParser();
|
301
|
+
Command cmd = parser.parse(options, s);
|
302
|
+
return cmd;
|
303
|
+
}
|
304
|
+
|
305
|
+
private static Map<String, NKFCharset> parseOpt(String s) {
|
306
|
+
Map<String, NKFCharset> options = new HashMap<String, NKFCharset>();
|
307
|
+
|
308
|
+
// default options
|
309
|
+
options.put("input", NKFCharset.AUTO);
|
310
|
+
options.put("output", NKFCharset.JIS);
|
311
|
+
options.put("mime-decode", NKFCharset.MIME_DETECT);
|
312
|
+
options.put("mime-encode", NKFCharset.NOCONV);
|
313
|
+
|
314
|
+
Command cmd = parseOption(s);
|
315
|
+
if (cmd.hasOption("j")) {
|
316
|
+
options.put("output", NKFCharset.JIS);
|
317
|
+
}
|
318
|
+
if (cmd.hasOption("s")) {
|
319
|
+
options.put("output", NKFCharset.SJIS);
|
320
|
+
}
|
321
|
+
if (cmd.hasOption("e")) {
|
322
|
+
options.put("output", NKFCharset.EUC);
|
323
|
+
}
|
324
|
+
if (cmd.hasOption("w")) {
|
325
|
+
Option opt = cmd.getOption("w");
|
326
|
+
if ("32".equals(opt.getValue())) {
|
327
|
+
options.put("output", NKFCharset.UTF32);
|
328
|
+
} else if("16".equals(opt.getValue())) {
|
329
|
+
options.put("output", NKFCharset.UTF16);
|
330
|
+
} else {
|
331
|
+
options.put("output", NKFCharset.UTF8);
|
332
|
+
}
|
333
|
+
}
|
334
|
+
if (cmd.hasOption("J")) {
|
335
|
+
options.put("input", NKFCharset.JIS);
|
336
|
+
}
|
337
|
+
if (cmd.hasOption("S")) {
|
338
|
+
options.put("input", NKFCharset.SJIS);
|
339
|
+
}
|
340
|
+
if (cmd.hasOption("E")) {
|
341
|
+
options.put("input", NKFCharset.EUC);
|
342
|
+
}
|
343
|
+
if (cmd.hasOption("W")) {
|
344
|
+
Option opt = cmd.getOption("W");
|
345
|
+
if ("32".equals(opt.getValue())) {
|
346
|
+
options.put("input", NKFCharset.UTF32);
|
347
|
+
} else if("16".equals(opt.getValue())) {
|
348
|
+
options.put("input", NKFCharset.UTF16);
|
349
|
+
} else {
|
350
|
+
options.put("input", NKFCharset.UTF8);
|
351
|
+
}
|
352
|
+
}
|
353
|
+
if (cmd.hasOption("m")) {
|
354
|
+
Option opt = cmd.getOption("m");
|
355
|
+
if (opt.getValue() == null) {
|
356
|
+
options.put("mime-decode", NKFCharset.MIME_DETECT);
|
357
|
+
} else if ("B".equals(opt.getValue())) {
|
358
|
+
options.put("mime-decode", NKFCharset.BASE64);
|
359
|
+
} else if ("Q".equals(opt.getValue())) {
|
360
|
+
options.put("mime-decode", NKFCharset.QENCODE);
|
361
|
+
} else if ("N".equals(opt.getValue())) {
|
362
|
+
// TODO: non-strict option
|
363
|
+
} else if ("0".equals(opt.getValue())) {
|
364
|
+
options.put("mime-decode", NKFCharset.NOCONV);
|
365
|
+
}
|
366
|
+
}
|
367
|
+
if (cmd.hasOption("M")) {
|
368
|
+
Option opt = cmd.getOption("M");
|
369
|
+
if (opt.getValue() == null) {
|
370
|
+
options.put("mime-encode", NKFCharset.NOCONV);
|
371
|
+
} else if ("B".equals(opt.getValue())) {
|
372
|
+
options.put("mime-encode", NKFCharset.BASE64);
|
373
|
+
} else if ("Q".equals(opt.getValue())) {
|
374
|
+
options.put("mime-encode", NKFCharset.QENCODE);
|
375
|
+
}
|
376
|
+
}
|
377
|
+
if (cmd.hasOption("base64")) {
|
378
|
+
options.put("mime-encode", NKFCharset.BASE64);
|
379
|
+
}
|
380
|
+
if (cmd.hasOption("oc")) {
|
381
|
+
Option opt = cmd.getOption("oc");
|
382
|
+
if ("ISO-2022-JP".compareToIgnoreCase(opt.getValue()) == 0) {
|
383
|
+
options.put("output", NKFCharset.JIS);
|
384
|
+
} else if ("EUC-JP".compareToIgnoreCase(opt.getValue()) == 0) {
|
385
|
+
options.put("output", NKFCharset.EUC);
|
386
|
+
} else if ("CP932".compareToIgnoreCase(opt.getValue()) == 0) {
|
387
|
+
options.put("output", NKFCharset.SJIS);
|
388
|
+
} else if ("Shift_JIS".compareToIgnoreCase(opt.getValue()) == 0) {
|
389
|
+
options.put("output", NKFCharset.SJIS);
|
390
|
+
} else if ("Windows-31J".compareToIgnoreCase(opt.getValue()) == 0) {
|
391
|
+
options.put("output", NKFCharset.JIS);
|
392
|
+
} else if ("UTF-8".compareToIgnoreCase(opt.getValue()) == 0) {
|
393
|
+
options.put("output", NKFCharset.UTF8);
|
394
|
+
} else if ("UTF-8N".compareToIgnoreCase(opt.getValue()) == 0) {
|
395
|
+
options.put("output", NKFCharset.UTF8);
|
396
|
+
} else if ("UTF-16".compareToIgnoreCase(opt.getValue()) == 0) {
|
397
|
+
options.put("output", NKFCharset.UTF16);
|
398
|
+
} else if ("UTF-16BE-BOM".compareToIgnoreCase(opt.getValue()) == 0) {
|
399
|
+
options.put("output", NKFCharset.UTF16);
|
400
|
+
} else if ("UTF-32".compareToIgnoreCase(opt.getValue()) == 0) {
|
401
|
+
options.put("output", NKFCharset.UTF32);
|
402
|
+
} else if ("UTF-32BE-BOM".compareToIgnoreCase(opt.getValue()) == 0) {
|
403
|
+
options.put("output", NKFCharset.UTF32);
|
404
|
+
}
|
405
|
+
}
|
406
|
+
if (cmd.hasOption("ic")) {
|
407
|
+
Option opt = cmd.getOption("ic");
|
408
|
+
if ("ISO-2022-JP".compareToIgnoreCase(opt.getValue()) == 0) {
|
409
|
+
options.put("input", NKFCharset.JIS);
|
410
|
+
} else if ("EUC-JP".compareToIgnoreCase(opt.getValue()) == 0) {
|
411
|
+
options.put("input", NKFCharset.EUC);
|
412
|
+
} else if ("CP932".compareToIgnoreCase(opt.getValue()) == 0) {
|
413
|
+
options.put("input", NKFCharset.SJIS);
|
414
|
+
} else if ("Shift_JIS".compareToIgnoreCase(opt.getValue()) == 0) {
|
415
|
+
options.put("input", NKFCharset.SJIS);
|
416
|
+
} else if ("Windows-31J".compareToIgnoreCase(opt.getValue()) == 0) {
|
417
|
+
options.put("input", NKFCharset.SJIS);
|
418
|
+
} else if ("UTF-8".compareToIgnoreCase(opt.getValue()) == 0) {
|
419
|
+
options.put("input", NKFCharset.UTF8);
|
420
|
+
} else if ("UTF-8N".compareToIgnoreCase(opt.getValue()) == 0) {
|
421
|
+
options.put("input", NKFCharset.UTF8);
|
422
|
+
} else if ("UTF-16".compareToIgnoreCase(opt.getValue()) == 0) {
|
423
|
+
options.put("input", NKFCharset.UTF16);
|
424
|
+
} else if ("UTF-16BE-BOM".compareToIgnoreCase(opt.getValue()) == 0) {
|
425
|
+
options.put("input", NKFCharset.UTF16);
|
426
|
+
} else if ("UTF-32".compareToIgnoreCase(opt.getValue()) == 0) {
|
427
|
+
options.put("input", NKFCharset.UTF32);
|
428
|
+
} else if ("UTF-32BE-BOM".compareToIgnoreCase(opt.getValue()) == 0) {
|
429
|
+
options.put("input", NKFCharset.UTF32);
|
430
|
+
}
|
431
|
+
}
|
432
|
+
|
433
|
+
return options;
|
434
|
+
}
|
435
|
+
|
436
|
+
static abstract class Converter {
|
437
|
+
|
438
|
+
protected final ThreadContext context;
|
439
|
+
protected final Map<String, NKFCharset> options;
|
440
|
+
|
441
|
+
public Converter(ThreadContext ctx, Map<String, NKFCharset> opt) {
|
442
|
+
context = ctx;
|
443
|
+
options = opt;
|
444
|
+
}
|
445
|
+
|
446
|
+
static boolean isMimeText(ByteList str, Map<String, NKFCharset> options) {
|
447
|
+
if (str.length() <= 6) {
|
448
|
+
return false;
|
449
|
+
}
|
450
|
+
if (options.get("mime-decode") == NKFCharset.NOCONV) {
|
451
|
+
return false;
|
452
|
+
}
|
453
|
+
if (str.indexOf(BEGIN_MIME_STRING) < 0) {
|
454
|
+
return false;
|
455
|
+
}
|
456
|
+
if (str.lastIndexOf(END_MIME_STRING) < 0) {
|
457
|
+
return false;
|
458
|
+
}
|
459
|
+
return true;
|
460
|
+
}
|
461
|
+
|
462
|
+
private static RubyString encodeMimeString(Ruby runtime, RubyString str, ByteList format) {
|
463
|
+
RubyArray array = RubyArray.newArray(runtime, str);
|
464
|
+
return Pack.pack(runtime, array, format).chomp(runtime.getCurrentContext());
|
465
|
+
}
|
466
|
+
|
467
|
+
abstract RubyString convert(ByteList str);
|
468
|
+
|
469
|
+
ByteList convert_byte(ByteList str, String inputCharset, NKFCharset output) {
|
470
|
+
String outputCharset = output.getCharset();
|
471
|
+
|
472
|
+
if (inputCharset == null) {
|
473
|
+
inputCharset = str.getEncoding().toString();
|
474
|
+
}
|
475
|
+
|
476
|
+
if (outputCharset.equals(inputCharset)) {
|
477
|
+
return str.dup();
|
478
|
+
}
|
479
|
+
|
480
|
+
byte[] outCharsetBytes = outputCharset.getBytes();
|
481
|
+
|
482
|
+
EConv ec = EncodingUtils.econvOpenOpts(context, inputCharset.getBytes(), outCharsetBytes, 0, context.nil);
|
483
|
+
|
484
|
+
if (ec == null) {
|
485
|
+
throw context.runtime.newArgumentError("invalid encoding pair: " + inputCharset + " to " + outputCharset);
|
486
|
+
}
|
487
|
+
|
488
|
+
ByteList converted = EncodingUtils.econvStrConvert(context, ec, str, EConvFlags.INVALID_REPLACE);
|
489
|
+
|
490
|
+
converted.setEncoding(context.runtime.getEncodingService().findEncodingOrAliasEntry(outCharsetBytes).getEncoding());
|
491
|
+
|
492
|
+
return converted;
|
493
|
+
}
|
494
|
+
}
|
495
|
+
|
496
|
+
static class DefaultConverter extends Converter {
|
497
|
+
|
498
|
+
public DefaultConverter(ThreadContext ctx, Map<String, NKFCharset> opt) {
|
499
|
+
super(ctx, opt);
|
500
|
+
}
|
501
|
+
|
502
|
+
RubyString convert(ByteList str) {
|
503
|
+
NKFCharset input = options.get("input");
|
504
|
+
NKFCharset output = options.get("output");
|
505
|
+
ByteList b = convert_byte(str,
|
506
|
+
input.getCharset(),
|
507
|
+
output);
|
508
|
+
return context.runtime.newString(b);
|
509
|
+
}
|
510
|
+
}
|
511
|
+
|
512
|
+
static class MimeConverter extends Converter {
|
513
|
+
|
514
|
+
public MimeConverter(ThreadContext ctx, Map<String, NKFCharset> opt) {
|
515
|
+
super(ctx, opt);
|
516
|
+
}
|
517
|
+
|
518
|
+
private String detectCharset(String charset) {
|
519
|
+
if (charset.compareToIgnoreCase(NKFCharset.UTF8.getCharset()) == 0) {
|
520
|
+
return NKFCharset.UTF8.getCharset();
|
521
|
+
} else if (charset.compareToIgnoreCase(NKFCharset.JIS.getCharset()) == 0) {
|
522
|
+
return NKFCharset.JIS.getCharset();
|
523
|
+
} else if (charset.compareToIgnoreCase(NKFCharset.EUC.getCharset()) == 0) {
|
524
|
+
return NKFCharset.EUC.getCharset();
|
525
|
+
} else {
|
526
|
+
return NKFCharset.ASCII.getCharset();
|
527
|
+
}
|
528
|
+
}
|
529
|
+
|
530
|
+
private ByteList decodeMimeString(String str) {
|
531
|
+
String[] mime = str.split("^=\\?|\\?|\\?=$");
|
532
|
+
String charset = detectCharset(mime[1]);
|
533
|
+
int encode = mime[2].charAt(0);
|
534
|
+
RubyString body = EncodingUtils.newExternalStringWithEncoding(context.runtime, mime[3], ASCIIEncoding.INSTANCE);
|
535
|
+
|
536
|
+
final RubyArray<?> array;
|
537
|
+
if ('B' == encode || 'b' == encode) { // BASE64
|
538
|
+
array = Pack.unpack(context, body, PACK_BASE64);
|
539
|
+
} else { // Qencode
|
540
|
+
array = Pack.unpack(context, body, PACK_QENCODE);
|
541
|
+
}
|
542
|
+
RubyString s = (RubyString) array.entry(0);
|
543
|
+
ByteList decodeStr = s.asString().getByteList();
|
544
|
+
|
545
|
+
return convert_byte(decodeStr, charset, options.get("output"));
|
546
|
+
}
|
547
|
+
|
548
|
+
RubyString makeRubyString(ArrayList<ByteList> list) {
|
549
|
+
ByteList r = new ByteList();
|
550
|
+
for (ByteList l : list) {
|
551
|
+
r.append(l);
|
552
|
+
}
|
553
|
+
return context.runtime.newString(r);
|
554
|
+
}
|
555
|
+
|
556
|
+
RubyString convert(ByteList str) {
|
557
|
+
String s = Helpers.decodeByteList(context.runtime, str);
|
558
|
+
String[] token = s.split("\\s");
|
559
|
+
ArrayList<ByteList> raw_data = new ArrayList<ByteList>();
|
560
|
+
|
561
|
+
for (int i = 0; i < token.length; i++) {
|
562
|
+
raw_data.add(decodeMimeString(token[i]));
|
563
|
+
}
|
564
|
+
|
565
|
+
return makeRubyString(raw_data);
|
566
|
+
}
|
567
|
+
|
568
|
+
}
|
569
|
+
|
570
|
+
// Deprecated aliases: each constant below re-exports the NKFCharset value of the same name.
@Deprecated
|
571
|
+
public static final NKFCharset AUTO = NKFCharset.AUTO;
|
572
|
+
// no ISO-2022-JP in jcodings
|
573
|
+
@Deprecated
|
574
|
+
public static final NKFCharset JIS = NKFCharset.JIS;
|
575
|
+
@Deprecated
|
576
|
+
public static final NKFCharset EUC = NKFCharset.EUC;
|
577
|
+
@Deprecated
|
578
|
+
public static final NKFCharset SJIS = NKFCharset.SJIS;
|
579
|
+
@Deprecated
|
580
|
+
public static final NKFCharset BINARY = NKFCharset.BINARY;
|
581
|
+
@Deprecated
|
582
|
+
public static final NKFCharset NOCONV = NKFCharset.NOCONV;
|
583
|
+
@Deprecated
|
584
|
+
public static final NKFCharset UNKNOWN = NKFCharset.UNKNOWN;
|
585
|
+
@Deprecated
|
586
|
+
public static final NKFCharset ASCII = NKFCharset.ASCII;
|
587
|
+
@Deprecated
|
588
|
+
public static final NKFCharset UTF8 = NKFCharset.UTF8;
|
589
|
+
@Deprecated
|
590
|
+
public static final NKFCharset UTF16 = NKFCharset.UTF16;
|
591
|
+
@Deprecated
|
592
|
+
public static final NKFCharset UTF32 = NKFCharset.UTF32;
|
593
|
+
@Deprecated
|
594
|
+
public static final NKFCharset OTHER = NKFCharset.OTHER;
|
595
|
+
@Deprecated
|
596
|
+
public static final NKFCharset BASE64 = NKFCharset.BASE64;
|
597
|
+
@Deprecated
|
598
|
+
public static final NKFCharset QENCODE = NKFCharset.QENCODE;
|
599
|
+
@Deprecated
|
600
|
+
public static final NKFCharset MIME_DETECT = NKFCharset.MIME_DETECT;
|
601
|
+
}
|
data/ext/nkf/nkf.c
CHANGED
@@ -9,6 +9,7 @@
|
|
9
9
|
|
10
10
|
#define RUBY_NKF_REVISION "$Revision$"
|
11
11
|
#define RUBY_NKF_VERSION NKF_VERSION " (" NKF_RELEASE_DATE ")"
|
12
|
+
#define NKF_GEM_VERSION "0.2.0"
|
12
13
|
|
13
14
|
#include "ruby/ruby.h"
|
14
15
|
#include "ruby/encoding.h"
|
@@ -500,4 +501,6 @@ Init_nkf(void)
|
|
500
501
|
rb_define_const(mNKF, "NKF_VERSION", rb_str_new2(NKF_VERSION));
|
501
502
|
/* Release date of nkf */
|
502
503
|
rb_define_const(mNKF, "NKF_RELEASE_DATE", rb_str_new2(NKF_RELEASE_DATE));
|
504
|
+
/* Version of the nkf gem */
|
505
|
+
rb_define_const(mNKF, "GEM_VERSION", rb_str_new_cstr(NKF_GEM_VERSION));
|
503
506
|
}
|
data/lib/nkf.rb
ADDED
data/nkf.gemspec
CHANGED
@@ -1,8 +1,18 @@
|
|
1
|
+
# Look for nkf.c next to this file, then under ext/nkf/, and take the quoted
# value that follows "#define NKF_GEM_VERSION " in the first file that exists.
# Stays nil when neither file is present.
source_version = nil
["", "ext/nkf/"].each do |prefix|
  candidate = File.join(__dir__, "#{prefix}nkf.c")
  begin
    source_version = File.open(candidate) do |io|
      # Consume everything up to and including the define keyword...
      io.gets("\n#define NKF_GEM_VERSION ")
      # ...so the remainder of that line holds the quoted version string.
      io.gets[/\s*"(.+)"/, 1]
    end
    break
  rescue Errno::ENOENT
    next
  end
end
|
10
|
+
|
1
11
|
Gem::Specification.new do |spec|
|
2
12
|
spec.name = "nkf"
|
3
|
-
spec.version =
|
4
|
-
spec.authors = ["NARUSE Yui"]
|
5
|
-
spec.email = ["naruse@airemix.jp"]
|
13
|
+
spec.version = source_version
|
14
|
+
spec.authors = ["NARUSE Yui", "Charles Oliver Nutter"]
|
15
|
+
spec.email = ["naruse@airemix.jp", "headius@headius.com"]
|
6
16
|
|
7
17
|
spec.summary = %q{Ruby extension for Network Kanji Filter}
|
8
18
|
spec.description = %q{Ruby extension for Network Kanji Filter}
|
@@ -18,6 +28,15 @@ Gem::Specification.new do |spec|
|
|
18
28
|
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
19
29
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
20
30
|
end
|
31
|
+
|
32
|
+
if Gem::Platform === spec.platform and spec.platform =~ 'java' or RUBY_ENGINE == 'jruby'
|
33
|
+
spec.platform = 'java'
|
34
|
+
spec.licenses += ["EPL-2.0", "LGPL-2.1"]
|
35
|
+
spec.files += Dir["lib/nkf.jar"]
|
36
|
+
else
|
37
|
+
spec.extensions = ["ext/nkf/extconf.rb"]
|
38
|
+
end
|
39
|
+
|
21
40
|
spec.bindir = "exe"
|
22
41
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
23
42
|
spec.require_paths = ["lib"]
|
metadata
CHANGED
@@ -1,20 +1,23 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nkf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- NARUSE Yui
|
8
|
+
- Charles Oliver Nutter
|
8
9
|
autorequire:
|
9
10
|
bindir: exe
|
10
11
|
cert_chain: []
|
11
|
-
date:
|
12
|
+
date: 2024-01-22 00:00:00.000000000 Z
|
12
13
|
dependencies: []
|
13
14
|
description: Ruby extension for Network Kanji Filter
|
14
15
|
email:
|
15
16
|
- naruse@airemix.jp
|
17
|
+
- headius@headius.com
|
16
18
|
executables: []
|
17
|
-
extensions:
|
19
|
+
extensions:
|
20
|
+
- ext/nkf/extconf.rb
|
18
21
|
extra_rdoc_files: []
|
19
22
|
files:
|
20
23
|
- ".git-blame-ignore-revs"
|
@@ -27,6 +30,12 @@ files:
|
|
27
30
|
- Rakefile
|
28
31
|
- bin/console
|
29
32
|
- bin/setup
|
33
|
+
- ext/java/org/jruby/ext/nkf/Command.java
|
34
|
+
- ext/java/org/jruby/ext/nkf/CommandParser.java
|
35
|
+
- ext/java/org/jruby/ext/nkf/NKFLibrary.java
|
36
|
+
- ext/java/org/jruby/ext/nkf/Option.java
|
37
|
+
- ext/java/org/jruby/ext/nkf/Options.java
|
38
|
+
- ext/java/org/jruby/ext/nkf/RubyNKF.java
|
30
39
|
- ext/nkf/extconf.rb
|
31
40
|
- ext/nkf/nkf-utf8/config.h
|
32
41
|
- ext/nkf/nkf-utf8/nkf.c
|
@@ -35,6 +44,7 @@ files:
|
|
35
44
|
- ext/nkf/nkf-utf8/utf8tbl.h
|
36
45
|
- ext/nkf/nkf.c
|
37
46
|
- lib/kconv.rb
|
47
|
+
- lib/nkf.rb
|
38
48
|
- nkf.gemspec
|
39
49
|
homepage: https://github.com/ruby/nkf
|
40
50
|
licenses:
|
@@ -58,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
58
68
|
- !ruby/object:Gem::Version
|
59
69
|
version: '0'
|
60
70
|
requirements: []
|
61
|
-
rubygems_version: 3.
|
71
|
+
rubygems_version: 3.6.0.dev
|
62
72
|
signing_key:
|
63
73
|
specification_version: 4
|
64
74
|
summary: Ruby extension for Network Kanji Filter
|