nkf 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +9 -2
- data/.gitignore +3 -0
- data/Rakefile +14 -2
- data/ext/java/org/jruby/ext/nkf/Command.java +58 -0
- data/ext/java/org/jruby/ext/nkf/CommandParser.java +70 -0
- data/ext/java/org/jruby/ext/nkf/NKFLibrary.java +13 -0
- data/ext/java/org/jruby/ext/nkf/Option.java +80 -0
- data/ext/java/org/jruby/ext/nkf/Options.java +109 -0
- data/ext/java/org/jruby/ext/nkf/RubyNKF.java +601 -0
- data/ext/nkf/nkf.c +1 -1
- data/lib/nkf.rb +6 -0
- data/nkf.gemspec +11 -3
- metadata +12 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f82f8f21210cbbd491307967db9c664a2afc2707edadb6a93151391c273d83d9
|
4
|
+
data.tar.gz: 91c3a5ef30e7cc7d6f11d4fad788f16ceec857dc2f988fd7a0c70b36ca322d38
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6c50ad8960ac5ac3f16b2956a30f483753ab0bc394e507b335ea0b85b44ee306a23842789457a5c220431667ddca34b9c121b74cd238acc84a7c1964d6ef59aa
|
7
|
+
data.tar.gz: c63500824d58fa3a27c119bc20dc49237e3be1c5f1d531606af5514d6d79f860f99ca44d8b12388773024f79374994fb2b1bcd96cdbb543c549dda21edf4a796
|
data/.github/workflows/test.yml
CHANGED
@@ -3,15 +3,22 @@ name: build
|
|
3
3
|
on: [push, pull_request]
|
4
4
|
|
5
5
|
jobs:
|
6
|
+
ruby-versions:
|
7
|
+
uses: ruby/actions/.github/workflows/ruby_versions.yml@master
|
8
|
+
with:
|
9
|
+
engine: cruby-jruby
|
10
|
+
min_version: 2.5
|
11
|
+
|
6
12
|
build:
|
13
|
+
needs: ruby-versions
|
7
14
|
name: build (${{ matrix.ruby }} / ${{ matrix.os }})
|
8
15
|
strategy:
|
9
16
|
matrix:
|
10
|
-
ruby:
|
17
|
+
ruby: ${{ fromJson(needs.ruby-versions.outputs.versions) }}
|
11
18
|
os: [ ubuntu-latest, macos-latest ]
|
12
19
|
runs-on: ${{ matrix.os }}
|
13
20
|
steps:
|
14
|
-
- uses: actions/checkout@
|
21
|
+
- uses: actions/checkout@v4
|
15
22
|
- name: Set up Ruby
|
16
23
|
uses: ruby/setup-ruby@v1
|
17
24
|
with:
|
data/.gitignore
CHANGED
data/Rakefile
CHANGED
@@ -7,6 +7,18 @@ Rake::TestTask.new(:test) do |t|
|
|
7
7
|
t.test_files = FileList["test/**/test_*.rb"]
|
8
8
|
end
|
9
9
|
|
10
|
-
|
11
|
-
|
10
|
+
# Select the extension build task for the running Ruby engine:
# JRuby builds the Java sources under ext/java, anything else builds
# the default "nkf" extension.
case RUBY_ENGINE
when "jruby"
  require "rake/javaextensiontask"

  Rake::JavaExtensionTask.new("nkf") do |java_ext|
    java_ext.source_version = "1.8"
    java_ext.target_version = "1.8"
    java_ext.ext_dir = "ext/java"
  end

  # Make :build depend on :compile.
  task :build => :compile
else
  require 'rake/extensiontask'

  Rake::ExtensionTask.new("nkf")
end
|
23
|
+
|
12
24
|
task :default => :test
|
@@ -0,0 +1,58 @@
|
|
1
|
+
/***** BEGIN LICENSE BLOCK *****
|
2
|
+
* Version: EPL 2.0/LGPL 2.1
|
3
|
+
*
|
4
|
+
* The contents of this file are subject to the Eclipse Public
|
5
|
+
* License Version 2.0 (the "License"); you may not use this file
|
6
|
+
* except in compliance with the License. You may obtain a copy of
|
7
|
+
* the License at http://www.eclipse.org/legal/epl-v20.html
|
8
|
+
*
|
9
|
+
* Software distributed under the License is distributed on an "AS
|
10
|
+
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
11
|
+
* implied. See the License for the specific language governing
|
12
|
+
* rights and limitations under the License.
|
13
|
+
*
|
14
|
+
* Copyright (C) 2011 Koichiro Ohba <koichiro@meadowy.org>
|
15
|
+
*
|
16
|
+
* Alternatively, the contents of this file may be used under the terms of
|
17
|
+
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
18
|
+
* in which case the provisions of the LGPL are applicable instead
|
19
|
+
* of those above. If you wish to allow use of your version of this file only
|
20
|
+
* under the terms of either the LGPL, and not to allow others to
|
21
|
+
* use your version of this file under the terms of the EPL, indicate your
|
22
|
+
* decision by deleting the provisions above and replace them with the notice
|
23
|
+
* and other provisions required by the LGPL. If you do not delete
|
24
|
+
* the provisions above, a recipient may use your version of this file under
|
25
|
+
* the terms of any one of the EPL, the LGPL.
|
26
|
+
***** END LICENSE BLOCK *****/
|
27
|
+
|
28
|
+
package org.jruby.ext.nkf;
|
29
|
+
|
30
|
+
import java.util.List;
|
31
|
+
import java.util.ArrayList;
|
32
|
+
|
33
|
+
public class Command {
|
34
|
+
private final List<Option> options = new ArrayList<Option>();
|
35
|
+
public boolean hasOption(String opt) {
|
36
|
+
for (Option option : options) {
|
37
|
+
if (opt.equals(option.getOpt())) return true;
|
38
|
+
if (opt.equals(option.getLongOpt())) return true;
|
39
|
+
}
|
40
|
+
return false;
|
41
|
+
}
|
42
|
+
public void addOption(Option opt) {
|
43
|
+
options.add(opt);
|
44
|
+
}
|
45
|
+
public Option getOption(String opt) {
|
46
|
+
for (Option option : options) {
|
47
|
+
if (opt.equals(option.getOpt())) return option;
|
48
|
+
if (opt.equals(option.getLongOpt())) return option;
|
49
|
+
}
|
50
|
+
return null;
|
51
|
+
}
|
52
|
+
public String getOptionValue(String opt) {
|
53
|
+
return getOption(opt).getValue();
|
54
|
+
}
|
55
|
+
public String toString() {
|
56
|
+
return options.toString();
|
57
|
+
}
|
58
|
+
}
|
@@ -0,0 +1,70 @@
|
|
1
|
+
/***** BEGIN LICENSE BLOCK *****
|
2
|
+
* Version: EPL 2.0/LGPL 2.1
|
3
|
+
*
|
4
|
+
* The contents of this file are subject to the Eclipse Public
|
5
|
+
* License Version 2.0 (the "License"); you may not use this file
|
6
|
+
* except in compliance with the License. You may obtain a copy of
|
7
|
+
* the License at http://www.eclipse.org/legal/epl-v20.html
|
8
|
+
*
|
9
|
+
* Software distributed under the License is distributed on an "AS
|
10
|
+
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
11
|
+
* implied. See the License for the specific language governing
|
12
|
+
* rights and limitations under the License.
|
13
|
+
*
|
14
|
+
* Copyright (C) 2011 Koichiro Ohba <koichiro@meadowy.org>
|
15
|
+
*
|
16
|
+
* Alternatively, the contents of this file may be used under the terms of
|
17
|
+
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
18
|
+
* in which case the provisions of the LGPL are applicable instead
|
19
|
+
* of those above. If you wish to allow use of your version of this file only
|
20
|
+
* under the terms of either the LGPL, and not to allow others to
|
21
|
+
* use your version of this file under the terms of the EPL, indicate your
|
22
|
+
* decision by deleting the provisions above and replace them with the notice
|
23
|
+
* and other provisions required by the LGPL. If you do not delete
|
24
|
+
* the provisions above, a recipient may use your version of this file under
|
25
|
+
* the terms of any one of the EPL, the LGPL.
|
26
|
+
***** END LICENSE BLOCK *****/
|
27
|
+
|
28
|
+
package org.jruby.ext.nkf;
|
29
|
+
|
30
|
+
public class CommandParser {
|
31
|
+
public Command parse(Options opt, String args) {
|
32
|
+
Command cc = new Command();
|
33
|
+
String[] tokens = args.split("\\s");
|
34
|
+
for (int i = 0; i < tokens.length; i++) {
|
35
|
+
// long option
|
36
|
+
if (tokens[i].startsWith("--")) {
|
37
|
+
String s = stripDash(tokens[i]);
|
38
|
+
if (opt.hasLongOption(s)) {
|
39
|
+
cc.addOption(opt.matchLongOption(s));
|
40
|
+
}
|
41
|
+
} else {
|
42
|
+
// short option
|
43
|
+
String s = stripDash(tokens[i]);
|
44
|
+
int max = s.length();
|
45
|
+
for (int j = 0; j < max; j++) {
|
46
|
+
if (opt.hasShortOption(s)) {
|
47
|
+
Option cmd = opt.matchShortOption(s);
|
48
|
+
if (cmd.getValue() != null) {
|
49
|
+
int op_len = cmd.getValue().length();
|
50
|
+
s = s.substring(op_len);
|
51
|
+
j = j + op_len;
|
52
|
+
}
|
53
|
+
cc.addOption(cmd);
|
54
|
+
}
|
55
|
+
s = s.substring(1);
|
56
|
+
}
|
57
|
+
}
|
58
|
+
}
|
59
|
+
return cc;
|
60
|
+
}
|
61
|
+
private String stripDash(String s) {
|
62
|
+
if (s.startsWith("--")) {
|
63
|
+
return s.substring(2, s.length());
|
64
|
+
} else if (s.startsWith("-")) {
|
65
|
+
return s.substring(1, s.length());
|
66
|
+
} else {
|
67
|
+
return s;
|
68
|
+
}
|
69
|
+
}
|
70
|
+
}
|
@@ -0,0 +1,13 @@
|
|
1
|
+
package org.jruby.ext.nkf;
|
2
|
+
|
3
|
+
import org.jruby.Ruby;
|
4
|
+
import org.jruby.runtime.load.Library;
|
5
|
+
|
6
|
+
import java.io.IOException;
|
7
|
+
|
8
|
+
public class NKFLibrary implements Library {
|
9
|
+
@Override
|
10
|
+
public void load(Ruby ruby, boolean b) throws IOException {
|
11
|
+
RubyNKF.load(ruby);
|
12
|
+
}
|
13
|
+
}
|
@@ -0,0 +1,80 @@
|
|
1
|
+
/***** BEGIN LICENSE BLOCK *****
|
2
|
+
* Version: EPL 2.0/LGPL 2.1
|
3
|
+
*
|
4
|
+
* The contents of this file are subject to the Eclipse Public
|
5
|
+
* License Version 2.0 (the "License"); you may not use this file
|
6
|
+
* except in compliance with the License. You may obtain a copy of
|
7
|
+
* the License at http://www.eclipse.org/legal/epl-v20.html
|
8
|
+
*
|
9
|
+
* Software distributed under the License is distributed on an "AS
|
10
|
+
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
11
|
+
* implied. See the License for the specific language governing
|
12
|
+
* rights and limitations under the License.
|
13
|
+
*
|
14
|
+
* Copyright (C) 2011 Koichiro Ohba <koichiro@meadowy.org>
|
15
|
+
*
|
16
|
+
* Alternatively, the contents of this file may be used under the terms of
|
17
|
+
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
18
|
+
* in which case the provisions of the LGPL are applicable instead
|
19
|
+
* of those above. If you wish to allow use of your version of this file only
|
20
|
+
* under the terms of either the LGPL, and not to allow others to
|
21
|
+
* use your version of this file under the terms of the EPL, indicate your
|
22
|
+
* decision by deleting the provisions above and replace them with the notice
|
23
|
+
* and other provisions required by the LGPL. If you do not delete
|
24
|
+
* the provisions above, a recipient may use your version of this file under
|
25
|
+
* the terms of any one of the EPL, the LGPL.
|
26
|
+
***** END LICENSE BLOCK *****/
|
27
|
+
|
28
|
+
package org.jruby.ext.nkf;
|
29
|
+
|
30
|
+
import java.util.regex.Pattern;
|
31
|
+
|
32
|
+
/**
 * A single option definition: an optional short name, an optional long
 * name, an optional regular expression describing its argument, and the
 * argument value captured by the most recent match.
 */
public class Option {
    private final String opt;
    private final String longOpt;
    private boolean hasArg = false;
    private String value = null;
    private Pattern pattern;

    /**
     * @param opt     short name, or null
     * @param longOpt long name, or null
     * @param pattern regular expression for the option's argument, or null
     *                when the option takes no argument
     */
    public Option(String opt, String longOpt, String pattern) {
        this.opt = opt;
        this.longOpt = longOpt;
        if (pattern == null) {
            return;
        }
        this.hasArg = true;
        this.pattern = Pattern.compile(pattern);
    }

    String getOpt() {
        return opt;
    }

    String getLongOpt() {
        return longOpt;
    }

    boolean hasShortOpt() {
        return opt != null;
    }

    boolean hasLongOpt() {
        return longOpt != null;
    }

    boolean hasArg() {
        return hasArg;
    }

    /** @return the captured argument value, or null when none has been set */
    public String getValue() {
        return value;
    }

    void setValue(String v) {
        value = v;
    }

    /** @return the short name when there is one, otherwise the long name */
    String getKey() {
        return opt != null ? opt : longOpt;
    }

    Pattern pattern() {
        return pattern;
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("[opt: ");
        text.append(opt);
        text.append(" longOpt: ").append(longOpt);
        text.append(" hasArg: ").append(hasArg);
        text.append(" pattern: ").append(pattern);
        text.append(" value: ").append(value);
        return text.append("]").toString();
    }
}
|
@@ -0,0 +1,109 @@
|
|
1
|
+
/***** BEGIN LICENSE BLOCK *****
|
2
|
+
* Version: EPL 2.0/LGPL 2.1
|
3
|
+
*
|
4
|
+
* The contents of this file are subject to the Eclipse Public
|
5
|
+
* License Version 2.0 (the "License"); you may not use this file
|
6
|
+
* except in compliance with the License. You may obtain a copy of
|
7
|
+
* the License at http://www.eclipse.org/legal/epl-v20.html
|
8
|
+
*
|
9
|
+
* Software distributed under the License is distributed on an "AS
|
10
|
+
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
11
|
+
* implied. See the License for the specific language governing
|
12
|
+
* rights and limitations under the License.
|
13
|
+
*
|
14
|
+
* Copyright (C) 2011 Koichiro Ohba <koichiro@meadowy.org>
|
15
|
+
*
|
16
|
+
* Alternatively, the contents of this file may be used under the terms of
|
17
|
+
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
18
|
+
* in which case the provisions of the LGPL are applicable instead
|
19
|
+
* of those above. If you wish to allow use of your version of this file only
|
20
|
+
* under the terms of either the LGPL, and not to allow others to
|
21
|
+
* use your version of this file under the terms of the EPL, indicate your
|
22
|
+
* decision by deleting the provisions above and replace them with the notice
|
23
|
+
* and other provisions required by the LGPL. If you do not delete
|
24
|
+
* the provisions above, a recipient may use your version of this file under
|
25
|
+
* the terms of any one of the EPL, the LGPL.
|
26
|
+
***** END LICENSE BLOCK *****/
|
27
|
+
|
28
|
+
package org.jruby.ext.nkf;
|
29
|
+
|
30
|
+
import java.util.Map;
|
31
|
+
import java.util.LinkedHashMap;
|
32
|
+
import java.util.regex.Matcher;
|
33
|
+
|
34
|
+
public class Options {
|
35
|
+
private final Map<String, Option> shortOpts = new LinkedHashMap<String, Option>();
|
36
|
+
private final Map<String, Option> longOpts = new LinkedHashMap<String, Option>();
|
37
|
+
|
38
|
+
public Options addOption(String opt) {
|
39
|
+
return addOption(opt, null);
|
40
|
+
}
|
41
|
+
public Options addOption(String opt, String longOpt) {
|
42
|
+
return addOption(opt, longOpt, null);
|
43
|
+
}
|
44
|
+
public Options addOption(String opt, String longOpt, String pattern) {
|
45
|
+
return addOption(new Option(opt, longOpt, pattern));
|
46
|
+
}
|
47
|
+
public Options addOption(Option opt) {
|
48
|
+
if (opt.hasLongOpt()) {
|
49
|
+
longOpts.put(opt.getLongOpt(), opt);
|
50
|
+
}
|
51
|
+
if (opt.hasShortOpt()) {
|
52
|
+
shortOpts.put(opt.getOpt(), opt);
|
53
|
+
}
|
54
|
+
return this;
|
55
|
+
}
|
56
|
+
boolean hasShortOption(String opt) {
|
57
|
+
for (Map.Entry<String , Option> e : shortOpts.entrySet()) {
|
58
|
+
if (opt.startsWith(e.getKey())) {
|
59
|
+
return true;
|
60
|
+
}
|
61
|
+
}
|
62
|
+
return false;
|
63
|
+
}
|
64
|
+
public Option matchShortOption(String opt) {
|
65
|
+
// independent of opt length
|
66
|
+
for (Map.Entry<String , Option> e : shortOpts.entrySet()) {
|
67
|
+
//System.out.println(opt + " = " + e.getKey());
|
68
|
+
if (opt.startsWith(e.getKey())) {
|
69
|
+
//System.out.println("match[" + e.getKey() + "]");
|
70
|
+
Option cmd = e.getValue();
|
71
|
+
if (cmd.hasArg()) {
|
72
|
+
Matcher m = cmd.pattern().matcher(opt);
|
73
|
+
if (m.find()) {
|
74
|
+
//System.out.println("regix[" + m.group() + "]");
|
75
|
+
cmd.setValue(m.group());
|
76
|
+
}
|
77
|
+
}
|
78
|
+
return cmd;
|
79
|
+
}
|
80
|
+
}
|
81
|
+
return null;
|
82
|
+
}
|
83
|
+
boolean hasLongOption(String opt) {
|
84
|
+
for (Map.Entry<String , Option> e : longOpts.entrySet()) {
|
85
|
+
if (opt.startsWith(e.getKey())) {
|
86
|
+
return true;
|
87
|
+
}
|
88
|
+
}
|
89
|
+
return false;
|
90
|
+
}
|
91
|
+
Option matchLongOption(String opt) {
|
92
|
+
for (Map.Entry<String , Option> e : longOpts.entrySet()) {
|
93
|
+
//System.out.println(opt + " = " + e.getKey());
|
94
|
+
if (opt.startsWith(e.getKey())) {
|
95
|
+
//System.out.println("match[" + e.getKey() + "]");
|
96
|
+
Option cmd = e.getValue();
|
97
|
+
if (cmd.hasArg()) {
|
98
|
+
Matcher m = cmd.pattern().matcher(opt);
|
99
|
+
if (m.find()) {
|
100
|
+
//System.out.println("regix[" + m.group() + "]");
|
101
|
+
cmd.setValue(m.group(1));
|
102
|
+
}
|
103
|
+
}
|
104
|
+
return cmd;
|
105
|
+
}
|
106
|
+
}
|
107
|
+
return null;
|
108
|
+
}
|
109
|
+
}
|
@@ -0,0 +1,601 @@
|
|
1
|
+
/***** BEGIN LICENSE BLOCK *****
|
2
|
+
* Version: EPL 2.0/LGPL 2.1
|
3
|
+
*
|
4
|
+
* The contents of this file are subject to the Eclipse Public
|
5
|
+
* License Version 2.0 (the "License"); you may not use this file
|
6
|
+
* except in compliance with the License. You may obtain a copy of
|
7
|
+
* the License at http://www.eclipse.org/legal/epl-v20.html
|
8
|
+
*
|
9
|
+
* Software distributed under the License is distributed on an "AS
|
10
|
+
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
11
|
+
* implied. See the License for the specific language governing
|
12
|
+
* rights and limitations under the License.
|
13
|
+
*
|
14
|
+
* Copyright (C) 2007-2011 Koichiro Ohba <koichiro@meadowy.org>
|
15
|
+
*
|
16
|
+
* Alternatively, the contents of this file may be used under the terms of
|
17
|
+
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
18
|
+
* in which case the provisions of the LGPL are applicable instead
|
19
|
+
* of those above. If you wish to allow use of your version of this file only
|
20
|
+
* under the terms of either the LGPL, and not to allow others to
|
21
|
+
* use your version of this file under the terms of the EPL, indicate your
|
22
|
+
* decision by deleting the provisions above and replace them with the notice
|
23
|
+
* and other provisions required by the LGPL. If you do not delete
|
24
|
+
* the provisions above, a recipient may use your version of this file under
|
25
|
+
* the terms of any one of the EPL, the LGPL.
|
26
|
+
***** END LICENSE BLOCK *****/
|
27
|
+
|
28
|
+
package org.jruby.ext.nkf;
|
29
|
+
|
30
|
+
import java.nio.ByteBuffer;
|
31
|
+
import java.nio.CharBuffer;
|
32
|
+
import java.nio.charset.CharacterCodingException;
|
33
|
+
import java.nio.charset.Charset;
|
34
|
+
import java.nio.charset.CharsetDecoder;
|
35
|
+
import java.nio.charset.CharsetEncoder;
|
36
|
+
import java.nio.charset.UnsupportedCharsetException;
|
37
|
+
import java.util.ArrayList;
|
38
|
+
import java.util.Map;
|
39
|
+
import java.util.HashMap;
|
40
|
+
|
41
|
+
import org.jcodings.Encoding;
|
42
|
+
import org.jcodings.specific.ASCIIEncoding;
|
43
|
+
import org.jcodings.specific.UTF8Encoding;
|
44
|
+
import org.jcodings.transcode.EConv;
|
45
|
+
import org.jcodings.transcode.EConvFlags;
|
46
|
+
import org.jruby.Ruby;
|
47
|
+
import org.jruby.RubyArray;
|
48
|
+
import org.jruby.RubyModule;
|
49
|
+
import org.jruby.RubyString;
|
50
|
+
|
51
|
+
import org.jruby.anno.JRubyMethod;
|
52
|
+
import org.jruby.anno.JRubyModule;
|
53
|
+
import org.jruby.runtime.Helpers;
|
54
|
+
import org.jruby.runtime.ThreadContext;
|
55
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
56
|
+
import org.jruby.util.ByteList;
|
57
|
+
import org.jruby.util.KCode;
|
58
|
+
import org.jruby.util.Pack;
|
59
|
+
import org.jruby.util.io.EncodingUtils;
|
60
|
+
|
61
|
+
@JRubyModule(name="NKF")
|
62
|
+
public class RubyNKF {
|
63
|
+
/**
 * Character sets and transfer encodings known to NKF. Each constant pairs a
 * numeric code with a charset name; the name is null for constants that
 * have none. Some constants share a numeric code (AUTO/UNKNOWN and
 * BINARY/NOCONV).
 */
public enum NKFCharset {
    AUTO(0, "x-JISAutoDetect"),
    // no ISO-2022-JP in jcodings
    JIS(1, "ISO-2022-JP"),
    EUC(2, "EUC-JP"),
    SJIS(3, "Shift_JIS"),
    BINARY(4, null),
    NOCONV(4, null),
    UNKNOWN(0, null),
    ASCII(5, "iso-8859-1"),
    UTF8(6, "UTF-8"),
    UTF16(8, "UTF-16"),
    UTF32(12, "UTF-32"),
    OTHER(16, null),
    BASE64(20, "base64"),
    QENCODE(21, "qencode"),
    MIME_DETECT(22, "MimeAutoDetect");

    private final int value;
    private final String charset;

    NKFCharset(int value, String charset) {
        this.value = value;
        this.charset = charset;
    }

    /** @return the numeric code of this constant */
    public int getValue() {
        return this.value;
    }

    /** @return the charset name of this constant, or null when it has none */
    public String getCharset() {
        return this.charset;
    }
}
|
97
|
+
|
98
|
+
private static final ByteList BEGIN_MIME_STRING = new ByteList(ByteList.plain("=?"));
|
99
|
+
private static final ByteList END_MIME_STRING = new ByteList(ByteList.plain("?="));
|
100
|
+
private static final ByteList PACK_BASE64 = new ByteList(ByteList.plain("m"));
|
101
|
+
private static final ByteList PACK_QENCODE = new ByteList(ByteList.plain("M"));
|
102
|
+
|
103
|
+
public static final Map<Integer, String> NKFCharsetMap = new HashMap<Integer, String>(20, 1);
|
104
|
+
|
105
|
+
public static void load(Ruby runtime) {
|
106
|
+
createNKF(runtime);
|
107
|
+
}
|
108
|
+
|
109
|
+
public static void createNKF(Ruby runtime) {
|
110
|
+
final RubyModule NKF = runtime.defineModule("NKF");
|
111
|
+
final String version = "2.1.2";
|
112
|
+
final String relDate = "2011-09-08";
|
113
|
+
|
114
|
+
NKF.defineConstant("NKF_VERSION", runtime.newString(version));
|
115
|
+
NKF.defineConstant("NKF_RELEASE_DATE", runtime.newString(relDate));
|
116
|
+
NKF.defineConstant("VERSION", runtime.newString(version + ' ' + '(' + "JRuby" + '_' + relDate + ')'));
|
117
|
+
|
118
|
+
for ( NKFCharset charset : NKFCharset.values() ) {
|
119
|
+
NKFCharsetMap.put(charset.value, charset.name());
|
120
|
+
|
121
|
+
if (charset.value > 12 ) continue;
|
122
|
+
NKF.defineConstant(charset.name(), charsetMappedValue(runtime, charset));
|
123
|
+
}
|
124
|
+
|
125
|
+
NKF.defineAnnotatedMethods(RubyNKF.class);
|
126
|
+
}
|
127
|
+
|
128
|
+
@JRubyMethod(name = "guess", module = true)
|
129
|
+
public static IRubyObject guess(ThreadContext context, IRubyObject recv, IRubyObject s) {
|
130
|
+
return charsetMappedValue(context.runtime, guess(context, s));
|
131
|
+
}
|
132
|
+
|
133
|
+
public static NKFCharset guess(ThreadContext context, IRubyObject s) {
|
134
|
+
// TODO: Fix charset usage for JRUBY-4553
|
135
|
+
Ruby runtime = context.runtime;
|
136
|
+
if (!s.respondsTo("to_str")) {
|
137
|
+
throw runtime.newTypeError("can't convert " + s.getMetaClass() + " into String");
|
138
|
+
}
|
139
|
+
ByteList bytes = s.convertToString().getByteList();
|
140
|
+
ByteBuffer buf = ByteBuffer.wrap(bytes.getUnsafeBytes(), bytes.begin(), bytes.length());
|
141
|
+
CharsetDecoder decoder;
|
142
|
+
try {
|
143
|
+
decoder = Charset.forName("x-JISAutoDetect").newDecoder();
|
144
|
+
} catch (UnsupportedCharsetException e) {
|
145
|
+
throw runtime.newStandardError("charsets.jar is required to use NKF#guess. Please install JRE which supports m17n.");
|
146
|
+
}
|
147
|
+
try {
|
148
|
+
decoder.decode(buf);
|
149
|
+
|
150
|
+
if ( ! decoder.isCharsetDetected() ) {
|
151
|
+
return NKFCharset.UNKNOWN;
|
152
|
+
}
|
153
|
+
Charset charset = decoder.detectedCharset();
|
154
|
+
String name = charset.name();
|
155
|
+
if ("Shift_JIS".equals(name)) {
|
156
|
+
return NKFCharset.SJIS;
|
157
|
+
}
|
158
|
+
if ("Windows-31j".equalsIgnoreCase(name)) {
|
159
|
+
return NKFCharset.JIS;
|
160
|
+
}
|
161
|
+
if ("EUC-JP".equals(name)) {
|
162
|
+
return NKFCharset.EUC;
|
163
|
+
}
|
164
|
+
if ("ISO-2022-JP".equals(name)) {
|
165
|
+
return NKFCharset.JIS;
|
166
|
+
}
|
167
|
+
}
|
168
|
+
catch (CharacterCodingException e) {
|
169
|
+
// fall through and try direct encoding
|
170
|
+
}
|
171
|
+
|
172
|
+
if (bytes.getEncoding() == UTF8Encoding.INSTANCE) {
|
173
|
+
return NKFCharset.UTF8;
|
174
|
+
}
|
175
|
+
if (bytes.getEncoding().toString().startsWith("UTF-16")) {
|
176
|
+
return NKFCharset.UTF16;
|
177
|
+
}
|
178
|
+
if (bytes.getEncoding().toString().startsWith("UTF-32")) {
|
179
|
+
return NKFCharset.UTF32;
|
180
|
+
}
|
181
|
+
return NKFCharset.UNKNOWN;
|
182
|
+
}
|
183
|
+
|
184
|
+
private static IRubyObject charsetMappedValue(final Ruby runtime, final NKFCharset charset) {
|
185
|
+
final Encoding encoding;
|
186
|
+
switch (charset) {
|
187
|
+
case AUTO: case NOCONV: case UNKNOWN: return runtime.getNil();
|
188
|
+
case BINARY:
|
189
|
+
encoding = runtime.getEncodingService().getAscii8bitEncoding();
|
190
|
+
return runtime.getEncodingService().convertEncodingToRubyEncoding(encoding);
|
191
|
+
}
|
192
|
+
|
193
|
+
encoding = runtime.getEncodingService().getEncodingFromString(charset.getCharset());
|
194
|
+
return runtime.getEncodingService().convertEncodingToRubyEncoding(encoding);
|
195
|
+
}
|
196
|
+
|
197
|
+
@JRubyMethod(name = "guess1", module = true)
|
198
|
+
public static IRubyObject guess1(ThreadContext context, IRubyObject recv, IRubyObject str) {
|
199
|
+
return guess(context, recv, str);
|
200
|
+
}
|
201
|
+
|
202
|
+
@JRubyMethod(name = "guess2", module = true)
|
203
|
+
public static IRubyObject guess2(ThreadContext context, IRubyObject recv, IRubyObject str) {
|
204
|
+
return guess(context, recv, str);
|
205
|
+
}
|
206
|
+
|
207
|
+
@JRubyMethod(name = "nkf", module = true)
|
208
|
+
public static IRubyObject nkf(ThreadContext context, IRubyObject recv, IRubyObject opt, IRubyObject str) {
|
209
|
+
Ruby runtime = context.runtime;
|
210
|
+
|
211
|
+
if (!opt.respondsTo("to_str")) {
|
212
|
+
throw runtime.newTypeError("can't convert " + opt.getMetaClass() + " into String");
|
213
|
+
}
|
214
|
+
|
215
|
+
if (!str.respondsTo("to_str")) {
|
216
|
+
throw runtime.newTypeError("can't convert " + str.getMetaClass() + " into String");
|
217
|
+
}
|
218
|
+
|
219
|
+
Map<String, NKFCharset> options = parseOpt(opt.convertToString().toString());
|
220
|
+
|
221
|
+
if (options.get("input").getValue() == NKFCharset.AUTO.getValue()) {
|
222
|
+
options.put("input", guess(context, str));
|
223
|
+
}
|
224
|
+
|
225
|
+
ByteList bstr = str.convertToString().getByteList();
|
226
|
+
final Converter converter;
|
227
|
+
if (Converter.isMimeText(bstr, options)) {
|
228
|
+
converter = new MimeConverter(context, options);
|
229
|
+
} else {
|
230
|
+
converter = new DefaultConverter(context, options);
|
231
|
+
}
|
232
|
+
|
233
|
+
RubyString result = converter.convert(bstr);
|
234
|
+
|
235
|
+
if (options.get("mime-encode") == NKFCharset.BASE64) {
|
236
|
+
result = Converter.encodeMimeString(runtime, result, PACK_BASE64);
|
237
|
+
} else if (options.get("mime-encode") == NKFCharset.QENCODE) {
|
238
|
+
result = Converter.encodeMimeString(runtime, result, PACK_QENCODE);
|
239
|
+
}
|
240
|
+
|
241
|
+
return result;
|
242
|
+
}
|
243
|
+
|
244
|
+
public static Command parseOption(String s) {
|
245
|
+
Options options = new Options();
|
246
|
+
options.addOption("b");
|
247
|
+
options.addOption("u");
|
248
|
+
options.addOption("j", "jis");
|
249
|
+
options.addOption("s", "sjis");
|
250
|
+
options.addOption("e", "euc");
|
251
|
+
options.addOption("w", null, "[0-9][0-9]");
|
252
|
+
options.addOption("J", "jis-input");
|
253
|
+
options.addOption("S", "sjis-input");
|
254
|
+
options.addOption("E", "euc-input");
|
255
|
+
options.addOption("W", null, "[0-9][0-9]");
|
256
|
+
options.addOption("t");
|
257
|
+
options.addOption("i_");
|
258
|
+
options.addOption("o_");
|
259
|
+
options.addOption("r");
|
260
|
+
options.addOption("h1", "hiragana");
|
261
|
+
options.addOption("h2", "katakana");
|
262
|
+
options.addOption("h3", "katakana-hiragana");
|
263
|
+
options.addOption("T");
|
264
|
+
options.addOption("l");
|
265
|
+
options.addOption("f", null, "[0-9]+-[0-9]*");
|
266
|
+
options.addOption("F");
|
267
|
+
options.addOption("Z", null, "[0-3]");
|
268
|
+
options.addOption("X");
|
269
|
+
options.addOption("x");
|
270
|
+
options.addOption("B", null, "[0-2]");
|
271
|
+
options.addOption("I");
|
272
|
+
options.addOption("L", null, "[uwm]");
|
273
|
+
options.addOption("d");
|
274
|
+
options.addOption("c");
|
275
|
+
options.addOption("m", null, "[BQN0]");
|
276
|
+
options.addOption("M", null, "[BQ]");
|
277
|
+
options.addOption(null, "fj");
|
278
|
+
options.addOption(null, "unix");
|
279
|
+
options.addOption(null, "mac");
|
280
|
+
options.addOption(null, "msdos");
|
281
|
+
options.addOption(null, "windows");
|
282
|
+
options.addOption(null, "mime");
|
283
|
+
options.addOption(null, "base64");
|
284
|
+
options.addOption(null, "mime-input");
|
285
|
+
options.addOption(null, "base64-input");
|
286
|
+
options.addOption(null, "ic", "ic=(.*)");
|
287
|
+
options.addOption(null, "oc", "oc=(.*)");
|
288
|
+
options.addOption(null, "fb-skip");
|
289
|
+
options.addOption(null, "fb-html");
|
290
|
+
options.addOption(null, "fb-xml");
|
291
|
+
options.addOption(null, "fb-perl");
|
292
|
+
options.addOption(null, "fb-java");
|
293
|
+
options.addOption(null, "fb-subchar", "fb-subchar=(.*)");
|
294
|
+
options.addOption(null, "no-cp932ext");
|
295
|
+
options.addOption(null, "cap-input");
|
296
|
+
options.addOption(null, "url-input");
|
297
|
+
options.addOption(null, "numchar-input");
|
298
|
+
options.addOption(null, "no-best-fit-chars");
|
299
|
+
|
300
|
+
CommandParser parser = new CommandParser();
|
301
|
+
Command cmd = parser.parse(options, s);
|
302
|
+
return cmd;
|
303
|
+
}
|
304
|
+
|
305
|
+
private static Map<String, NKFCharset> parseOpt(String s) {
|
306
|
+
Map<String, NKFCharset> options = new HashMap<String, NKFCharset>();
|
307
|
+
|
308
|
+
// default options
|
309
|
+
options.put("input", NKFCharset.AUTO);
|
310
|
+
options.put("output", NKFCharset.JIS);
|
311
|
+
options.put("mime-decode", NKFCharset.MIME_DETECT);
|
312
|
+
options.put("mime-encode", NKFCharset.NOCONV);
|
313
|
+
|
314
|
+
Command cmd = parseOption(s);
|
315
|
+
if (cmd.hasOption("j")) {
|
316
|
+
options.put("output", NKFCharset.JIS);
|
317
|
+
}
|
318
|
+
if (cmd.hasOption("s")) {
|
319
|
+
options.put("output", NKFCharset.SJIS);
|
320
|
+
}
|
321
|
+
if (cmd.hasOption("e")) {
|
322
|
+
options.put("output", NKFCharset.EUC);
|
323
|
+
}
|
324
|
+
if (cmd.hasOption("w")) {
|
325
|
+
Option opt = cmd.getOption("w");
|
326
|
+
if ("32".equals(opt.getValue())) {
|
327
|
+
options.put("output", NKFCharset.UTF32);
|
328
|
+
} else if("16".equals(opt.getValue())) {
|
329
|
+
options.put("output", NKFCharset.UTF16);
|
330
|
+
} else {
|
331
|
+
options.put("output", NKFCharset.UTF8);
|
332
|
+
}
|
333
|
+
}
|
334
|
+
if (cmd.hasOption("J")) {
|
335
|
+
options.put("input", NKFCharset.JIS);
|
336
|
+
}
|
337
|
+
if (cmd.hasOption("S")) {
|
338
|
+
options.put("input", NKFCharset.SJIS);
|
339
|
+
}
|
340
|
+
if (cmd.hasOption("E")) {
|
341
|
+
options.put("input", NKFCharset.EUC);
|
342
|
+
}
|
343
|
+
if (cmd.hasOption("W")) {
|
344
|
+
Option opt = cmd.getOption("W");
|
345
|
+
if ("32".equals(opt.getValue())) {
|
346
|
+
options.put("input", NKFCharset.UTF32);
|
347
|
+
} else if("16".equals(opt.getValue())) {
|
348
|
+
options.put("input", NKFCharset.UTF16);
|
349
|
+
} else {
|
350
|
+
options.put("input", NKFCharset.UTF8);
|
351
|
+
}
|
352
|
+
}
|
353
|
+
if (cmd.hasOption("m")) {
|
354
|
+
Option opt = cmd.getOption("m");
|
355
|
+
if (opt.getValue() == null) {
|
356
|
+
options.put("mime-decode", NKFCharset.MIME_DETECT);
|
357
|
+
} else if ("B".equals(opt.getValue())) {
|
358
|
+
options.put("mime-decode", NKFCharset.BASE64);
|
359
|
+
} else if ("Q".equals(opt.getValue())) {
|
360
|
+
options.put("mime-decode", NKFCharset.QENCODE);
|
361
|
+
} else if ("N".equals(opt.getValue())) {
|
362
|
+
// TODO: non-strict option
|
363
|
+
} else if ("0".equals(opt.getValue())) {
|
364
|
+
options.put("mime-decode", NKFCharset.NOCONV);
|
365
|
+
}
|
366
|
+
}
|
367
|
+
if (cmd.hasOption("M")) {
|
368
|
+
Option opt = cmd.getOption("M");
|
369
|
+
if (opt.getValue() == null) {
|
370
|
+
options.put("mime-encode", NKFCharset.NOCONV);
|
371
|
+
} else if ("B".equals(opt.getValue())) {
|
372
|
+
options.put("mime-encode", NKFCharset.BASE64);
|
373
|
+
} else if ("Q".equals(opt.getValue())) {
|
374
|
+
options.put("mime-encode", NKFCharset.QENCODE);
|
375
|
+
}
|
376
|
+
}
|
377
|
+
if (cmd.hasOption("base64")) {
|
378
|
+
options.put("mime-encode", NKFCharset.BASE64);
|
379
|
+
}
|
380
|
+
if (cmd.hasOption("oc")) {
|
381
|
+
Option opt = cmd.getOption("oc");
|
382
|
+
if ("ISO-2022-JP".compareToIgnoreCase(opt.getValue()) == 0) {
|
383
|
+
options.put("output", NKFCharset.JIS);
|
384
|
+
} else if ("EUC-JP".compareToIgnoreCase(opt.getValue()) == 0) {
|
385
|
+
options.put("output", NKFCharset.EUC);
|
386
|
+
} else if ("CP932".compareToIgnoreCase(opt.getValue()) == 0) {
|
387
|
+
options.put("output", NKFCharset.SJIS);
|
388
|
+
} else if ("Shift_JIS".compareToIgnoreCase(opt.getValue()) == 0) {
|
389
|
+
options.put("output", NKFCharset.SJIS);
|
390
|
+
} else if ("Windows-31J".compareToIgnoreCase(opt.getValue()) == 0) {
|
391
|
+
options.put("output", NKFCharset.JIS);
|
392
|
+
} else if ("UTF-8".compareToIgnoreCase(opt.getValue()) == 0) {
|
393
|
+
options.put("output", NKFCharset.UTF8);
|
394
|
+
} else if ("UTF-8N".compareToIgnoreCase(opt.getValue()) == 0) {
|
395
|
+
options.put("output", NKFCharset.UTF8);
|
396
|
+
} else if ("UTF-16".compareToIgnoreCase(opt.getValue()) == 0) {
|
397
|
+
options.put("output", NKFCharset.UTF16);
|
398
|
+
} else if ("UTF-16BE-BOM".compareToIgnoreCase(opt.getValue()) == 0) {
|
399
|
+
options.put("output", NKFCharset.UTF16);
|
400
|
+
} else if ("UTF-32".compareToIgnoreCase(opt.getValue()) == 0) {
|
401
|
+
options.put("output", NKFCharset.UTF32);
|
402
|
+
} else if ("UTF-32BE-BOM".compareToIgnoreCase(opt.getValue()) == 0) {
|
403
|
+
options.put("output", NKFCharset.UTF32);
|
404
|
+
}
|
405
|
+
}
|
406
|
+
if (cmd.hasOption("ic")) {
|
407
|
+
Option opt = cmd.getOption("ic");
|
408
|
+
if ("ISO-2022-JP".compareToIgnoreCase(opt.getValue()) == 0) {
|
409
|
+
options.put("input", NKFCharset.JIS);
|
410
|
+
} else if ("EUC-JP".compareToIgnoreCase(opt.getValue()) == 0) {
|
411
|
+
options.put("input", NKFCharset.EUC);
|
412
|
+
} else if ("CP932".compareToIgnoreCase(opt.getValue()) == 0) {
|
413
|
+
options.put("input", NKFCharset.SJIS);
|
414
|
+
} else if ("Shift_JIS".compareToIgnoreCase(opt.getValue()) == 0) {
|
415
|
+
options.put("input", NKFCharset.SJIS);
|
416
|
+
} else if ("Windows-31J".compareToIgnoreCase(opt.getValue()) == 0) {
|
417
|
+
options.put("input", NKFCharset.SJIS);
|
418
|
+
} else if ("UTF-8".compareToIgnoreCase(opt.getValue()) == 0) {
|
419
|
+
options.put("input", NKFCharset.UTF8);
|
420
|
+
} else if ("UTF-8N".compareToIgnoreCase(opt.getValue()) == 0) {
|
421
|
+
options.put("input", NKFCharset.UTF8);
|
422
|
+
} else if ("UTF-16".compareToIgnoreCase(opt.getValue()) == 0) {
|
423
|
+
options.put("input", NKFCharset.UTF16);
|
424
|
+
} else if ("UTF-16BE-BOM".compareToIgnoreCase(opt.getValue()) == 0) {
|
425
|
+
options.put("input", NKFCharset.UTF16);
|
426
|
+
} else if ("UTF-32".compareToIgnoreCase(opt.getValue()) == 0) {
|
427
|
+
options.put("input", NKFCharset.UTF32);
|
428
|
+
} else if ("UTF-32BE-BOM".compareToIgnoreCase(opt.getValue()) == 0) {
|
429
|
+
options.put("input", NKFCharset.UTF32);
|
430
|
+
}
|
431
|
+
}
|
432
|
+
|
433
|
+
return options;
|
434
|
+
}
|
435
|
+
|
436
|
+
static abstract class Converter {
|
437
|
+
|
438
|
+
protected final ThreadContext context;
|
439
|
+
protected final Map<String, NKFCharset> options;
|
440
|
+
|
441
|
+
public Converter(ThreadContext ctx, Map<String, NKFCharset> opt) {
|
442
|
+
context = ctx;
|
443
|
+
options = opt;
|
444
|
+
}
|
445
|
+
|
446
|
+
static boolean isMimeText(ByteList str, Map<String, NKFCharset> options) {
|
447
|
+
if (str.length() <= 6) {
|
448
|
+
return false;
|
449
|
+
}
|
450
|
+
if (options.get("mime-decode") == NKFCharset.NOCONV) {
|
451
|
+
return false;
|
452
|
+
}
|
453
|
+
if (str.indexOf(BEGIN_MIME_STRING) < 0) {
|
454
|
+
return false;
|
455
|
+
}
|
456
|
+
if (str.lastIndexOf(END_MIME_STRING) < 0) {
|
457
|
+
return false;
|
458
|
+
}
|
459
|
+
return true;
|
460
|
+
}
|
461
|
+
|
462
|
+
private static RubyString encodeMimeString(Ruby runtime, RubyString str, ByteList format) {
|
463
|
+
RubyArray array = RubyArray.newArray(runtime, str);
|
464
|
+
return Pack.pack(runtime, array, format).chomp(runtime.getCurrentContext());
|
465
|
+
}
|
466
|
+
|
467
|
+
abstract RubyString convert(ByteList str);
|
468
|
+
|
469
|
+
ByteList convert_byte(ByteList str, String inputCharset, NKFCharset output) {
|
470
|
+
String outputCharset = output.getCharset();
|
471
|
+
|
472
|
+
if (inputCharset == null) {
|
473
|
+
inputCharset = str.getEncoding().toString();
|
474
|
+
}
|
475
|
+
|
476
|
+
if (outputCharset.equals(inputCharset)) {
|
477
|
+
return str.dup();
|
478
|
+
}
|
479
|
+
|
480
|
+
byte[] outCharsetBytes = outputCharset.getBytes();
|
481
|
+
|
482
|
+
EConv ec = EncodingUtils.econvOpenOpts(context, inputCharset.getBytes(), outCharsetBytes, 0, context.nil);
|
483
|
+
|
484
|
+
if (ec == null) {
|
485
|
+
throw context.runtime.newArgumentError("invalid encoding pair: " + inputCharset + " to " + outputCharset);
|
486
|
+
}
|
487
|
+
|
488
|
+
ByteList converted = EncodingUtils.econvStrConvert(context, ec, str, EConvFlags.INVALID_REPLACE);
|
489
|
+
|
490
|
+
converted.setEncoding(context.runtime.getEncodingService().findEncodingOrAliasEntry(outCharsetBytes).getEncoding());
|
491
|
+
|
492
|
+
return converted;
|
493
|
+
}
|
494
|
+
}
|
495
|
+
|
496
|
+
static class DefaultConverter extends Converter {
|
497
|
+
|
498
|
+
public DefaultConverter(ThreadContext ctx, Map<String, NKFCharset> opt) {
|
499
|
+
super(ctx, opt);
|
500
|
+
}
|
501
|
+
|
502
|
+
RubyString convert(ByteList str) {
|
503
|
+
NKFCharset input = options.get("input");
|
504
|
+
NKFCharset output = options.get("output");
|
505
|
+
ByteList b = convert_byte(str,
|
506
|
+
input.getCharset(),
|
507
|
+
output);
|
508
|
+
return context.runtime.newString(b);
|
509
|
+
}
|
510
|
+
}
|
511
|
+
|
512
|
+
static class MimeConverter extends Converter {
|
513
|
+
|
514
|
+
public MimeConverter(ThreadContext ctx, Map<String, NKFCharset> opt) {
|
515
|
+
super(ctx, opt);
|
516
|
+
}
|
517
|
+
|
518
|
+
private String detectCharset(String charset) {
|
519
|
+
if (charset.compareToIgnoreCase(NKFCharset.UTF8.getCharset()) == 0) {
|
520
|
+
return NKFCharset.UTF8.getCharset();
|
521
|
+
} else if (charset.compareToIgnoreCase(NKFCharset.JIS.getCharset()) == 0) {
|
522
|
+
return NKFCharset.JIS.getCharset();
|
523
|
+
} else if (charset.compareToIgnoreCase(NKFCharset.EUC.getCharset()) == 0) {
|
524
|
+
return NKFCharset.EUC.getCharset();
|
525
|
+
} else {
|
526
|
+
return NKFCharset.ASCII.getCharset();
|
527
|
+
}
|
528
|
+
}
|
529
|
+
|
530
|
+
private ByteList decodeMimeString(String str) {
|
531
|
+
String[] mime = str.split("^=\\?|\\?|\\?=$");
|
532
|
+
String charset = detectCharset(mime[1]);
|
533
|
+
int encode = mime[2].charAt(0);
|
534
|
+
RubyString body = EncodingUtils.newExternalStringWithEncoding(context.runtime, mime[3], ASCIIEncoding.INSTANCE);
|
535
|
+
|
536
|
+
final RubyArray<?> array;
|
537
|
+
if ('B' == encode || 'b' == encode) { // BASE64
|
538
|
+
array = Pack.unpack(context, body, PACK_BASE64);
|
539
|
+
} else { // Qencode
|
540
|
+
array = Pack.unpack(context, body, PACK_QENCODE);
|
541
|
+
}
|
542
|
+
RubyString s = (RubyString) array.entry(0);
|
543
|
+
ByteList decodeStr = s.asString().getByteList();
|
544
|
+
|
545
|
+
return convert_byte(decodeStr, charset, options.get("output"));
|
546
|
+
}
|
547
|
+
|
548
|
+
RubyString makeRubyString(ArrayList<ByteList> list) {
|
549
|
+
ByteList r = new ByteList();
|
550
|
+
for (ByteList l : list) {
|
551
|
+
r.append(l);
|
552
|
+
}
|
553
|
+
return context.runtime.newString(r);
|
554
|
+
}
|
555
|
+
|
556
|
+
RubyString convert(ByteList str) {
|
557
|
+
String s = Helpers.decodeByteList(context.runtime, str);
|
558
|
+
String[] token = s.split("\\s");
|
559
|
+
ArrayList<ByteList> raw_data = new ArrayList<ByteList>();
|
560
|
+
|
561
|
+
for (int i = 0; i < token.length; i++) {
|
562
|
+
raw_data.add(decodeMimeString(token[i]));
|
563
|
+
}
|
564
|
+
|
565
|
+
return makeRubyString(raw_data);
|
566
|
+
}
|
567
|
+
|
568
|
+
}
|
569
|
+
|
570
|
+
// Deprecated aliases: each constant below re-exports the NKFCharset value of the same name.
@Deprecated
|
571
|
+
public static final NKFCharset AUTO = NKFCharset.AUTO;
|
572
|
+
// no ISO-2022-JP in jcodings
|
573
|
+
@Deprecated
|
574
|
+
public static final NKFCharset JIS = NKFCharset.JIS;
|
575
|
+
@Deprecated
|
576
|
+
public static final NKFCharset EUC = NKFCharset.EUC;
|
577
|
+
@Deprecated
|
578
|
+
public static final NKFCharset SJIS = NKFCharset.SJIS;
|
579
|
+
@Deprecated
|
580
|
+
public static final NKFCharset BINARY = NKFCharset.BINARY;
|
581
|
+
@Deprecated
|
582
|
+
public static final NKFCharset NOCONV = NKFCharset.NOCONV;
|
583
|
+
@Deprecated
|
584
|
+
public static final NKFCharset UNKNOWN = NKFCharset.UNKNOWN;
|
585
|
+
@Deprecated
|
586
|
+
public static final NKFCharset ASCII = NKFCharset.ASCII;
|
587
|
+
@Deprecated
|
588
|
+
public static final NKFCharset UTF8 = NKFCharset.UTF8;
|
589
|
+
@Deprecated
|
590
|
+
public static final NKFCharset UTF16 = NKFCharset.UTF16;
|
591
|
+
@Deprecated
|
592
|
+
public static final NKFCharset UTF32 = NKFCharset.UTF32;
|
593
|
+
@Deprecated
|
594
|
+
public static final NKFCharset OTHER = NKFCharset.OTHER;
|
595
|
+
@Deprecated
|
596
|
+
public static final NKFCharset BASE64 = NKFCharset.BASE64;
|
597
|
+
@Deprecated
|
598
|
+
public static final NKFCharset QENCODE = NKFCharset.QENCODE;
|
599
|
+
@Deprecated
|
600
|
+
public static final NKFCharset MIME_DETECT = NKFCharset.MIME_DETECT;
|
601
|
+
}
|
data/ext/nkf/nkf.c
CHANGED
data/lib/nkf.rb
ADDED
data/nkf.gemspec
CHANGED
@@ -11,8 +11,8 @@ end
|
|
11
11
|
Gem::Specification.new do |spec|
|
12
12
|
spec.name = "nkf"
|
13
13
|
spec.version = source_version
|
14
|
-
spec.authors = ["NARUSE Yui"]
|
15
|
-
spec.email = ["naruse@airemix.jp"]
|
14
|
+
spec.authors = ["NARUSE Yui", "Charles Oliver Nutter"]
|
15
|
+
spec.email = ["naruse@airemix.jp", "headius@headius.com"]
|
16
16
|
|
17
17
|
spec.summary = %q{Ruby extension for Network Kanji Filter}
|
18
18
|
spec.description = %q{Ruby extension for Network Kanji Filter}
|
@@ -28,8 +28,16 @@ Gem::Specification.new do |spec|
|
|
28
28
|
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
29
29
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
30
30
|
end
|
31
|
+
|
32
|
+
if Gem::Platform === spec.platform and spec.platform =~ 'java' or RUBY_ENGINE == 'jruby'
|
33
|
+
spec.platform = 'java'
|
34
|
+
spec.licenses += ["EPL-2.0", "LGPL-2.1"]
|
35
|
+
spec.files += Dir["lib/nkf.jar"]
|
36
|
+
else
|
37
|
+
spec.extensions = ["ext/nkf/extconf.rb"]
|
38
|
+
end
|
39
|
+
|
31
40
|
spec.bindir = "exe"
|
32
41
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
33
42
|
spec.require_paths = ["lib"]
|
34
|
-
spec.extensions = ["ext/nkf/extconf.rb"]
|
35
43
|
end
|
metadata
CHANGED
@@ -1,18 +1,20 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nkf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- NARUSE Yui
|
8
|
+
- Charles Oliver Nutter
|
8
9
|
autorequire:
|
9
10
|
bindir: exe
|
10
11
|
cert_chain: []
|
11
|
-
date:
|
12
|
+
date: 2024-01-22 00:00:00.000000000 Z
|
12
13
|
dependencies: []
|
13
14
|
description: Ruby extension for Network Kanji Filter
|
14
15
|
email:
|
15
16
|
- naruse@airemix.jp
|
17
|
+
- headius@headius.com
|
16
18
|
executables: []
|
17
19
|
extensions:
|
18
20
|
- ext/nkf/extconf.rb
|
@@ -28,6 +30,12 @@ files:
|
|
28
30
|
- Rakefile
|
29
31
|
- bin/console
|
30
32
|
- bin/setup
|
33
|
+
- ext/java/org/jruby/ext/nkf/Command.java
|
34
|
+
- ext/java/org/jruby/ext/nkf/CommandParser.java
|
35
|
+
- ext/java/org/jruby/ext/nkf/NKFLibrary.java
|
36
|
+
- ext/java/org/jruby/ext/nkf/Option.java
|
37
|
+
- ext/java/org/jruby/ext/nkf/Options.java
|
38
|
+
- ext/java/org/jruby/ext/nkf/RubyNKF.java
|
31
39
|
- ext/nkf/extconf.rb
|
32
40
|
- ext/nkf/nkf-utf8/config.h
|
33
41
|
- ext/nkf/nkf-utf8/nkf.c
|
@@ -36,6 +44,7 @@ files:
|
|
36
44
|
- ext/nkf/nkf-utf8/utf8tbl.h
|
37
45
|
- ext/nkf/nkf.c
|
38
46
|
- lib/kconv.rb
|
47
|
+
- lib/nkf.rb
|
39
48
|
- nkf.gemspec
|
40
49
|
homepage: https://github.com/ruby/nkf
|
41
50
|
licenses:
|
@@ -59,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
59
68
|
- !ruby/object:Gem::Version
|
60
69
|
version: '0'
|
61
70
|
requirements: []
|
62
|
-
rubygems_version: 3.
|
71
|
+
rubygems_version: 3.6.0.dev
|
63
72
|
signing_key:
|
64
73
|
specification_version: 4
|
65
74
|
summary: Ruby extension for Network Kanji Filter
|