nkf 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/test.yml +24 -0
- data/.gitignore +11 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +38 -0
- data/Rakefile +12 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/nkf/extconf.rb +3 -0
- data/ext/nkf/nkf-utf8/config.h +51 -0
- data/ext/nkf/nkf-utf8/nkf.c +7205 -0
- data/ext/nkf/nkf-utf8/nkf.h +189 -0
- data/ext/nkf/nkf-utf8/utf8tbl.c +14638 -0
- data/ext/nkf/nkf-utf8/utf8tbl.h +72 -0
- data/ext/nkf/nkf.c +503 -0
- data/lib/kconv.rb +283 -0
- data/nkf.gemspec +24 -0
- metadata +63 -0
data/lib/kconv.rb
ADDED
@@ -0,0 +1,283 @@
|
|
1
|
+
# frozen_string_literal: false
|
2
|
+
#
|
3
|
+
# kconv.rb - Kanji Converter.
|
4
|
+
#
|
5
|
+
# $Id$
|
6
|
+
#
|
7
|
+
# ----
|
8
|
+
#
|
9
|
+
# kconv.rb implements the Kconv module for Kanji Converter. Additionally,
|
10
|
+
# some methods are added to the String class to allow easy conversion.
|
11
|
+
#
|
12
|
+
|
13
|
+
require 'nkf'
|
14
|
+
|
15
|
+
#
|
16
|
+
# Kanji Converter for Ruby.
|
17
|
+
#
|
18
|
+
module Kconv
  #
  # Public Constants
  #

  # Encoding constants, each one an alias of the NKF constant of the same name.

  # Auto-Detect
  AUTO = NKF::AUTO
  # ISO-2022-JP
  JIS = NKF::JIS
  # EUC-JP
  EUC = NKF::EUC
  # Shift_JIS
  SJIS = NKF::SJIS
  # BINARY
  BINARY = NKF::BINARY
  # NOCONV
  NOCONV = NKF::NOCONV
  # ASCII
  ASCII = NKF::ASCII
  # UTF-8
  UTF8 = NKF::UTF8
  # UTF-16
  UTF16 = NKF::UTF16
  # UTF-32
  UTF32 = NKF::UTF32
  # UNKNOWN
  UNKNOWN = NKF::UNKNOWN

  #
  # Public Methods
  #

  # Every method defined below is a module function: callable as
  # Kconv.name(...) and available as a private method when Kconv is included.
  module_function

  # call-seq:
  #    Kconv.kconv(str, to_enc, from_enc=nil)
  #
  # Convert <code>str</code> to <code>to_enc</code> by calling NKF.nkf.
  # <code>to_enc</code> and <code>from_enc</code> are given as constants of
  # Kconv or Encoding objects. A nil (or false) encoding is simply left out
  # of the option string handed to NKF.
  def kconv(str, to_enc, from_enc=nil)
    options = []
    options << " --ic=#{from_enc}" if from_enc
    options << " --oc=#{to_enc}" if to_enc

    ::NKF.nkf(options.join, str)
  end

  #
  # Encode to
  #

  # call-seq:
  #    Kconv.tojis(str)   => string
  #
  # Convert <code>str</code> to ISO-2022-JP
  def tojis(str)
    kconv(str, JIS)
  end

  # call-seq:
  #    Kconv.toeuc(str)   => string
  #
  # Convert <code>str</code> to EUC-JP
  def toeuc(str)
    kconv(str, EUC)
  end

  # call-seq:
  #    Kconv.tosjis(str)   => string
  #
  # Convert <code>str</code> to Shift_JIS
  def tosjis(str)
    kconv(str, SJIS)
  end

  # call-seq:
  #    Kconv.toutf8(str)   => string
  #
  # Convert <code>str</code> to UTF-8
  def toutf8(str)
    kconv(str, UTF8)
  end

  # call-seq:
  #    Kconv.toutf16(str)   => string
  #
  # Convert <code>str</code> to UTF-16
  def toutf16(str)
    kconv(str, UTF16)
  end

  # call-seq:
  #    Kconv.toutf32(str)   => string
  #
  # Convert <code>str</code> to UTF-32
  def toutf32(str)
    kconv(str, UTF32)
  end

  # call-seq:
  #    Kconv.tolocale(str)   => string
  #
  # Convert <code>str</code> to the locale encoding, as named by
  # Encoding.locale_charmap.
  def tolocale(str)
    kconv(str, Encoding.locale_charmap)
  end

  #
  # guess
  #

  # call-seq:
  #    Kconv.guess(str)   => encoding
  #
  # Guess input encoding by NKF.guess
  def guess(str)
    ::NKF.guess(str)
  end

  #
  # isEncoding
  #

  # call-seq:
  #    Kconv.iseuc(str)   => true or false
  #
  # Returns whether the bytes of <code>str</code> are valid EUC-JP.
  # The check runs on a copy, so <code>str</code> itself is not re-tagged.
  #
  # *Note* the return value is a boolean, not MatchData.
  def iseuc(str)
    candidate = str.dup
    candidate.force_encoding(EUC)
    candidate.valid_encoding?
  end

  # call-seq:
  #    Kconv.issjis(str)   => true or false
  #
  # Returns whether the bytes of <code>str</code> are valid Shift_JIS.
  # The check runs on a copy, so <code>str</code> itself is not re-tagged.
  def issjis(str)
    candidate = str.dup
    candidate.force_encoding(SJIS)
    candidate.valid_encoding?
  end

  # call-seq:
  #    Kconv.isjis(str)   => true or false
  #
  # Returns whether <code>str</code>, viewed as raw bytes, matches the
  # ISO-2022-JP pattern below: plain 7-bit text, optionally followed by
  # groups of escape-introduced segments that each end with a return to
  # ASCII (ESC ( B).
  def isjis(str)
    pattern = /\A [\t\n\r\x20-\x7E]*
      (?:
        (?:\x1b \x28 I [\x21-\x7E]*
          |\x1b \x28 J [\x21-\x7E]*
          |\x1b \x24 @ (?:[\x21-\x7E]{2})*
          |\x1b \x24 B (?:[\x21-\x7E]{2})*
          |\x1b \x24 \x28 D (?:[\x21-\x7E]{2})*
        )*
        \x1b \x28 B [\t\n\r\x20-\x7E]*
      )*
     \z/nox
    raw = str.dup.force_encoding('BINARY')

    # =~ yields an index or nil; callers are promised a strict boolean.
    !(pattern =~ raw).nil?
  end

  # call-seq:
  #    Kconv.isutf8(str)   => true or false
  #
  # Returns whether the bytes of <code>str</code> are valid UTF-8.
  # The check runs on a copy, so <code>str</code> itself is not re-tagged.
  def isutf8(str)
    candidate = str.dup
    candidate.force_encoding(UTF8)
    candidate.valid_encoding?
  end
end
|
198
|
+
|
199
|
+
class String
  # call-seq:
  #    String#kconv(to_enc, from_enc=nil)
  #
  # Convert <code>self</code> to <code>to_enc</code>.
  # <code>to_enc</code> and <code>from_enc</code> are given as constants of Kconv or Encoding objects.
  #
  # When <code>from_enc</code> is omitted, the receiver's own encoding is
  # used as the source encoding, unless the receiver is tagged ASCII-8BIT
  # (binary); in that case no source encoding is passed on to Kconv.kconv.
  def kconv(to_enc, from_enc=nil)
    # Name the binary encoding explicitly instead of relying on it being
    # the first entry of Encoding.list.
    from_enc = encoding if !from_enc && encoding != Encoding::ASCII_8BIT
    Kconv::kconv(self, to_enc, from_enc)
  end

  #
  # to Encoding
  #

  # call-seq:
  #    String#tojis   => string
  #
  # Convert <code>self</code> to ISO-2022-JP
  def tojis
    Kconv.tojis(self)
  end

  # call-seq:
  #    String#toeuc   => string
  #
  # Convert <code>self</code> to EUC-JP
  def toeuc
    Kconv.toeuc(self)
  end

  # call-seq:
  #    String#tosjis   => string
  #
  # Convert <code>self</code> to Shift_JIS
  def tosjis
    Kconv.tosjis(self)
  end

  # call-seq:
  #    String#toutf8   => string
  #
  # Convert <code>self</code> to UTF-8
  def toutf8
    Kconv.toutf8(self)
  end

  # call-seq:
  #    String#toutf16   => string
  #
  # Convert <code>self</code> to UTF-16
  def toutf16
    Kconv.toutf16(self)
  end

  # call-seq:
  #    String#toutf32   => string
  #
  # Convert <code>self</code> to UTF-32
  def toutf32
    Kconv.toutf32(self)
  end

  # call-seq:
  #    String#tolocale   => string
  #
  # Convert <code>self</code> to locale encoding
  def tolocale
    Kconv.tolocale(self)
  end

  #
  # is Encoding
  #

  # call-seq:
  #    String#iseuc   => true or false
  #
  # Returns whether <code>self</code> passes the EUC-JP check of Kconv.iseuc.
  def iseuc
    Kconv.iseuc(self)
  end

  # call-seq:
  #    String#issjis   => true or false
  #
  # Returns whether <code>self</code> passes the Shift_JIS check of Kconv.issjis.
  def issjis
    Kconv.issjis(self)
  end

  # call-seq:
  #    String#isjis   => true or false
  #
  # Returns whether <code>self</code> passes the ISO-2022-JP check of Kconv.isjis.
  def isjis
    Kconv.isjis(self)
  end

  # call-seq:
  #    String#isutf8   => true or false
  #
  # Returns whether <code>self</code> passes the UTF-8 check of Kconv.isutf8.
  def isutf8
    Kconv.isutf8(self)
  end
end
|
data/nkf.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# Gem specification for nkf: the NKF C extension (ext/nkf) together with the
# pure-Ruby Kconv wrapper (lib/kconv.rb).
Gem::Specification.new do |spec|
  spec.name          = "nkf"
  spec.version       = "0.1.0"
  spec.authors       = ["NARUSE Yui"]
  spec.email         = ["naruse@airemix.jp"]

  spec.summary       = %q{Ruby extension for Network Kanji Filter}
  spec.description   = %q{Ruby extension for Network Kanji Filter}
  spec.homepage      = "https://github.com/ruby/nkf"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
  spec.licenses      = ["Ruby", "BSD-2-Clause"]

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  # Register the extconf so RubyGems compiles the C extension at install
  # time; lib/kconv.rb does `require 'nkf'`, which only the compiled
  # extension can satisfy.
  spec.extensions    = ["ext/nkf/extconf.rb"]
  spec.require_paths = ["lib"]
end
|
metadata
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: nkf
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- NARUSE Yui
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-09-18 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Ruby extension for Network Kanji Filter
|
14
|
+
email:
|
15
|
+
- naruse@airemix.jp
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- ".github/workflows/test.yml"
|
21
|
+
- ".gitignore"
|
22
|
+
- Gemfile
|
23
|
+
- LICENSE.txt
|
24
|
+
- README.md
|
25
|
+
- Rakefile
|
26
|
+
- bin/console
|
27
|
+
- bin/setup
|
28
|
+
- ext/nkf/extconf.rb
|
29
|
+
- ext/nkf/nkf-utf8/config.h
|
30
|
+
- ext/nkf/nkf-utf8/nkf.c
|
31
|
+
- ext/nkf/nkf-utf8/nkf.h
|
32
|
+
- ext/nkf/nkf-utf8/utf8tbl.c
|
33
|
+
- ext/nkf/nkf-utf8/utf8tbl.h
|
34
|
+
- ext/nkf/nkf.c
|
35
|
+
- lib/kconv.rb
|
36
|
+
- nkf.gemspec
|
37
|
+
homepage: https://github.com/ruby/nkf
|
38
|
+
licenses:
|
39
|
+
- Ruby
|
40
|
+
- BSD-2-Clause
|
41
|
+
metadata:
|
42
|
+
homepage_uri: https://github.com/ruby/nkf
|
43
|
+
source_code_uri: https://github.com/ruby/nkf
|
44
|
+
post_install_message:
|
45
|
+
rdoc_options: []
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 2.3.0
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
58
|
+
requirements: []
|
59
|
+
rubygems_version: 3.2.0.rc.1
|
60
|
+
signing_key:
|
61
|
+
specification_version: 4
|
62
|
+
summary: Ruby extension for Network Kanji Filter
|
63
|
+
test_files: []
|