rubysl-nkf 1.1.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -2
- data/ext/rubysl/nkf/nkf-utf8/config.h +20 -57
- data/ext/rubysl/nkf/nkf-utf8/nkf.c +5705 -5028
- data/ext/rubysl/nkf/nkf-utf8/nkf.h +192 -0
- data/ext/rubysl/nkf/nkf-utf8/utf8tbl.c +863 -609
- data/ext/rubysl/nkf/nkf-utf8/utf8tbl.h +27 -0
- data/ext/rubysl/nkf/nkf.c +127 -279
- data/lib/kconv.rb +101 -186
- data/lib/rubysl/nkf/version.rb +1 -1
- data/rubysl-nkf.gemspec +3 -1
- metadata +21 -19
data/lib/kconv.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#
|
2
2
|
# kconv.rb - Kanji Converter.
|
3
3
|
#
|
4
|
-
# $Id: kconv.rb
|
4
|
+
# $Id: kconv.rb 30112 2010-12-07 11:47:39Z naruse $
|
5
5
|
#
|
6
6
|
# ----
|
7
7
|
#
|
@@ -18,9 +18,9 @@ module Kconv
|
|
18
18
|
#
|
19
19
|
# Public Constants
|
20
20
|
#
|
21
|
-
|
21
|
+
|
22
22
|
#Constant of Encoding
|
23
|
-
|
23
|
+
|
24
24
|
# Auto-Detect
|
25
25
|
AUTO = NKF::AUTO
|
26
26
|
# ISO-2022-JP
|
@@ -44,85 +44,19 @@ module Kconv
|
|
44
44
|
# UNKNOWN
|
45
45
|
UNKNOWN = NKF::UNKNOWN
|
46
46
|
|
47
|
-
#
|
48
|
-
# Private Constants
|
49
|
-
#
|
50
|
-
|
51
|
-
# Revision of kconv.rb
|
52
|
-
REVISION = %q$Revision: 11708 $
|
53
|
-
|
54
|
-
#Regexp of Encoding
|
55
|
-
|
56
|
-
# Regexp of Shift_JIS string (private constant)
|
57
|
-
RegexpShiftjis = /\A(?:
|
58
|
-
[\x00-\x7f\xa1-\xdf] |
|
59
|
-
[\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]
|
60
|
-
)*\z/nx
|
61
|
-
|
62
|
-
# Regexp of EUC-JP string (private constant)
|
63
|
-
RegexpEucjp = /\A(?:
|
64
|
-
[\x00-\x7f] |
|
65
|
-
\x8e [\xa1-\xdf] |
|
66
|
-
\x8f [\xa1-\xfe] [\xa1-\xfe] |
|
67
|
-
[\xa1-\xfe] [\xa1-\xfe]
|
68
|
-
)*\z/nx
|
69
|
-
|
70
|
-
# Regexp of UTF-8 string (private constant)
|
71
|
-
RegexpUtf8 = /\A(?:
|
72
|
-
[\x00-\x7f] |
|
73
|
-
[\xc2-\xdf] [\x80-\xbf] |
|
74
|
-
\xe0 [\xa0-\xbf] [\x80-\xbf] |
|
75
|
-
[\xe1-\xef] [\x80-\xbf] [\x80-\xbf] |
|
76
|
-
\xf0 [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
|
77
|
-
[\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
|
78
|
-
\xf4 [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
|
79
|
-
)*\z/nx
|
80
|
-
|
81
47
|
#
|
82
48
|
# Public Methods
|
83
49
|
#
|
84
|
-
|
50
|
+
|
85
51
|
# call-seq:
|
86
|
-
# Kconv.kconv(str,
|
87
|
-
#
|
88
|
-
# Convert <code>str</code> to
|
89
|
-
# <code>
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
# If you don't want to decode them, use NKF.nkf.
|
95
|
-
def kconv(str, out_code, in_code = AUTO)
|
96
|
-
opt = '-'
|
97
|
-
case in_code
|
98
|
-
when ::NKF::JIS
|
99
|
-
opt << 'J'
|
100
|
-
when ::NKF::EUC
|
101
|
-
opt << 'E'
|
102
|
-
when ::NKF::SJIS
|
103
|
-
opt << 'S'
|
104
|
-
when ::NKF::UTF8
|
105
|
-
opt << 'W'
|
106
|
-
when ::NKF::UTF16
|
107
|
-
opt << 'W16'
|
108
|
-
end
|
109
|
-
|
110
|
-
case out_code
|
111
|
-
when ::NKF::JIS
|
112
|
-
opt << 'j'
|
113
|
-
when ::NKF::EUC
|
114
|
-
opt << 'e'
|
115
|
-
when ::NKF::SJIS
|
116
|
-
opt << 's'
|
117
|
-
when ::NKF::UTF8
|
118
|
-
opt << 'w'
|
119
|
-
when ::NKF::UTF16
|
120
|
-
opt << 'w16'
|
121
|
-
when ::NKF::NOCONV
|
122
|
-
return str
|
123
|
-
end
|
124
|
-
|
125
|
-
opt = '' if opt == '-'
|
52
|
+
# Kconv.kconv(str, to_enc, from_enc=nil)
|
53
|
+
#
|
54
|
+
# Convert <code>str</code> to <code>to_enc</code>.
|
55
|
+
# <code>to_enc</code> and <code>from_enc</code> are given as constants of Kconv or Encoding objects.
|
56
|
+
def kconv(str, to_enc, from_enc=nil)
|
57
|
+
opt = ''
|
58
|
+
opt += ' --ic=' + from_enc.to_s if from_enc
|
59
|
+
opt += ' --oc=' + to_enc.to_s if to_enc
|
126
60
|
|
127
61
|
::NKF::nkf(opt, str)
|
128
62
|
end
|
@@ -133,235 +67,216 @@ module Kconv
|
|
133
67
|
#
|
134
68
|
|
135
69
|
# call-seq:
|
136
|
-
# Kconv.tojis(str)
|
70
|
+
# Kconv.tojis(str) => string
|
137
71
|
#
|
138
72
|
# Convert <code>str</code> to ISO-2022-JP
|
139
|
-
#
|
140
|
-
# *Note*
|
141
|
-
# This method decode MIME encoded string and
|
142
|
-
# convert halfwidth katakana to fullwidth katakana.
|
143
|
-
# If you don't want it, use NKF.nkf('-jxm0', str).
|
144
73
|
def tojis(str)
|
145
|
-
|
74
|
+
kconv(str, JIS)
|
146
75
|
end
|
147
76
|
module_function :tojis
|
148
77
|
|
149
78
|
# call-seq:
|
150
|
-
# Kconv.toeuc(str)
|
79
|
+
# Kconv.toeuc(str) => string
|
151
80
|
#
|
152
81
|
# Convert <code>str</code> to EUC-JP
|
153
|
-
#
|
154
|
-
# *Note*
|
155
|
-
# This method decode MIME encoded string and
|
156
|
-
# convert halfwidth katakana to fullwidth katakana.
|
157
|
-
# If you don't want it, use NKF.nkf('-exm0', str).
|
158
82
|
def toeuc(str)
|
159
|
-
|
83
|
+
kconv(str, EUC)
|
160
84
|
end
|
161
85
|
module_function :toeuc
|
162
86
|
|
163
87
|
# call-seq:
|
164
|
-
# Kconv.tosjis(str)
|
88
|
+
# Kconv.tosjis(str) => string
|
165
89
|
#
|
166
90
|
# Convert <code>str</code> to Shift_JIS
|
167
|
-
#
|
168
|
-
# *Note*
|
169
|
-
# This method decode MIME encoded string and
|
170
|
-
# convert halfwidth katakana to fullwidth katakana.
|
171
|
-
# If you don't want it, use NKF.nkf('-sxm0', str).
|
172
91
|
def tosjis(str)
|
173
|
-
|
92
|
+
kconv(str, SJIS)
|
174
93
|
end
|
175
94
|
module_function :tosjis
|
176
95
|
|
177
96
|
# call-seq:
|
178
|
-
# Kconv.toutf8(str)
|
97
|
+
# Kconv.toutf8(str) => string
|
179
98
|
#
|
180
99
|
# Convert <code>str</code> to UTF-8
|
181
|
-
#
|
182
|
-
# *Note*
|
183
|
-
# This method decode MIME encoded string and
|
184
|
-
# convert halfwidth katakana to fullwidth katakana.
|
185
|
-
# If you don't want it, use NKF.nkf('-wxm0', str).
|
186
100
|
def toutf8(str)
|
187
|
-
|
101
|
+
kconv(str, UTF8)
|
188
102
|
end
|
189
103
|
module_function :toutf8
|
190
104
|
|
191
105
|
# call-seq:
|
192
|
-
# Kconv.toutf16(str)
|
106
|
+
# Kconv.toutf16(str) => string
|
193
107
|
#
|
194
108
|
# Convert <code>str</code> to UTF-16
|
195
|
-
#
|
196
|
-
# *Note*
|
197
|
-
# This method decode MIME encoded string and
|
198
|
-
# convert halfwidth katakana to fullwidth katakana.
|
199
|
-
# If you don't want it, use NKF.nkf('-w16xm0', str).
|
200
109
|
def toutf16(str)
|
201
|
-
|
110
|
+
kconv(str, UTF16)
|
202
111
|
end
|
203
112
|
module_function :toutf16
|
204
113
|
|
114
|
+
# call-seq:
|
115
|
+
# Kconv.toutf32(str) => string
|
116
|
+
#
|
117
|
+
# Convert <code>str</code> to UTF-32
|
118
|
+
def toutf32(str)
|
119
|
+
kconv(str, UTF32)
|
120
|
+
end
|
121
|
+
module_function :toutf32
|
122
|
+
|
123
|
+
# call-seq:
|
124
|
+
# Kconv.tolocale(str) => string
|
125
|
+
#
|
126
|
+
# Convert <code>str</code> to locale encoding
|
127
|
+
def tolocale(str)
|
128
|
+
kconv(str, Encoding.locale_charmap)
|
129
|
+
end
|
130
|
+
module_function :tolocale
|
131
|
+
|
205
132
|
#
|
206
133
|
# guess
|
207
134
|
#
|
208
135
|
|
209
136
|
# call-seq:
|
210
|
-
# Kconv.guess(str)
|
137
|
+
# Kconv.guess(str) => encoding
|
211
138
|
#
|
212
|
-
# Guess input encoding by NKF.
|
139
|
+
# Guess input encoding by NKF.guess
|
213
140
|
def guess(str)
|
214
141
|
::NKF::guess(str)
|
215
142
|
end
|
216
143
|
module_function :guess
|
217
144
|
|
218
|
-
# call-seq:
|
219
|
-
# Kconv.guess_old(str) -> integer
|
220
|
-
#
|
221
|
-
# Guess input encoding by NKF.guess1
|
222
|
-
def guess_old(str)
|
223
|
-
::NKF::guess1(str)
|
224
|
-
end
|
225
|
-
module_function :guess_old
|
226
|
-
|
227
145
|
#
|
228
146
|
# isEncoding
|
229
147
|
#
|
230
148
|
|
231
149
|
# call-seq:
|
232
|
-
# Kconv.iseuc(str)
|
150
|
+
# Kconv.iseuc(str) => true or false
|
233
151
|
#
|
234
152
|
# Returns whether input encoding is EUC-JP or not.
|
235
153
|
#
|
236
154
|
# *Note* don't expect the return value to be a MatchData.
|
237
155
|
def iseuc(str)
|
238
|
-
|
156
|
+
str.dup.force_encoding(EUC).valid_encoding?
|
239
157
|
end
|
240
158
|
module_function :iseuc
|
241
159
|
|
242
160
|
# call-seq:
|
243
|
-
# Kconv.issjis(str)
|
161
|
+
# Kconv.issjis(str) => true or false
|
244
162
|
#
|
245
163
|
# Returns whether input encoding is Shift_JIS or not.
|
246
|
-
#
|
247
|
-
# *Note* don't expect this return value is MatchData.
|
248
164
|
def issjis(str)
|
249
|
-
|
165
|
+
str.dup.force_encoding(SJIS).valid_encoding?
|
250
166
|
end
|
251
167
|
module_function :issjis
|
252
168
|
|
253
169
|
# call-seq:
|
254
|
-
# Kconv.
|
170
|
+
# Kconv.isjis(str) => true or false
|
171
|
+
#
|
172
|
+
# Returns whether input encoding is ISO-2022-JP or not.
|
173
|
+
def isjis(str)
|
174
|
+
/\A [\t\n\r\x20-\x7E]*
|
175
|
+
(?:
|
176
|
+
(?:\x1b \x28 I [\x21-\x7E]*
|
177
|
+
|\x1b \x28 J [\x21-\x7E]*
|
178
|
+
|\x1b \x24 @ (?:[\x21-\x7E]{2})*
|
179
|
+
|\x1b \x24 B (?:[\x21-\x7E]{2})*
|
180
|
+
|\x1b \x24 \x28 D (?:[\x21-\x7E]{2})*
|
181
|
+
)*
|
182
|
+
\x1b \x28 B [\t\n\r\x20-\x7E]*
|
183
|
+
)*
|
184
|
+
\z/nox =~ str.dup.force_encoding('BINARY') ? true : false
|
185
|
+
end
|
186
|
+
module_function :isjis
|
187
|
+
|
188
|
+
# call-seq:
|
189
|
+
# Kconv.isutf8(str) => true or false
|
255
190
|
#
|
256
191
|
# Returns whether input encoding is UTF-8 or not.
|
257
|
-
#
|
258
|
-
# *Note* don't expect this return value is MatchData.
|
259
192
|
def isutf8(str)
|
260
|
-
|
193
|
+
str.dup.force_encoding(UTF8).valid_encoding?
|
261
194
|
end
|
262
195
|
module_function :isutf8
|
263
|
-
|
264
196
|
end
|
265
197
|
|
266
198
|
class String
|
267
199
|
# call-seq:
|
268
|
-
# String#kconv(
|
200
|
+
# String#kconv(to_enc, from_enc)
|
269
201
|
#
|
270
|
-
# Convert <code>self</code> to
|
271
|
-
# <code>
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
# convert halfwidth katakana to fullwidth katakana.
|
276
|
-
# If you don't want to decode them, use NKF.nkf.
|
277
|
-
def kconv(out_code, in_code=Kconv::AUTO)
|
278
|
-
Kconv::kconv(self, out_code, in_code)
|
202
|
+
# Convert <code>self</code> to <code>to_enc</code>.
|
203
|
+
# <code>to_enc</code> and <code>from_enc</code> are given as constants of Kconv or Encoding objects.
|
204
|
+
def kconv(to_enc, from_enc=nil)
|
205
|
+
from_enc = self.encoding if !from_enc && self.encoding != Encoding.list[0]
|
206
|
+
Kconv::kconv(self, to_enc, from_enc)
|
279
207
|
end
|
280
|
-
|
208
|
+
|
281
209
|
#
|
282
210
|
# to Encoding
|
283
211
|
#
|
284
|
-
|
212
|
+
|
285
213
|
# call-seq:
|
286
|
-
# String#tojis
|
214
|
+
# String#tojis => string
|
287
215
|
#
|
288
216
|
# Convert <code>self</code> to ISO-2022-JP
|
289
|
-
#
|
290
|
-
# *Note*
|
291
|
-
# This method decode MIME encoded string and
|
292
|
-
# convert halfwidth katakana to fullwidth katakana.
|
293
|
-
# If you don't want it, use NKF.nkf('-jxm0', str).
|
294
217
|
def tojis; Kconv.tojis(self) end
|
295
218
|
|
296
219
|
# call-seq:
|
297
|
-
# String#toeuc
|
220
|
+
# String#toeuc => string
|
298
221
|
#
|
299
222
|
# Convert <code>self</code> to EUC-JP
|
300
|
-
#
|
301
|
-
# *Note*
|
302
|
-
# This method decode MIME encoded string and
|
303
|
-
# convert halfwidth katakana to fullwidth katakana.
|
304
|
-
# If you don't want it, use NKF.nkf('-exm0', str).
|
305
223
|
def toeuc; Kconv.toeuc(self) end
|
306
224
|
|
307
225
|
# call-seq:
|
308
|
-
# String#tosjis
|
226
|
+
# String#tosjis => string
|
309
227
|
#
|
310
228
|
# Convert <code>self</code> to Shift_JIS
|
311
|
-
#
|
312
|
-
# *Note*
|
313
|
-
# This method decode MIME encoded string and
|
314
|
-
# convert halfwidth katakana to fullwidth katakana.
|
315
|
-
# If you don't want it, use NKF.nkf('-sxm0', str).
|
316
229
|
def tosjis; Kconv.tosjis(self) end
|
317
230
|
|
318
231
|
# call-seq:
|
319
|
-
# String#toutf8
|
232
|
+
# String#toutf8 => string
|
320
233
|
#
|
321
234
|
# Convert <code>self</code> to UTF-8
|
322
|
-
#
|
323
|
-
# *Note*
|
324
|
-
# This method decode MIME encoded string and
|
325
|
-
# convert halfwidth katakana to fullwidth katakana.
|
326
|
-
# If you don't want it, use NKF.nkf('-wxm0', str).
|
327
235
|
def toutf8; Kconv.toutf8(self) end
|
328
236
|
|
329
237
|
# call-seq:
|
330
|
-
# String#toutf16
|
238
|
+
# String#toutf16 => string
|
331
239
|
#
|
332
240
|
# Convert <code>self</code> to UTF-16
|
333
|
-
#
|
334
|
-
# *Note*
|
335
|
-
# This method decode MIME encoded string and
|
336
|
-
# convert halfwidth katakana to fullwidth katakana.
|
337
|
-
# If you don't want it, use NKF.nkf('-w16xm0', str).
|
338
241
|
def toutf16; Kconv.toutf16(self) end
|
339
242
|
|
243
|
+
# call-seq:
|
244
|
+
# String#toutf32 => string
|
245
|
+
#
|
246
|
+
# Convert <code>self</code> to UTF-32
|
247
|
+
def toutf32; Kconv.toutf32(self) end
|
248
|
+
|
249
|
+
# call-seq:
|
250
|
+
# String#tolocale => string
|
251
|
+
#
|
252
|
+
# Convert <code>self</code> to locale encoding
|
253
|
+
def tolocale; Kconv.tolocale(self) end
|
254
|
+
|
340
255
|
#
|
341
256
|
# is Encoding
|
342
257
|
#
|
343
258
|
|
344
259
|
# call-seq:
|
345
|
-
# String#iseuc
|
260
|
+
# String#iseuc => true or false
|
346
261
|
#
|
347
262
|
# Returns whether <code>self</code>'s encoding is EUC-JP or not.
|
348
|
-
#
|
349
|
-
# *Note* don't expect this return value is MatchData.
|
350
263
|
def iseuc; Kconv.iseuc(self) end
|
351
264
|
|
352
265
|
# call-seq:
|
353
|
-
# String#issjis
|
266
|
+
# String#issjis => true or false
|
354
267
|
#
|
355
268
|
# Returns whether <code>self</code>'s encoding is Shift_JIS or not.
|
356
|
-
#
|
357
|
-
# *Note* don't expect this return value is MatchData.
|
358
269
|
def issjis; Kconv.issjis(self) end
|
359
270
|
|
360
271
|
# call-seq:
|
361
|
-
# String#
|
272
|
+
# String#isjis => true or false
|
362
273
|
#
|
363
|
-
# Returns whether <code>self</code>'s encoding is
|
274
|
+
# Returns whether <code>self</code>'s encoding is ISO-2022-JP or not.
|
275
|
+
def isjis; Kconv.isjis(self) end
|
276
|
+
|
277
|
+
# call-seq:
|
278
|
+
# String#isutf8 => true or false
|
364
279
|
#
|
365
|
-
#
|
280
|
+
# Returns whether <code>self</code>'s encoding is UTF-8 or not.
|
366
281
|
def isutf8; Kconv.isutf8(self) end
|
367
282
|
end
|
data/lib/rubysl/nkf/version.rb
CHANGED
data/rubysl-nkf.gemspec
CHANGED
@@ -17,8 +17,10 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
18
18
|
spec.require_paths = ["lib"]
|
19
19
|
|
20
|
+
spec.required_ruby_version = "~> 2.0"
|
21
|
+
|
20
22
|
spec.add_development_dependency "bundler", "~> 1.3"
|
21
23
|
spec.add_development_dependency "rake", "~> 10.0"
|
22
24
|
spec.add_development_dependency "mspec", "~> 1.5"
|
23
|
-
spec.add_development_dependency "rubysl-prettyprint", "~>
|
25
|
+
spec.add_development_dependency "rubysl-prettyprint", "~> 2.0"
|
24
26
|
end
|