rubysl-nkf 1.1.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # kconv.rb - Kanji Converter.
3
3
  #
4
- # $Id: kconv.rb 11708 2007-02-12 23:01:19Z shyouhei $
4
+ # $Id: kconv.rb 30112 2010-12-07 11:47:39Z naruse $
5
5
  #
6
6
  # ----
7
7
  #
@@ -18,9 +18,9 @@ module Kconv
18
18
  #
19
19
  # Public Constants
20
20
  #
21
-
21
+
22
22
  #Constant of Encoding
23
-
23
+
24
24
  # Auto-Detect
25
25
  AUTO = NKF::AUTO
26
26
  # ISO-2022-JP
@@ -44,85 +44,19 @@ module Kconv
44
44
  # UNKNOWN
45
45
  UNKNOWN = NKF::UNKNOWN
46
46
 
47
- #
48
- # Private Constants
49
- #
50
-
51
- # Revision of kconv.rb
52
- REVISION = %q$Revision: 11708 $
53
-
54
- #Regexp of Encoding
55
-
56
- # Regexp of Shift_JIS string (private constant)
57
- RegexpShiftjis = /\A(?:
58
- [\x00-\x7f\xa1-\xdf] |
59
- [\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]
60
- )*\z/nx
61
-
62
- # Regexp of EUC-JP string (private constant)
63
- RegexpEucjp = /\A(?:
64
- [\x00-\x7f] |
65
- \x8e [\xa1-\xdf] |
66
- \x8f [\xa1-\xfe] [\xa1-\xfe] |
67
- [\xa1-\xfe] [\xa1-\xfe]
68
- )*\z/nx
69
-
70
- # Regexp of UTF-8 string (private constant)
71
- RegexpUtf8 = /\A(?:
72
- [\x00-\x7f] |
73
- [\xc2-\xdf] [\x80-\xbf] |
74
- \xe0 [\xa0-\xbf] [\x80-\xbf] |
75
- [\xe1-\xef] [\x80-\xbf] [\x80-\xbf] |
76
- \xf0 [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
77
- [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
78
- \xf4 [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
79
- )*\z/nx
80
-
81
47
  #
82
48
  # Public Methods
83
49
  #
84
-
50
+
85
51
  # call-seq:
86
- # Kconv.kconv(str, out_code, in_code = Kconv::AUTO)
87
- #
88
- # Convert <code>str</code> to out_code.
89
- # <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
90
- #
91
- # *Note*
92
- # This method decode MIME encoded string and
93
- # convert halfwidth katakana to fullwidth katakana.
94
- # If you don't want to decode them, use NKF.nkf.
95
- def kconv(str, out_code, in_code = AUTO)
96
- opt = '-'
97
- case in_code
98
- when ::NKF::JIS
99
- opt << 'J'
100
- when ::NKF::EUC
101
- opt << 'E'
102
- when ::NKF::SJIS
103
- opt << 'S'
104
- when ::NKF::UTF8
105
- opt << 'W'
106
- when ::NKF::UTF16
107
- opt << 'W16'
108
- end
109
-
110
- case out_code
111
- when ::NKF::JIS
112
- opt << 'j'
113
- when ::NKF::EUC
114
- opt << 'e'
115
- when ::NKF::SJIS
116
- opt << 's'
117
- when ::NKF::UTF8
118
- opt << 'w'
119
- when ::NKF::UTF16
120
- opt << 'w16'
121
- when ::NKF::NOCONV
122
- return str
123
- end
124
-
125
- opt = '' if opt == '-'
52
+ # Kconv.kconv(str, to_enc, from_enc=nil)
53
+ #
54
+ # Convert <code>str</code> to <code>to_enc</code>.
55
+ # <code>to_enc</code> and <code>from_enc</code> are given as constants of Kconv or Encoding objects.
56
+ def kconv(str, to_enc, from_enc=nil)
57
+ opt = ''
58
+ opt += ' --ic=' + from_enc.to_s if from_enc
59
+ opt += ' --oc=' + to_enc.to_s if to_enc
126
60
 
127
61
  ::NKF::nkf(opt, str)
128
62
  end
@@ -133,235 +67,216 @@ module Kconv
133
67
  #
134
68
 
135
69
  # call-seq:
136
- # Kconv.tojis(str) -> string
70
+ # Kconv.tojis(str) => string
137
71
  #
138
72
  # Convert <code>str</code> to ISO-2022-JP
139
- #
140
- # *Note*
141
- # This method decode MIME encoded string and
142
- # convert halfwidth katakana to fullwidth katakana.
143
- # If you don't want it, use NKF.nkf('-jxm0', str).
144
73
  def tojis(str)
145
- ::NKF::nkf('-jm', str)
74
+ kconv(str, JIS)
146
75
  end
147
76
  module_function :tojis
148
77
 
149
78
  # call-seq:
150
- # Kconv.toeuc(str) -> string
79
+ # Kconv.toeuc(str) => string
151
80
  #
152
81
  # Convert <code>str</code> to EUC-JP
153
- #
154
- # *Note*
155
- # This method decode MIME encoded string and
156
- # convert halfwidth katakana to fullwidth katakana.
157
- # If you don't want it, use NKF.nkf('-exm0', str).
158
82
  def toeuc(str)
159
- ::NKF::nkf('-em', str)
83
+ kconv(str, EUC)
160
84
  end
161
85
  module_function :toeuc
162
86
 
163
87
  # call-seq:
164
- # Kconv.tosjis(str) -> string
88
+ # Kconv.tosjis(str) => string
165
89
  #
166
90
  # Convert <code>str</code> to Shift_JIS
167
- #
168
- # *Note*
169
- # This method decode MIME encoded string and
170
- # convert halfwidth katakana to fullwidth katakana.
171
- # If you don't want it, use NKF.nkf('-sxm0', str).
172
91
  def tosjis(str)
173
- ::NKF::nkf('-sm', str)
92
+ kconv(str, SJIS)
174
93
  end
175
94
  module_function :tosjis
176
95
 
177
96
  # call-seq:
178
- # Kconv.toutf8(str) -> string
97
+ # Kconv.toutf8(str) => string
179
98
  #
180
99
  # Convert <code>str</code> to UTF-8
181
- #
182
- # *Note*
183
- # This method decode MIME encoded string and
184
- # convert halfwidth katakana to fullwidth katakana.
185
- # If you don't want it, use NKF.nkf('-wxm0', str).
186
100
  def toutf8(str)
187
- ::NKF::nkf('-wm', str)
101
+ kconv(str, UTF8)
188
102
  end
189
103
  module_function :toutf8
190
104
 
191
105
  # call-seq:
192
- # Kconv.toutf16(str) -> string
106
+ # Kconv.toutf16(str) => string
193
107
  #
194
108
  # Convert <code>str</code> to UTF-16
195
- #
196
- # *Note*
197
- # This method decode MIME encoded string and
198
- # convert halfwidth katakana to fullwidth katakana.
199
- # If you don't want it, use NKF.nkf('-w16xm0', str).
200
109
  def toutf16(str)
201
- ::NKF::nkf('-w16m', str)
110
+ kconv(str, UTF16)
202
111
  end
203
112
  module_function :toutf16
204
113
 
114
+ # call-seq:
115
+ # Kconv.toutf32(str) => string
116
+ #
117
+ # Convert <code>str</code> to UTF-32
118
+ def toutf32(str)
119
+ kconv(str, UTF32)
120
+ end
121
+ module_function :toutf32
122
+
123
+ # call-seq:
124
+ # Kconv.tolocale(str) => string
125
+ #
126
+ # Convert <code>str</code> to locale encoding
127
+ def tolocale(str)
128
+ kconv(str, Encoding.locale_charmap)
129
+ end
130
+ module_function :tolocale
131
+
205
132
  #
206
133
  # guess
207
134
  #
208
135
 
209
136
  # call-seq:
210
- # Kconv.guess(str) -> integer
137
+ # Kconv.guess(str) => encoding
211
138
  #
212
- # Guess input encoding by NKF.guess2
139
+ # Guess input encoding by NKF.guess
213
140
  def guess(str)
214
141
  ::NKF::guess(str)
215
142
  end
216
143
  module_function :guess
217
144
 
218
- # call-seq:
219
- # Kconv.guess_old(str) -> integer
220
- #
221
- # Guess input encoding by NKF.guess1
222
- def guess_old(str)
223
- ::NKF::guess1(str)
224
- end
225
- module_function :guess_old
226
-
227
145
  #
228
146
  # isEncoding
229
147
  #
230
148
 
231
149
  # call-seq:
232
- # Kconv.iseuc(str) -> obj or nil
150
+ # Kconv.iseuc(str) => true or false
233
151
  #
234
152
  # Returns whether input encoding is EUC-JP or not.
235
153
  #
236
154
  # *Note* don't expect this return value to be MatchData.
237
155
  def iseuc(str)
238
- RegexpEucjp.match( str )
156
+ str.dup.force_encoding(EUC).valid_encoding?
239
157
  end
240
158
  module_function :iseuc
241
159
 
242
160
  # call-seq:
243
- # Kconv.issjis(str) -> obj or nil
161
+ # Kconv.issjis(str) => true or false
244
162
  #
245
163
  # Returns whether input encoding is Shift_JIS or not.
246
- #
247
- # *Note* don't expect this return value is MatchData.
248
164
  def issjis(str)
249
- RegexpShiftjis.match( str )
165
+ str.dup.force_encoding(SJIS).valid_encoding?
250
166
  end
251
167
  module_function :issjis
252
168
 
253
169
  # call-seq:
254
- # Kconv.isutf8(str) -> obj or nil
170
+ # Kconv.isjis(str) => true or false
171
+ #
172
+ # Returns whether input encoding is ISO-2022-JP or not.
173
+ def isjis(str)
174
+ /\A [\t\n\r\x20-\x7E]*
175
+ (?:
176
+ (?:\x1b \x28 I [\x21-\x7E]*
177
+ |\x1b \x28 J [\x21-\x7E]*
178
+ |\x1b \x24 @ (?:[\x21-\x7E]{2})*
179
+ |\x1b \x24 B (?:[\x21-\x7E]{2})*
180
+ |\x1b \x24 \x28 D (?:[\x21-\x7E]{2})*
181
+ )*
182
+ \x1b \x28 B [\t\n\r\x20-\x7E]*
183
+ )*
184
+ \z/nox =~ str.dup.force_encoding('BINARY') ? true : false
185
+ end
186
+ module_function :isjis
187
+
188
+ # call-seq:
189
+ # Kconv.isutf8(str) => true or false
255
190
  #
256
191
  # Returns whether input encoding is UTF-8 or not.
257
- #
258
- # *Note* don't expect this return value is MatchData.
259
192
  def isutf8(str)
260
- RegexpUtf8.match( str )
193
+ str.dup.force_encoding(UTF8).valid_encoding?
261
194
  end
262
195
  module_function :isutf8
263
-
264
196
  end
265
197
 
266
198
  class String
267
199
  # call-seq:
268
- # String#kconv(out_code, in_code = Kconv::AUTO)
200
+ # String#kconv(to_enc, from_enc=nil)
269
201
  #
270
- # Convert <code>self</code> to out_code.
271
- # <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
272
- #
273
- # *Note*
274
- # This method decode MIME encoded string and
275
- # convert halfwidth katakana to fullwidth katakana.
276
- # If you don't want to decode them, use NKF.nkf.
277
- def kconv(out_code, in_code=Kconv::AUTO)
278
- Kconv::kconv(self, out_code, in_code)
202
+ # Convert <code>self</code> to <code>to_enc</code>.
203
+ # <code>to_enc</code> and <code>from_enc</code> are given as constants of Kconv or Encoding objects.
204
+ def kconv(to_enc, from_enc=nil)
205
+ from_enc = self.encoding if !from_enc && self.encoding != Encoding.list[0]
206
+ Kconv::kconv(self, to_enc, from_enc)
279
207
  end
280
-
208
+
281
209
  #
282
210
  # to Encoding
283
211
  #
284
-
212
+
285
213
  # call-seq:
286
- # String#tojis -> string
214
+ # String#tojis => string
287
215
  #
288
216
  # Convert <code>self</code> to ISO-2022-JP
289
- #
290
- # *Note*
291
- # This method decode MIME encoded string and
292
- # convert halfwidth katakana to fullwidth katakana.
293
- # If you don't want it, use NKF.nkf('-jxm0', str).
294
217
  def tojis; Kconv.tojis(self) end
295
218
 
296
219
  # call-seq:
297
- # String#toeuc -> string
220
+ # String#toeuc => string
298
221
  #
299
222
  # Convert <code>self</code> to EUC-JP
300
- #
301
- # *Note*
302
- # This method decode MIME encoded string and
303
- # convert halfwidth katakana to fullwidth katakana.
304
- # If you don't want it, use NKF.nkf('-exm0', str).
305
223
  def toeuc; Kconv.toeuc(self) end
306
224
 
307
225
  # call-seq:
308
- # String#tosjis -> string
226
+ # String#tosjis => string
309
227
  #
310
228
  # Convert <code>self</code> to Shift_JIS
311
- #
312
- # *Note*
313
- # This method decode MIME encoded string and
314
- # convert halfwidth katakana to fullwidth katakana.
315
- # If you don't want it, use NKF.nkf('-sxm0', str).
316
229
  def tosjis; Kconv.tosjis(self) end
317
230
 
318
231
  # call-seq:
319
- # String#toutf8 -> string
232
+ # String#toutf8 => string
320
233
  #
321
234
  # Convert <code>self</code> to UTF-8
322
- #
323
- # *Note*
324
- # This method decode MIME encoded string and
325
- # convert halfwidth katakana to fullwidth katakana.
326
- # If you don't want it, use NKF.nkf('-wxm0', str).
327
235
  def toutf8; Kconv.toutf8(self) end
328
236
 
329
237
  # call-seq:
330
- # String#toutf16 -> string
238
+ # String#toutf16 => string
331
239
  #
332
240
  # Convert <code>self</code> to UTF-16
333
- #
334
- # *Note*
335
- # This method decode MIME encoded string and
336
- # convert halfwidth katakana to fullwidth katakana.
337
- # If you don't want it, use NKF.nkf('-w16xm0', str).
338
241
  def toutf16; Kconv.toutf16(self) end
339
242
 
243
+ # call-seq:
244
+ # String#toutf32 => string
245
+ #
246
+ # Convert <code>self</code> to UTF-32
247
+ def toutf32; Kconv.toutf32(self) end
248
+
249
+ # call-seq:
250
+ # String#tolocale => string
251
+ #
252
+ # Convert <code>self</code> to locale encoding
253
+ def tolocale; Kconv.tolocale(self) end
254
+
340
255
  #
341
256
  # is Encoding
342
257
  #
343
258
 
344
259
  # call-seq:
345
- # String#iseuc -> obj or nil
260
+ # String#iseuc => true or false
346
261
  #
347
262
  # Returns whether <code>self</code>'s encoding is EUC-JP or not.
348
- #
349
- # *Note* don't expect this return value is MatchData.
350
263
  def iseuc; Kconv.iseuc(self) end
351
264
 
352
265
  # call-seq:
353
- # String#issjis -> obj or nil
266
+ # String#issjis => true or false
354
267
  #
355
268
  # Returns whether <code>self</code>'s encoding is Shift_JIS or not.
356
- #
357
- # *Note* don't expect this return value is MatchData.
358
269
  def issjis; Kconv.issjis(self) end
359
270
 
360
271
  # call-seq:
361
- # String#isutf8 -> obj or nil
272
+ # String#isjis => true or false
362
273
  #
363
- # Returns whether <code>self</code>'s encoding is UTF-8 or not.
274
+ # Returns whether <code>self</code>'s encoding is ISO-2022-JP or not.
275
+ def isjis; Kconv.isjis(self) end
276
+
277
+ # call-seq:
278
+ # String#isutf8 => true or false
364
279
  #
365
- # *Note* don't expect this return value is MatchData.
280
+ # Returns whether <code>self</code>'s encoding is UTF-8 or not.
366
281
  def isutf8; Kconv.isutf8(self) end
367
282
  end
@@ -1,5 +1,5 @@
1
1
  module RubySL
2
2
  module NKF
3
- VERSION = "1.1.0"
3
+ VERSION = "2.0.1"
4
4
  end
5
5
  end
@@ -17,8 +17,10 @@ Gem::Specification.new do |spec|
17
17
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
18
  spec.require_paths = ["lib"]
19
19
 
20
+ spec.required_ruby_version = "~> 2.0"
21
+
20
22
  spec.add_development_dependency "bundler", "~> 1.3"
21
23
  spec.add_development_dependency "rake", "~> 10.0"
22
24
  spec.add_development_dependency "mspec", "~> 1.5"
23
- spec.add_development_dependency "rubysl-prettyprint", "~> 1.0"
25
+ spec.add_development_dependency "rubysl-prettyprint", "~> 2.0"
24
26
  end