rubysl-nkf 1.1.0 → 2.0.1

This diff shows the changes between two publicly released versions of a package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # kconv.rb - Kanji Converter.
3
3
  #
4
- # $Id: kconv.rb 11708 2007-02-12 23:01:19Z shyouhei $
4
+ # $Id: kconv.rb 30112 2010-12-07 11:47:39Z naruse $
5
5
  #
6
6
  # ----
7
7
  #
@@ -18,9 +18,9 @@ module Kconv
18
18
  #
19
19
  # Public Constants
20
20
  #
21
-
21
+
22
22
  #Constant of Encoding
23
-
23
+
24
24
  # Auto-Detect
25
25
  AUTO = NKF::AUTO
26
26
  # ISO-2022-JP
@@ -44,85 +44,19 @@ module Kconv
44
44
  # UNKNOWN
45
45
  UNKNOWN = NKF::UNKNOWN
46
46
 
47
- #
48
- # Private Constants
49
- #
50
-
51
- # Revision of kconv.rb
52
- REVISION = %q$Revision: 11708 $
53
-
54
- #Regexp of Encoding
55
-
56
- # Regexp of Shift_JIS string (private constant)
57
- RegexpShiftjis = /\A(?:
58
- [\x00-\x7f\xa1-\xdf] |
59
- [\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]
60
- )*\z/nx
61
-
62
- # Regexp of EUC-JP string (private constant)
63
- RegexpEucjp = /\A(?:
64
- [\x00-\x7f] |
65
- \x8e [\xa1-\xdf] |
66
- \x8f [\xa1-\xfe] [\xa1-\xfe] |
67
- [\xa1-\xfe] [\xa1-\xfe]
68
- )*\z/nx
69
-
70
- # Regexp of UTF-8 string (private constant)
71
- RegexpUtf8 = /\A(?:
72
- [\x00-\x7f] |
73
- [\xc2-\xdf] [\x80-\xbf] |
74
- \xe0 [\xa0-\xbf] [\x80-\xbf] |
75
- [\xe1-\xef] [\x80-\xbf] [\x80-\xbf] |
76
- \xf0 [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
77
- [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
78
- \xf4 [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
79
- )*\z/nx
80
-
81
47
  #
82
48
  # Public Methods
83
49
  #
84
-
50
+
85
51
  # call-seq:
86
- # Kconv.kconv(str, out_code, in_code = Kconv::AUTO)
87
- #
88
- # Convert <code>str</code> to out_code.
89
- # <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
90
- #
91
- # *Note*
92
- # This method decode MIME encoded string and
93
- # convert halfwidth katakana to fullwidth katakana.
94
- # If you don't want to decode them, use NKF.nkf.
95
- def kconv(str, out_code, in_code = AUTO)
96
- opt = '-'
97
- case in_code
98
- when ::NKF::JIS
99
- opt << 'J'
100
- when ::NKF::EUC
101
- opt << 'E'
102
- when ::NKF::SJIS
103
- opt << 'S'
104
- when ::NKF::UTF8
105
- opt << 'W'
106
- when ::NKF::UTF16
107
- opt << 'W16'
108
- end
109
-
110
- case out_code
111
- when ::NKF::JIS
112
- opt << 'j'
113
- when ::NKF::EUC
114
- opt << 'e'
115
- when ::NKF::SJIS
116
- opt << 's'
117
- when ::NKF::UTF8
118
- opt << 'w'
119
- when ::NKF::UTF16
120
- opt << 'w16'
121
- when ::NKF::NOCONV
122
- return str
123
- end
124
-
125
- opt = '' if opt == '-'
52
+ # Kconv.kconv(str, to_enc, from_enc=nil)
53
+ #
54
+ # Convert <code>str</code> to <code>to_enc</code>.
55
+ # <code>to_enc</code> and <code>from_enc</code> are given as constants of Kconv or Encoding objects.
56
+ def kconv(str, to_enc, from_enc=nil)
57
+ opt = ''
58
+ opt += ' --ic=' + from_enc.to_s if from_enc
59
+ opt += ' --oc=' + to_enc.to_s if to_enc
126
60
 
127
61
  ::NKF::nkf(opt, str)
128
62
  end
@@ -133,235 +67,216 @@ module Kconv
133
67
  #
134
68
 
135
69
  # call-seq:
136
- # Kconv.tojis(str) -> string
70
+ # Kconv.tojis(str) => string
137
71
  #
138
72
  # Convert <code>str</code> to ISO-2022-JP
139
- #
140
- # *Note*
141
- # This method decode MIME encoded string and
142
- # convert halfwidth katakana to fullwidth katakana.
143
- # If you don't want it, use NKF.nkf('-jxm0', str).
144
73
  def tojis(str)
145
- ::NKF::nkf('-jm', str)
74
+ kconv(str, JIS)
146
75
  end
147
76
  module_function :tojis
148
77
 
149
78
  # call-seq:
150
- # Kconv.toeuc(str) -> string
79
+ # Kconv.toeuc(str) => string
151
80
  #
152
81
  # Convert <code>str</code> to EUC-JP
153
- #
154
- # *Note*
155
- # This method decode MIME encoded string and
156
- # convert halfwidth katakana to fullwidth katakana.
157
- # If you don't want it, use NKF.nkf('-exm0', str).
158
82
  def toeuc(str)
159
- ::NKF::nkf('-em', str)
83
+ kconv(str, EUC)
160
84
  end
161
85
  module_function :toeuc
162
86
 
163
87
  # call-seq:
164
- # Kconv.tosjis(str) -> string
88
+ # Kconv.tosjis(str) => string
165
89
  #
166
90
  # Convert <code>str</code> to Shift_JIS
167
- #
168
- # *Note*
169
- # This method decode MIME encoded string and
170
- # convert halfwidth katakana to fullwidth katakana.
171
- # If you don't want it, use NKF.nkf('-sxm0', str).
172
91
  def tosjis(str)
173
- ::NKF::nkf('-sm', str)
92
+ kconv(str, SJIS)
174
93
  end
175
94
  module_function :tosjis
176
95
 
177
96
  # call-seq:
178
- # Kconv.toutf8(str) -> string
97
+ # Kconv.toutf8(str) => string
179
98
  #
180
99
  # Convert <code>str</code> to UTF-8
181
- #
182
- # *Note*
183
- # This method decode MIME encoded string and
184
- # convert halfwidth katakana to fullwidth katakana.
185
- # If you don't want it, use NKF.nkf('-wxm0', str).
186
100
  def toutf8(str)
187
- ::NKF::nkf('-wm', str)
101
+ kconv(str, UTF8)
188
102
  end
189
103
  module_function :toutf8
190
104
 
191
105
  # call-seq:
192
- # Kconv.toutf16(str) -> string
106
+ # Kconv.toutf16(str) => string
193
107
  #
194
108
  # Convert <code>str</code> to UTF-16
195
- #
196
- # *Note*
197
- # This method decode MIME encoded string and
198
- # convert halfwidth katakana to fullwidth katakana.
199
- # If you don't want it, use NKF.nkf('-w16xm0', str).
200
109
  def toutf16(str)
201
- ::NKF::nkf('-w16m', str)
110
+ kconv(str, UTF16)
202
111
  end
203
112
  module_function :toutf16
204
113
 
114
+ # call-seq:
115
+ # Kconv.toutf32(str) => string
116
+ #
117
+ # Convert <code>str</code> to UTF-32
118
+ def toutf32(str)
119
+ kconv(str, UTF32)
120
+ end
121
+ module_function :toutf32
122
+
123
+ # call-seq:
124
+ # Kconv.tolocale => string
125
+ #
126
+ # Convert <code>self</code> to locale encoding
127
+ def tolocale(str)
128
+ kconv(str, Encoding.locale_charmap)
129
+ end
130
+ module_function :tolocale
131
+
205
132
  #
206
133
  # guess
207
134
  #
208
135
 
209
136
  # call-seq:
210
- # Kconv.guess(str) -> integer
137
+ # Kconv.guess(str) => encoding
211
138
  #
212
- # Guess input encoding by NKF.guess2
139
+ # Guess input encoding by NKF.guess
213
140
  def guess(str)
214
141
  ::NKF::guess(str)
215
142
  end
216
143
  module_function :guess
217
144
 
218
- # call-seq:
219
- # Kconv.guess_old(str) -> integer
220
- #
221
- # Guess input encoding by NKF.guess1
222
- def guess_old(str)
223
- ::NKF::guess1(str)
224
- end
225
- module_function :guess_old
226
-
227
145
  #
228
146
  # isEncoding
229
147
  #
230
148
 
231
149
  # call-seq:
232
- # Kconv.iseuc(str) -> obj or nil
150
+ # Kconv.iseuc(str) => true or false
233
151
  #
234
152
  # Returns whether input encoding is EUC-JP or not.
235
153
  #
236
154
  # *Note* don't expect this return value is MatchData.
237
155
  def iseuc(str)
238
- RegexpEucjp.match( str )
156
+ str.dup.force_encoding(EUC).valid_encoding?
239
157
  end
240
158
  module_function :iseuc
241
159
 
242
160
  # call-seq:
243
- # Kconv.issjis(str) -> obj or nil
161
+ # Kconv.issjis(str) => true or false
244
162
  #
245
163
  # Returns whether input encoding is Shift_JIS or not.
246
- #
247
- # *Note* don't expect this return value is MatchData.
248
164
  def issjis(str)
249
- RegexpShiftjis.match( str )
165
+ str.dup.force_encoding(SJIS).valid_encoding?
250
166
  end
251
167
  module_function :issjis
252
168
 
253
169
  # call-seq:
254
- # Kconv.isutf8(str) -> obj or nil
170
+ # Kconv.isjis(str) => true or false
171
+ #
172
+ # Returns whether input encoding is ISO-2022-JP or not.
173
+ def isjis(str)
174
+ /\A [\t\n\r\x20-\x7E]*
175
+ (?:
176
+ (?:\x1b \x28 I [\x21-\x7E]*
177
+ |\x1b \x28 J [\x21-\x7E]*
178
+ |\x1b \x24 @ (?:[\x21-\x7E]{2})*
179
+ |\x1b \x24 B (?:[\x21-\x7E]{2})*
180
+ |\x1b \x24 \x28 D (?:[\x21-\x7E]{2})*
181
+ )*
182
+ \x1b \x28 B [\t\n\r\x20-\x7E]*
183
+ )*
184
+ \z/nox =~ str.dup.force_encoding('BINARY') ? true : false
185
+ end
186
+ module_function :isjis
187
+
188
+ # call-seq:
189
+ # Kconv.isutf8(str) => true or false
255
190
  #
256
191
  # Returns whether input encoding is UTF-8 or not.
257
- #
258
- # *Note* don't expect this return value is MatchData.
259
192
  def isutf8(str)
260
- RegexpUtf8.match( str )
193
+ str.dup.force_encoding(UTF8).valid_encoding?
261
194
  end
262
195
  module_function :isutf8
263
-
264
196
  end
265
197
 
266
198
  class String
267
199
  # call-seq:
268
- # String#kconv(out_code, in_code = Kconv::AUTO)
200
+ # String#kconv(to_enc, from_enc)
269
201
  #
270
- # Convert <code>self</code> to out_code.
271
- # <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
272
- #
273
- # *Note*
274
- # This method decode MIME encoded string and
275
- # convert halfwidth katakana to fullwidth katakana.
276
- # If you don't want to decode them, use NKF.nkf.
277
- def kconv(out_code, in_code=Kconv::AUTO)
278
- Kconv::kconv(self, out_code, in_code)
202
+ # Convert <code>self</code> to <code>to_enc</code>.
203
+ # <code>to_enc</code> and <code>from_enc</code> are given as constants of Kconv or Encoding objects.
204
+ def kconv(to_enc, from_enc=nil)
205
+ from_enc = self.encoding if !from_enc && self.encoding != Encoding.list[0]
206
+ Kconv::kconv(self, to_enc, from_enc)
279
207
  end
280
-
208
+
281
209
  #
282
210
  # to Encoding
283
211
  #
284
-
212
+
285
213
  # call-seq:
286
- # String#tojis -> string
214
+ # String#tojis => string
287
215
  #
288
216
  # Convert <code>self</code> to ISO-2022-JP
289
- #
290
- # *Note*
291
- # This method decode MIME encoded string and
292
- # convert halfwidth katakana to fullwidth katakana.
293
- # If you don't want it, use NKF.nkf('-jxm0', str).
294
217
  def tojis; Kconv.tojis(self) end
295
218
 
296
219
  # call-seq:
297
- # String#toeuc -> string
220
+ # String#toeuc => string
298
221
  #
299
222
  # Convert <code>self</code> to EUC-JP
300
- #
301
- # *Note*
302
- # This method decode MIME encoded string and
303
- # convert halfwidth katakana to fullwidth katakana.
304
- # If you don't want it, use NKF.nkf('-exm0', str).
305
223
  def toeuc; Kconv.toeuc(self) end
306
224
 
307
225
  # call-seq:
308
- # String#tosjis -> string
226
+ # String#tosjis => string
309
227
  #
310
228
  # Convert <code>self</code> to Shift_JIS
311
- #
312
- # *Note*
313
- # This method decode MIME encoded string and
314
- # convert halfwidth katakana to fullwidth katakana.
315
- # If you don't want it, use NKF.nkf('-sxm0', str).
316
229
  def tosjis; Kconv.tosjis(self) end
317
230
 
318
231
  # call-seq:
319
- # String#toutf8 -> string
232
+ # String#toutf8 => string
320
233
  #
321
234
  # Convert <code>self</code> to UTF-8
322
- #
323
- # *Note*
324
- # This method decode MIME encoded string and
325
- # convert halfwidth katakana to fullwidth katakana.
326
- # If you don't want it, use NKF.nkf('-wxm0', str).
327
235
  def toutf8; Kconv.toutf8(self) end
328
236
 
329
237
  # call-seq:
330
- # String#toutf16 -> string
238
+ # String#toutf16 => string
331
239
  #
332
240
  # Convert <code>self</code> to UTF-16
333
- #
334
- # *Note*
335
- # This method decode MIME encoded string and
336
- # convert halfwidth katakana to fullwidth katakana.
337
- # If you don't want it, use NKF.nkf('-w16xm0', str).
338
241
  def toutf16; Kconv.toutf16(self) end
339
242
 
243
+ # call-seq:
244
+ # String#toutf32 => string
245
+ #
246
+ # Convert <code>self</code> to UTF-32
247
+ def toutf32; Kconv.toutf32(self) end
248
+
249
+ # call-seq:
250
+ # String#tolocale => string
251
+ #
252
+ # Convert <code>self</code> to locale encoding
253
+ def tolocale; Kconv.tolocale(self) end
254
+
340
255
  #
341
256
  # is Encoding
342
257
  #
343
258
 
344
259
  # call-seq:
345
- # String#iseuc -> obj or nil
260
+ # String#iseuc => true or false
346
261
  #
347
262
  # Returns whether <code>self</code>'s encoding is EUC-JP or not.
348
- #
349
- # *Note* don't expect this return value is MatchData.
350
263
  def iseuc; Kconv.iseuc(self) end
351
264
 
352
265
  # call-seq:
353
- # String#issjis -> obj or nil
266
+ # String#issjis => true or false
354
267
  #
355
268
  # Returns whether <code>self</code>'s encoding is Shift_JIS or not.
356
- #
357
- # *Note* don't expect this return value is MatchData.
358
269
  def issjis; Kconv.issjis(self) end
359
270
 
360
271
  # call-seq:
361
- # String#isutf8 -> obj or nil
272
+ # String#isjis => true or false
362
273
  #
363
- # Returns whether <code>self</code>'s encoding is UTF-8 or not.
274
+ # Returns whether <code>self</code>'s encoding is ISO-2022-JP or not.
275
+ def isjis; Kconv.isjis(self) end
276
+
277
+ # call-seq:
278
+ # String#isutf8 => true or false
364
279
  #
365
- # *Note* don't expect this return value is MatchData.
280
+ # Returns whether <code>self</code>'s encoding is UTF-8 or not.
366
281
  def isutf8; Kconv.isutf8(self) end
367
282
  end
@@ -1,5 +1,5 @@
1
1
  module RubySL
2
2
  module NKF
3
- VERSION = "1.1.0"
3
+ VERSION = "2.0.1"
4
4
  end
5
5
  end
@@ -17,8 +17,10 @@ Gem::Specification.new do |spec|
17
17
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
18
  spec.require_paths = ["lib"]
19
19
 
20
+ spec.required_ruby_version = "~> 2.0"
21
+
20
22
  spec.add_development_dependency "bundler", "~> 1.3"
21
23
  spec.add_development_dependency "rake", "~> 10.0"
22
24
  spec.add_development_dependency "mspec", "~> 1.5"
23
- spec.add_development_dependency "rubysl-prettyprint", "~> 1.0"
25
+ spec.add_development_dependency "rubysl-prettyprint", "~> 2.0"
24
26
  end