uchardet 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +3 -0
- data/.travis.yml +5 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +25 -0
- data/LICENSE.txt +21 -0
- data/README.md +60 -0
- data/Rakefile +19 -18
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/{bin → exe}/uchardet +0 -2
- data/ext/extconf.rb +12 -0
- data/ext/{uchardet/uchardet.c → uchardet.c} +55 -45
- data/lib/uchardet.rb +2 -18
- data/lib/uchardet/cli.rb +11 -7
- data/lib/uchardet/version.rb +3 -0
- data/uchardet.gemspec +27 -0
- metadata +102 -72
- data/History.txt +0 -11
- data/Manifest.txt +0 -18
- data/README.rdoc +0 -52
- data/ext/uchardet/extconf.rb +0 -12
- data/script/console +0 -10
- data/script/destroy +0 -14
- data/script/generate +0 -14
- data/tasks/extconf.rake +0 -13
- data/tasks/extconf/uchardet.rake +0 -43
- data/test/test_helper.rb +0 -3
- data/test/test_uchardet.rb +0 -22
- data/test/test_uchardet_cli.rb +0 -14
- data/test/test_uchardet_extn.rb +0 -101
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3f88bdf62e92c58a707c9099d3b00128ba1e944d7ed242cc756401e4d6ccdecc
|
4
|
+
data.tar.gz: 3e7ad2051d31269fdffde17c3f247836c8c69e525b6da8c27b0e8b7c58ef9880
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e0e698ab4a3eec93dc0cbde2aff53728fb98947266703f34ab29c93ede539b0ef32a4e32589d62b0bff319e485e6c2f7b4dad38d8299d98b97ab32167a4749fc
|
7
|
+
data.tar.gz: 4bd4efbb61bf4cad064d5c78b3b8d412790691bdfea58dd062c7e4168ccd37791b56a62803e9604efa03527b84e6bbdcf8607995ef2c9ea913397779636009ce
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
uchardet (0.2.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
minitest (5.11.3)
|
10
|
+
rake (10.5.0)
|
11
|
+
rake-compiler (1.0.4)
|
12
|
+
rake
|
13
|
+
|
14
|
+
PLATFORMS
|
15
|
+
ruby
|
16
|
+
|
17
|
+
DEPENDENCIES
|
18
|
+
bundler (~> 1.16)
|
19
|
+
minitest (~> 5.0)
|
20
|
+
rake (~> 10.0)
|
21
|
+
rake-compiler (~> 1.0)
|
22
|
+
uchardet!
|
23
|
+
|
24
|
+
BUNDLED WITH
|
25
|
+
1.16.2
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2009-2018 Dmitri Goutnik
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# Uchardet
|
2
|
+
|
3
|
+
Fast character set encoding detection using International Components for Unicode library: [International Components for Unicode](http://site.icu-project.org/)
|
4
|
+
|
5
|
+
* https://rubygems.org/gems/uchardet
|
6
|
+
* https://github.com/dmgk/uchardet
|
7
|
+
* https://www.rubydoc.info/gems/uchardet/
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Add this line to your application's Gemfile:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
gem 'uchardet'
|
15
|
+
```
|
16
|
+
|
17
|
+
And then execute:
|
18
|
+
|
19
|
+
$ bundle
|
20
|
+
|
21
|
+
Or install it yourself as:
|
22
|
+
|
23
|
+
$ gem install uchardet
|
24
|
+
|
25
|
+
## Usage
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
require 'open-uri'
|
29
|
+
require 'uchardet'
|
30
|
+
|
31
|
+
text = open('https://raw.githubusercontent.com/dmgk/uchardet/master/test/samples/shift_jis.txt').read
|
32
|
+
encoding = ICU::UCharsetDetector.detect(text)
|
33
|
+
encoding # => {:encoding=>"Shift_JIS", :confidence=>100, :language=>"ja"}
|
34
|
+
```
|
35
|
+
|
36
|
+
From command line:
|
37
|
+
|
38
|
+
```
|
39
|
+
$ uchardet
|
40
|
+
|
41
|
+
Usage: uchardet [options] file
|
42
|
+
-l, --list Display list of detectable character sets.
|
43
|
+
-s, --strip Strip HTML or XML markup before detection.
|
44
|
+
-e, --encoding Hint the charset detector about possible encoding.
|
45
|
+
-a, --all Show all matching encodings.
|
46
|
+
-h, --help Show this help message.
|
47
|
+
-v, --version Show version.
|
48
|
+
|
49
|
+
$ uchardet `which uchardet`
|
50
|
+
|
51
|
+
ISO-8859-1 (confidence 25%)
|
52
|
+
```
|
53
|
+
|
54
|
+
## Contributing
|
55
|
+
|
56
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/dmgk/uchardet
|
57
|
+
|
58
|
+
## License
|
59
|
+
|
60
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
CHANGED
@@ -1,23 +1,24 @@
|
|
1
|
-
require '
|
2
|
-
|
3
|
-
require '
|
4
|
-
require 'fileutils'
|
5
|
-
require './lib/uchardet'
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rake/extensiontask'
|
3
|
+
require 'rake/testtask'
|
6
4
|
|
7
|
-
|
5
|
+
GEMSPEC = Gem::Specification.load("uchardet.gemspec")
|
8
6
|
|
9
|
-
#
|
10
|
-
#
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
7
|
+
# Rake::ExtensionTask.new(:uchardet_ext) do |t|
|
8
|
+
# t.lib_dir = 'lib/uchardet'
|
9
|
+
# end
|
10
|
+
|
11
|
+
Rake::ExtensionTask.new(:uchardet_ext, GEMSPEC) do |t|
|
12
|
+
t.ext_dir = 'ext'
|
13
|
+
end
|
14
|
+
|
15
|
+
Rake::TestTask.new(:test) do |t|
|
16
|
+
t.libs << 'test'
|
17
|
+
t.libs << 'lib'
|
18
|
+
t.test_files = FileList['test/**/*_test.rb']
|
16
19
|
end
|
17
20
|
|
18
|
-
|
19
|
-
|
21
|
+
task build: :compile
|
22
|
+
task test: :compile
|
20
23
|
|
21
|
-
|
22
|
-
# remove_task :default
|
23
|
-
# task :default => [:spec, :features]
|
24
|
+
task default: :test
|
data/bin/console
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "uchardet"
|
5
|
+
require 'open-uri'
|
6
|
+
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
9
|
+
|
10
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
11
|
+
# require "pry"
|
12
|
+
# Pry.start
|
13
|
+
|
14
|
+
require "irb"
|
15
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/{bin → exe}/uchardet
RENAMED
data/ext/extconf.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'mkmf'
|
2
|
+
|
3
|
+
icu_config = `which icu-config`.strip
|
4
|
+
if icu_config.empty?
|
5
|
+
abort %q{Could not find ICU libraries and/or development tools. Try installing "icu-devtools" or "icu" package.}
|
6
|
+
end
|
7
|
+
|
8
|
+
$LIBS << ' ' + `#{icu_config} --ldflags-system`.chomp
|
9
|
+
$LIBS << ' ' + `#{icu_config} --ldflags-libsonly`.chomp
|
10
|
+
$LDFLAGS << ' ' + `#{icu_config} --ldflags-searchpath`.chomp
|
11
|
+
|
12
|
+
create_makefile('uchardet_ext')
|
@@ -56,7 +56,7 @@ UCharsetDetector_get_input_filtered(VALUE self)
|
|
56
56
|
*
|
57
57
|
* Enable filtering of input text. If filtering is enabled,
|
58
58
|
* text within angle brackets ("<" and ">") will be removed
|
59
|
-
* before detection, which will remove most HTML or
|
59
|
+
* before detection, which will remove most HTML or XML markup.
|
60
60
|
*/
|
61
61
|
static VALUE
|
62
62
|
UCharsetDetector_set_input_filtered(VALUE self, VALUE flag)
|
@@ -110,7 +110,7 @@ UCharsetDetector_get_declared_encoding(VALUE self)
|
|
110
110
|
*
|
111
111
|
* Set the declared encoding for charset detection.
|
112
112
|
* The declared encoding of an input text is an encoding obtained
|
113
|
-
* by the user from an
|
113
|
+
* by the user from an HTTP header or XML declaration or similar source that
|
114
114
|
* can be provided as an additional hint to the charset detector.
|
115
115
|
*/
|
116
116
|
static VALUE
|
@@ -123,12 +123,12 @@ static void
|
|
123
123
|
set_text(VALUE self, VALUE text)
|
124
124
|
{
|
125
125
|
if (!NIL_P(text)) {
|
126
|
-
text = StringValue(text);
|
127
|
-
|
128
126
|
UErrorCode status = U_ZERO_ERROR;
|
129
127
|
UCharsetDetector *detector;
|
128
|
+
|
130
129
|
Data_Get_Struct(self, UCharsetDetector, detector);
|
131
|
-
|
130
|
+
|
131
|
+
text = StringValue(text);
|
132
132
|
ucsdet_setText(detector, StringValuePtr(text), RSTRING_LEN(text), &status);
|
133
133
|
ensure(status);
|
134
134
|
|
@@ -140,12 +140,12 @@ static void
|
|
140
140
|
set_declared_encoding(VALUE self, VALUE declared_encoding)
|
141
141
|
{
|
142
142
|
if (!NIL_P(declared_encoding)){
|
143
|
-
declared_encoding = StringValue(declared_encoding);
|
144
|
-
|
145
143
|
UErrorCode status = U_ZERO_ERROR;
|
146
144
|
UCharsetDetector *detector;
|
145
|
+
|
147
146
|
Data_Get_Struct(self, UCharsetDetector, detector);
|
148
147
|
|
148
|
+
declared_encoding = StringValue(declared_encoding);
|
149
149
|
ucsdet_setDeclaredEncoding(detector, StringValuePtr(declared_encoding), RSTRING_LEN(declared_encoding), &status);
|
150
150
|
ensure(status);
|
151
151
|
|
@@ -183,7 +183,8 @@ UCharsetDetector_initialize(int argc, VALUE *argv, VALUE self)
|
|
183
183
|
* call-seq:
|
184
184
|
* detect(text=nil, declared_encoding=nil)
|
185
185
|
*
|
186
|
-
* Return the charset that best matches the supplied input data.
|
186
|
+
* Return the charset that best matches the supplied input data. If no match
|
187
|
+
* could be found, this method returns nil.
|
187
188
|
*
|
188
189
|
* Note though, that because the detection
|
189
190
|
* only looks at the start of the input data,
|
@@ -199,28 +200,32 @@ UCharsetDetector_detect(int argc, VALUE *argv, VALUE self)
|
|
199
200
|
{
|
200
201
|
VALUE text;
|
201
202
|
VALUE declared_encoding;
|
203
|
+
UErrorCode status = U_ZERO_ERROR;
|
204
|
+
UCharsetDetector *detector;
|
205
|
+
const UCharsetMatch *match = NULL;
|
206
|
+
const char *encoding_name = "";
|
207
|
+
int32_t encoding_confidence = 0;
|
208
|
+
const char *encoding_language = "";
|
209
|
+
VALUE hash = rb_hash_new();
|
202
210
|
|
203
211
|
rb_scan_args(argc, argv, "02", &text, &declared_encoding);
|
204
212
|
set_text(self, text);
|
205
213
|
set_declared_encoding(self, declared_encoding);
|
206
214
|
|
207
|
-
UErrorCode status = U_ZERO_ERROR;
|
208
|
-
UCharsetDetector *detector;
|
209
215
|
Data_Get_Struct(self, UCharsetDetector, detector);
|
210
|
-
|
211
|
-
const UCharsetMatch *match = ucsdet_detect(detector, &status);
|
212
|
-
ensure(status);
|
213
|
-
|
214
|
-
const char *encoding_name = ucsdet_getName(match, &status);
|
215
|
-
ensure(status);
|
216
216
|
|
217
|
-
|
217
|
+
match = ucsdet_detect(detector, &status);
|
218
218
|
ensure(status);
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
219
|
+
|
220
|
+
if (match) {
|
221
|
+
encoding_name = ucsdet_getName(match, &status);
|
222
|
+
ensure(status);
|
223
|
+
encoding_confidence = ucsdet_getConfidence(match, &status);
|
224
|
+
ensure(status);
|
225
|
+
encoding_language = ucsdet_getLanguage(match, &status);
|
226
|
+
ensure(status);
|
227
|
+
}
|
228
|
+
|
224
229
|
rb_hash_aset(hash, ID2SYM(rb_intern("encoding")), rb_str_new2(encoding_name));
|
225
230
|
rb_hash_aset(hash, ID2SYM(rb_intern("confidence")), INT2NUM(encoding_confidence));
|
226
231
|
rb_hash_aset(hash, ID2SYM(rb_intern("language")), rb_str_new2(encoding_language));
|
@@ -249,37 +254,41 @@ UCharsetDetector_detect_all(int argc, VALUE *argv, VALUE self)
|
|
249
254
|
{
|
250
255
|
VALUE text;
|
251
256
|
VALUE declared_encoding;
|
257
|
+
UCharsetDetector *detector;
|
258
|
+
UErrorCode status = U_ZERO_ERROR;
|
259
|
+
const UCharsetMatch **matches = NULL;
|
260
|
+
int32_t matches_found = 0;
|
261
|
+
VALUE ary = rb_ary_new();
|
262
|
+
int i;
|
252
263
|
|
253
264
|
rb_scan_args(argc, argv, "02", &text, &declared_encoding);
|
254
265
|
set_text(self, text);
|
255
266
|
set_declared_encoding(self, declared_encoding);
|
256
267
|
|
257
|
-
UCharsetDetector *detector;
|
258
268
|
Data_Get_Struct(self, UCharsetDetector, detector);
|
259
|
-
UErrorCode status = U_ZERO_ERROR;
|
260
|
-
int32_t matches_found;
|
261
269
|
|
262
|
-
|
270
|
+
matches = ucsdet_detectAll(detector, &matches_found, &status);
|
263
271
|
ensure(status);
|
264
272
|
|
265
|
-
VALUE ary = rb_ary_new();
|
266
|
-
int i = 0;
|
267
|
-
|
268
273
|
for (i = 0; i < matches_found; i++) {
|
269
|
-
const char *encoding_name =
|
270
|
-
|
274
|
+
const char *encoding_name = "";
|
275
|
+
int32_t encoding_confidence = 0;
|
276
|
+
const char *encoding_language = "";
|
277
|
+
VALUE hash = rb_hash_new();
|
271
278
|
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
279
|
+
if (matches[i]) {
|
280
|
+
encoding_name = ucsdet_getName(matches[i], &status);
|
281
|
+
ensure(status);
|
282
|
+
encoding_confidence = ucsdet_getConfidence(matches[i], &status);
|
283
|
+
ensure(status);
|
284
|
+
encoding_language = ucsdet_getLanguage(matches[i], &status);
|
285
|
+
ensure(status);
|
286
|
+
}
|
277
287
|
|
278
|
-
VALUE hash = rb_hash_new();
|
279
288
|
rb_hash_aset(hash, ID2SYM(rb_intern("encoding")), rb_str_new2(encoding_name));
|
280
289
|
rb_hash_aset(hash, ID2SYM(rb_intern("confidence")), INT2NUM(encoding_confidence));
|
281
290
|
rb_hash_aset(hash, ID2SYM(rb_intern("language")), rb_str_new2(encoding_language));
|
282
|
-
|
291
|
+
|
283
292
|
rb_ary_push(ary, hash);
|
284
293
|
}
|
285
294
|
|
@@ -296,16 +305,17 @@ static VALUE
|
|
296
305
|
UCharsetDetector_get_detectable_charsets(VALUE self)
|
297
306
|
{
|
298
307
|
UCharsetDetector *detector;
|
299
|
-
Data_Get_Struct(self, UCharsetDetector, detector);
|
300
308
|
UErrorCode status = U_ZERO_ERROR;
|
309
|
+
UEnumeration *charsets = NULL;
|
310
|
+
const char *charset_name = "";
|
311
|
+
int32_t result_length = 0;
|
312
|
+
VALUE ary = rb_ary_new();
|
313
|
+
|
314
|
+
Data_Get_Struct(self, UCharsetDetector, detector);
|
301
315
|
|
302
|
-
|
316
|
+
charsets = ucsdet_getAllDetectableCharsets(detector, &status);
|
303
317
|
ensure(status);
|
304
318
|
|
305
|
-
VALUE ary = rb_ary_new();
|
306
|
-
int32_t result_length;
|
307
|
-
const char *charset_name;
|
308
|
-
|
309
319
|
while (charset_name = uenum_next(charsets, &result_length, &status)) {
|
310
320
|
ensure(status);
|
311
321
|
rb_ary_push(ary, rb_str_new2(charset_name));
|
@@ -318,7 +328,7 @@ UCharsetDetector_get_detectable_charsets(VALUE self)
|
|
318
328
|
/*
|
319
329
|
*/
|
320
330
|
void
|
321
|
-
|
331
|
+
Init_uchardet_ext()
|
322
332
|
{
|
323
333
|
VALUE mICU = rb_define_module("ICU");
|
324
334
|
|
data/lib/uchardet.rb
CHANGED
@@ -1,35 +1,19 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
module Uchardet
|
5
|
-
VERSION = '0.1.3'
|
6
|
-
end
|
7
|
-
|
8
|
-
begin
|
9
|
-
require 'uchardet.so'
|
10
|
-
rescue LoadError
|
11
|
-
# uh-oh
|
12
|
-
end
|
1
|
+
require 'uchardet_ext'
|
2
|
+
require 'uchardet/version'
|
13
3
|
|
14
4
|
module ICU # :main: README
|
15
5
|
class UCharsetDetector # :main: README
|
16
|
-
##
|
17
6
|
# Shortcut for ICU::UCharsetDetector#detect
|
18
|
-
#
|
19
7
|
def self.detect(*args)
|
20
8
|
self.new.detect(*args)
|
21
9
|
end
|
22
10
|
|
23
|
-
##
|
24
11
|
# Shortcut for ICU::UCharsetDetector#detect_all
|
25
|
-
#
|
26
12
|
def self.detect_all(*args)
|
27
13
|
self.new.detect_all(*args)
|
28
14
|
end
|
29
15
|
|
30
|
-
##
|
31
16
|
# Shortcut for ICU::UCharsetDetector#detectable_charsets
|
32
|
-
#
|
33
17
|
def self.detectable_charsets
|
34
18
|
self.new.detectable_charsets
|
35
19
|
end
|
data/lib/uchardet/cli.rb
CHANGED
@@ -1,18 +1,19 @@
|
|
1
1
|
require 'optparse'
|
2
|
+
require 'uchardet'
|
2
3
|
|
3
4
|
module Uchardet
|
4
5
|
class CLI
|
5
6
|
def self.execute(stdout, args=[])
|
6
7
|
@stdout = stdout
|
7
8
|
@options = {
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
9
|
+
input_filtered: false,
|
10
|
+
declared_encoding: nil,
|
11
|
+
detect_all: false,
|
12
|
+
path: nil
|
12
13
|
}
|
13
14
|
|
14
|
-
|
15
|
-
opts.banner = <<-BANNER.gsub(/^\s*/,'')
|
15
|
+
OptionParser.new do |opts|
|
16
|
+
opts.banner = <<-BANNER.gsub(/^\s*/, '')
|
16
17
|
Usage: #{File.basename($0)} [options] file
|
17
18
|
BANNER
|
18
19
|
|
@@ -31,6 +32,9 @@ module Uchardet
|
|
31
32
|
opts.on("-h", "--help",
|
32
33
|
"Show this help message."
|
33
34
|
) { @stdout.puts opts; exit }
|
35
|
+
opts.on("-v", "--version",
|
36
|
+
"Show version."
|
37
|
+
) { @stdout.puts Uchardet::VERSION; exit }
|
34
38
|
|
35
39
|
if args.empty?
|
36
40
|
@stdout.puts opts
|
@@ -54,7 +58,7 @@ module Uchardet
|
|
54
58
|
end
|
55
59
|
|
56
60
|
def self.list
|
57
|
-
ICU::UCharsetDetector.detectable_charsets.uniq.sort.each {
|
61
|
+
ICU::UCharsetDetector.detectable_charsets.uniq.sort.each {|name| @stdout.puts name}
|
58
62
|
end
|
59
63
|
|
60
64
|
def self.detect
|
data/uchardet.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
lib = File.expand_path('../lib', __FILE__)
|
2
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
+
require 'uchardet/version'
|
4
|
+
|
5
|
+
Gem::Specification.new do |spec|
|
6
|
+
spec.name = 'uchardet'
|
7
|
+
spec.version = Uchardet::VERSION
|
8
|
+
spec.authors = ['Dmitri Goutnik']
|
9
|
+
spec.email = ['dg@syrec.org']
|
10
|
+
|
11
|
+
spec.summary = 'Fast character set encoding detection using International Components for Unicode library.'
|
12
|
+
spec.homepage = 'https://github.com/dmgk/uchardet'
|
13
|
+
spec.license = 'MIT'
|
14
|
+
|
15
|
+
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
16
|
+
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
17
|
+
end
|
18
|
+
spec.bindir = 'exe'
|
19
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
20
|
+
spec.require_paths = ['lib']
|
21
|
+
spec.extensions = ['ext/extconf.rb']
|
22
|
+
|
23
|
+
spec.add_development_dependency 'bundler', '~> 1.16'
|
24
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
25
|
+
spec.add_development_dependency 'rake-compiler', '~> 1.0'
|
26
|
+
spec.add_development_dependency 'minitest', '~> 5.0'
|
27
|
+
end
|
metadata
CHANGED
@@ -1,89 +1,119 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: uchardet
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
|
-
authors:
|
6
|
+
authors:
|
7
7
|
- Dmitri Goutnik
|
8
8
|
autorequire:
|
9
|
-
bindir:
|
9
|
+
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
11
|
+
date: 2018-05-29 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.16'
|
17
20
|
type: :development
|
18
|
-
|
19
|
-
version_requirements: !ruby/object:Gem::Requirement
|
20
|
-
requirements:
|
21
|
-
- - "
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version:
|
24
|
-
|
25
|
-
|
26
|
-
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.16'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake-compiler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '5.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '5.0'
|
69
|
+
description:
|
70
|
+
email:
|
27
71
|
- dg@syrec.org
|
28
|
-
executables:
|
72
|
+
executables:
|
29
73
|
- uchardet
|
30
|
-
extensions:
|
31
|
-
- ext/
|
32
|
-
extra_rdoc_files:
|
33
|
-
|
34
|
-
-
|
35
|
-
-
|
36
|
-
|
37
|
-
-
|
38
|
-
-
|
39
|
-
- README.
|
74
|
+
extensions:
|
75
|
+
- ext/extconf.rb
|
76
|
+
extra_rdoc_files: []
|
77
|
+
files:
|
78
|
+
- ".gitignore"
|
79
|
+
- ".travis.yml"
|
80
|
+
- Gemfile
|
81
|
+
- Gemfile.lock
|
82
|
+
- LICENSE.txt
|
83
|
+
- README.md
|
40
84
|
- Rakefile
|
41
|
-
- bin/
|
42
|
-
-
|
43
|
-
-
|
85
|
+
- bin/console
|
86
|
+
- bin/setup
|
87
|
+
- exe/uchardet
|
88
|
+
- ext/extconf.rb
|
89
|
+
- ext/uchardet.c
|
44
90
|
- lib/uchardet.rb
|
45
91
|
- lib/uchardet/cli.rb
|
46
|
-
-
|
47
|
-
-
|
48
|
-
|
49
|
-
|
50
|
-
-
|
51
|
-
|
52
|
-
- test/test_uchardet.rb
|
53
|
-
- test/test_uchardet_cli.rb
|
54
|
-
- test/test_uchardet_extn.rb
|
55
|
-
has_rdoc: true
|
56
|
-
homepage: http://rubyforge.org/projects/uchardet/
|
57
|
-
licenses: []
|
58
|
-
|
92
|
+
- lib/uchardet/version.rb
|
93
|
+
- uchardet.gemspec
|
94
|
+
homepage: https://github.com/dmgk/uchardet
|
95
|
+
licenses:
|
96
|
+
- MIT
|
97
|
+
metadata: {}
|
59
98
|
post_install_message:
|
60
|
-
rdoc_options:
|
61
|
-
|
62
|
-
- README.rdoc
|
63
|
-
require_paths:
|
99
|
+
rdoc_options: []
|
100
|
+
require_paths:
|
64
101
|
- lib
|
65
|
-
|
66
|
-
|
67
|
-
requirements:
|
102
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
103
|
+
requirements:
|
68
104
|
- - ">="
|
69
|
-
- !ruby/object:Gem::Version
|
70
|
-
version:
|
71
|
-
|
72
|
-
|
73
|
-
requirements:
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: '0'
|
107
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
74
109
|
- - ">="
|
75
|
-
- !ruby/object:Gem::Version
|
76
|
-
version:
|
77
|
-
version:
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0'
|
78
112
|
requirements: []
|
79
|
-
|
80
|
-
|
81
|
-
rubygems_version: 1.3.5
|
113
|
+
rubyforge_project:
|
114
|
+
rubygems_version: 2.7.7
|
82
115
|
signing_key:
|
83
|
-
specification_version:
|
84
|
-
summary: Fast character set encoding detection using International Components for
|
85
|
-
|
86
|
-
|
87
|
-
- test/test_uchardet.rb
|
88
|
-
- test/test_uchardet_cli.rb
|
89
|
-
- test/test_uchardet_extn.rb
|
116
|
+
specification_version: 4
|
117
|
+
summary: Fast character set encoding detection using International Components for
|
118
|
+
Unicode library.
|
119
|
+
test_files: []
|
data/History.txt
DELETED
data/Manifest.txt
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
History.txt
|
2
|
-
Manifest.txt
|
3
|
-
README.rdoc
|
4
|
-
Rakefile
|
5
|
-
bin/uchardet
|
6
|
-
ext/uchardet/extconf.rb
|
7
|
-
ext/uchardet/uchardet.c
|
8
|
-
lib/uchardet.rb
|
9
|
-
lib/uchardet/cli.rb
|
10
|
-
script/console
|
11
|
-
script/destroy
|
12
|
-
script/generate
|
13
|
-
tasks/extconf.rake
|
14
|
-
tasks/extconf/uchardet.rake
|
15
|
-
test/test_helper.rb
|
16
|
-
test/test_uchardet.rb
|
17
|
-
test/test_uchardet_cli.rb
|
18
|
-
test/test_uchardet_extn.rb
|
data/README.rdoc
DELETED
@@ -1,52 +0,0 @@
|
|
1
|
-
= uchardet
|
2
|
-
|
3
|
-
* http://rubyforge.org/projects/uchardet/
|
4
|
-
* http://github.com/invisiblellama/uchardet
|
5
|
-
* http://uchardet.rubyforge.org/rdoc/
|
6
|
-
|
7
|
-
== DESCRIPTION:
|
8
|
-
|
9
|
-
Fast character set encoding detection using International Components for Unicode C++ library.
|
10
|
-
|
11
|
-
== SYNOPSIS:
|
12
|
-
|
13
|
-
require 'open-uri'
|
14
|
-
require 'uchardet'
|
15
|
-
|
16
|
-
encoding = ICU::UCharsetDetector.detect open('http://google.jp').read
|
17
|
-
encoding # => {:language=>"ja", :encoding=>"Shift_JIS", :confidence=>100}
|
18
|
-
|
19
|
-
From command line:
|
20
|
-
|
21
|
-
$ uchardet
|
22
|
-
|
23
|
-
Usage: uchardet [options] file
|
24
|
-
-l, --list Display list of detectable character sets.
|
25
|
-
-s, --strip Strip HTML or XML markup before detection.
|
26
|
-
-e, --encoding Hint the charset detector about possible encoding.
|
27
|
-
-a, --all Show all matching encodings.
|
28
|
-
-h, --help Show this help message.
|
29
|
-
|
30
|
-
$ uchardet `which uchardet`
|
31
|
-
|
32
|
-
ISO-8859-1 (confidence 60%)
|
33
|
-
|
34
|
-
== REQUIREMENTS:
|
35
|
-
|
36
|
-
ICU[http://site.icu-project.org/] (International Components for Unicode):
|
37
|
-
|
38
|
-
on Mac OS X:
|
39
|
-
|
40
|
-
sudo port install icu
|
41
|
-
|
42
|
-
on Debian/Ubuntu
|
43
|
-
|
44
|
-
sudo apt-get install libicu-dev
|
45
|
-
|
46
|
-
== INSTALL:
|
47
|
-
|
48
|
-
sudo gem install uchardet
|
49
|
-
|
50
|
-
== LICENSE:
|
51
|
-
|
52
|
-
Copyright (c) 2009 Dmitri Goutnik, released under the MIT license.
|
data/ext/uchardet/extconf.rb
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
require 'mkmf'
|
2
|
-
|
3
|
-
icu_config = `which icu-config`.strip
|
4
|
-
if icu_config.empty?
|
5
|
-
abort "ICU seems to be missing. Try 'port install icu' or 'apt-get install libicu-dev'"
|
6
|
-
end
|
7
|
-
|
8
|
-
$LIBS << ' ' + `#{icu_config} --ldflags-system`.strip
|
9
|
-
$LIBS << ' ' + `#{icu_config} --ldflags-libsonly`.strip
|
10
|
-
$LDFLAGS << ' ' + `#{icu_config} --ldflags-searchpath`.strip
|
11
|
-
|
12
|
-
create_makefile("uchardet")
|
data/script/console
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# File: script/console
|
3
|
-
irb = RUBY_PLATFORM =~ /(:?mswin|mingw)/ ? 'irb.bat' : 'irb'
|
4
|
-
|
5
|
-
libs = " -r irb/completion"
|
6
|
-
# Perhaps use a console_lib to store any extra methods I may want available in the cosole
|
7
|
-
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
|
8
|
-
libs << " -r #{File.dirname(__FILE__) + '/../lib/chardet-icu.rb'}"
|
9
|
-
puts "Loading chardet-icu gem"
|
10
|
-
exec "#{irb} #{libs} --simple-prompt"
|
data/script/destroy
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
|
3
|
-
|
4
|
-
begin
|
5
|
-
require 'rubigen'
|
6
|
-
rescue LoadError
|
7
|
-
require 'rubygems'
|
8
|
-
require 'rubigen'
|
9
|
-
end
|
10
|
-
require 'rubigen/scripts/destroy'
|
11
|
-
|
12
|
-
ARGV.shift if ['--help', '-h'].include?(ARGV[0])
|
13
|
-
RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
|
14
|
-
RubiGen::Scripts::Destroy.new.run(ARGV)
|
data/script/generate
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
|
3
|
-
|
4
|
-
begin
|
5
|
-
require 'rubigen'
|
6
|
-
rescue LoadError
|
7
|
-
require 'rubygems'
|
8
|
-
require 'rubigen'
|
9
|
-
end
|
10
|
-
require 'rubigen/scripts/generate'
|
11
|
-
|
12
|
-
ARGV.shift if ['--help', '-h'].include?(ARGV[0])
|
13
|
-
RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
|
14
|
-
RubiGen::Scripts::Generate.new.run(ARGV)
|
data/tasks/extconf.rake
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
namespace :extconf do
|
2
|
-
desc "Compiles the Ruby extension"
|
3
|
-
task :compile
|
4
|
-
end
|
5
|
-
|
6
|
-
task :compile => "extconf:compile"
|
7
|
-
|
8
|
-
task :test => :compile
|
9
|
-
|
10
|
-
BIN = "*.{o,bundle,jar,so,obj,pdb,lib,def,exp}"
|
11
|
-
$hoe.clean_globs |= ["ext/**/#{BIN}", "lib/**/#{BIN}", 'ext/**/Makefile']
|
12
|
-
$hoe.spec.require_paths = Dir['{lib,ext/*}']
|
13
|
-
$hoe.spec.extensions = FileList["ext/**/extconf.rb"].to_a
|
data/tasks/extconf/uchardet.rake
DELETED
@@ -1,43 +0,0 @@
|
|
1
|
-
namespace :extconf do
|
2
|
-
extension = File.basename(__FILE__, '.rake')
|
3
|
-
|
4
|
-
ext = "ext/#{extension}"
|
5
|
-
ext_so = "#{ext}/#{extension}.#{Config::CONFIG['DLEXT']}"
|
6
|
-
ext_files = FileList[
|
7
|
-
"#{ext}/*.c",
|
8
|
-
"#{ext}/*.h",
|
9
|
-
"#{ext}/*.rl",
|
10
|
-
"#{ext}/extconf.rb",
|
11
|
-
"#{ext}/Makefile",
|
12
|
-
# "lib"
|
13
|
-
]
|
14
|
-
|
15
|
-
|
16
|
-
task :compile => extension do
|
17
|
-
if Dir.glob("**/#{extension}.{o,so,dll}").length == 0
|
18
|
-
STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
|
19
|
-
STDERR.puts "Gem actually failed to build. Your system is"
|
20
|
-
STDERR.puts "NOT configured properly to build #{GEM_NAME}."
|
21
|
-
STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
|
22
|
-
exit(1)
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
desc "Builds just the #{extension} extension"
|
27
|
-
task extension.to_sym => ["#{ext}/Makefile", ext_so ]
|
28
|
-
|
29
|
-
file "#{ext}/Makefile" => ["#{ext}/extconf.rb"] do
|
30
|
-
Dir.chdir(ext) do ruby "extconf.rb" end
|
31
|
-
end
|
32
|
-
|
33
|
-
file ext_so => ext_files do
|
34
|
-
Dir.chdir(ext) do
|
35
|
-
sh(RUBY_PLATFORM =~ /win32/ ? 'nmake' : 'make') do |ok, res|
|
36
|
-
if !ok
|
37
|
-
require "fileutils"
|
38
|
-
FileUtils.rm Dir.glob('*.{so,o,dll,bundle}')
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
data/test/test_helper.rb
DELETED
data/test/test_uchardet.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require File.dirname(__FILE__) + '/test_helper.rb'
|
4
|
-
|
5
|
-
class TestUchardet < Test::Unit::TestCase # :nodoc:
|
6
|
-
|
7
|
-
def test_detect
|
8
|
-
detector = ICU::UCharsetDetector.new
|
9
|
-
assert_equal(detector.detect(''), ICU::UCharsetDetector.detect(''))
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_detect_all
|
13
|
-
detector = ICU::UCharsetDetector.new
|
14
|
-
assert_equal(detector.detect_all('∑'), ICU::UCharsetDetector.detect_all('∑'))
|
15
|
-
end
|
16
|
-
|
17
|
-
def test_detectable_charsets
|
18
|
-
detector = ICU::UCharsetDetector.new
|
19
|
-
assert_equal(detector.detectable_charsets, ICU::UCharsetDetector.detectable_charsets)
|
20
|
-
end
|
21
|
-
|
22
|
-
end
|
data/test/test_uchardet_cli.rb
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), "test_helper.rb")
|
2
|
-
require 'uchardet/cli'
|
3
|
-
|
4
|
-
class TestUchardetCli < Test::Unit::TestCase
|
5
|
-
def setup
|
6
|
-
Uchardet::CLI.execute(@stdout_io = StringIO.new, [])
|
7
|
-
@stdout_io.rewind
|
8
|
-
@stdout = @stdout_io.read
|
9
|
-
end
|
10
|
-
|
11
|
-
def test_print_default_output
|
12
|
-
assert_match(/Usage: .* \[options\] file/, @stdout)
|
13
|
-
end
|
14
|
-
end
|
data/test/test_uchardet_extn.rb
DELETED
@@ -1,101 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require "test/unit"
|
4
|
-
|
5
|
-
$:.unshift File.dirname(__FILE__) + "/../ext/uchardet"
|
6
|
-
require "uchardet.so"
|
7
|
-
|
8
|
-
class TestUchardetExtn < Test::Unit::TestCase # :nodoc:
|
9
|
-
|
10
|
-
def test_init
|
11
|
-
assert_not_nil(ICU::UCharsetDetector)
|
12
|
-
|
13
|
-
assert_nothing_raised do
|
14
|
-
detector = ICU::UCharsetDetector.new
|
15
|
-
assert_not_nil(detector)
|
16
|
-
|
17
|
-
detector = ICU::UCharsetDetector.new nil
|
18
|
-
assert_not_nil(detector)
|
19
|
-
|
20
|
-
detector = ICU::UCharsetDetector.new 'some text'
|
21
|
-
assert_not_nil(detector)
|
22
|
-
end
|
23
|
-
|
24
|
-
assert_raise(TypeError) do
|
25
|
-
detector = ICU::UCharsetDetector.new 0
|
26
|
-
end
|
27
|
-
|
28
|
-
assert_raise(TypeError) do
|
29
|
-
detector = ICU::UCharsetDetector.new Time.now
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
def test_detect
|
34
|
-
detector = ICU::UCharsetDetector.new
|
35
|
-
assert_raise(ICU::Error) do
|
36
|
-
detector.detect
|
37
|
-
end
|
38
|
-
e = detector.detect '∂∆∂∆∂∆'
|
39
|
-
assert(e.is_a? Hash)
|
40
|
-
assert(e.has_key? :encoding)
|
41
|
-
assert(e.has_key? :confidence)
|
42
|
-
assert(e.has_key? :language)
|
43
|
-
assert_equal('utf-8', e[:encoding].downcase)
|
44
|
-
e = detector.detect '··', 'utf-8'
|
45
|
-
assert_equal('utf-8', e[:encoding].downcase)
|
46
|
-
e = detector.detect '··', 'Shift_JIS'
|
47
|
-
assert_equal('utf-8', e[:encoding].downcase)
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_detect_all
|
51
|
-
detector = ICU::UCharsetDetector.new
|
52
|
-
assert_raise(ICU::Error) do
|
53
|
-
detector.detect_all
|
54
|
-
end
|
55
|
-
a = detector.detect_all '€‹€‹€'
|
56
|
-
assert(a.is_a? Array)
|
57
|
-
assert_equal(false, a.empty?)
|
58
|
-
assert(a[0].is_a? Hash)
|
59
|
-
assert(a[0].has_key? :encoding)
|
60
|
-
assert(a[0].has_key? :confidence)
|
61
|
-
assert(a[0].has_key? :language)
|
62
|
-
end
|
63
|
-
|
64
|
-
def test_input_filtered_accessor
|
65
|
-
detector = ICU::UCharsetDetector.new
|
66
|
-
assert_equal(false, detector.input_filtered?)
|
67
|
-
detector.input_filtered = true
|
68
|
-
assert_equal(true, detector.input_filtered?)
|
69
|
-
detector.input_filtered = ''
|
70
|
-
assert_equal(true, detector.input_filtered?)
|
71
|
-
detector.input_filtered = nil
|
72
|
-
assert_equal(false, detector.input_filtered?)
|
73
|
-
end
|
74
|
-
|
75
|
-
def test_text_accessor
|
76
|
-
detector = ICU::UCharsetDetector.new
|
77
|
-
assert_equal(nil, detector.text)
|
78
|
-
detector = ICU::UCharsetDetector.new 'blah'
|
79
|
-
assert_equal('blah', detector.text)
|
80
|
-
detector.text = 'test'
|
81
|
-
assert_equal('test', detector.text)
|
82
|
-
detector.detect
|
83
|
-
assert_equal('test', detector.text)
|
84
|
-
end
|
85
|
-
|
86
|
-
def test_declared_encoding_accessor
|
87
|
-
detector = ICU::UCharsetDetector.new
|
88
|
-
assert_equal(nil, detector.declared_encoding)
|
89
|
-
detector.declared_encoding = 'iso-8859-15'
|
90
|
-
assert_equal('iso-8859-15', detector.declared_encoding)
|
91
|
-
detector.detect 'test'
|
92
|
-
assert_equal('iso-8859-15', detector.declared_encoding)
|
93
|
-
end
|
94
|
-
|
95
|
-
def test_detectable_charsets
|
96
|
-
detector = ICU::UCharsetDetector.new
|
97
|
-
assert_not_nil(detector.detectable_charsets)
|
98
|
-
assert(detector.detectable_charsets.is_a? Array)
|
99
|
-
end
|
100
|
-
|
101
|
-
end
|