charlock_holmes 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/README.md +24 -10
- data/benchmark/detection.rb +2 -2
- data/ext/charlock_holmes/converter.c +0 -1
- data/ext/charlock_holmes/encoding_detector.c +102 -22
- data/ext/charlock_holmes/extconf.rb +6 -3
- data/lib/charlock_holmes/encoding_detector.rb +2 -2
- data/lib/charlock_holmes/string.rb +4 -4
- data/lib/charlock_holmes/version.rb +1 -1
- data/spec/encoding_detector_spec.rb +9 -7
- data/spec/fixtures/hello_world +0 -0
- metadata +6 -4
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -10,12 +10,12 @@ First you'll need to require it
|
|
10
10
|
require 'charlock_holmes'
|
11
11
|
```
|
12
12
|
|
13
|
-
|
13
|
+
## Encoding detection
|
14
14
|
|
15
15
|
``` ruby
|
16
16
|
contents = File.read('test.xml')
|
17
17
|
detection = CharlockHolmes::EncodingDetector.detect(contents)
|
18
|
-
# => {:encoding => 'UTF-8', :confidence => 100}
|
18
|
+
# => {:encoding => 'UTF-8', :confidence => 100, :type => :text}
|
19
19
|
|
20
20
|
# optionally there will be a :language key as well, but
|
21
21
|
# that's mostly only returned for legacy encodings like ISO-8859-1
|
@@ -23,6 +23,8 @@ detection = CharlockHolmes::EncodingDetector.detect(contents)
|
|
23
23
|
|
24
24
|
NOTE: `CharlockHolmes::EncodingDetector.detect` will return `nil` if it was unable to find an encoding.
|
25
25
|
|
26
|
+
For binary content, `:type` will be set to `:binary`
|
27
|
+
|
26
28
|
Though it's more efficient to reuse one detector instance:
|
27
29
|
|
28
30
|
``` ruby
|
@@ -34,7 +36,7 @@ detection2 = detector.detect(File.read('test2.json'))
|
|
34
36
|
# and so on...
|
35
37
|
```
|
36
38
|
|
37
|
-
|
39
|
+
### String monkey patch
|
38
40
|
|
39
41
|
Alternatively, you can just use the `detect_encoding` method on the `String` class
|
40
42
|
|
@@ -46,7 +48,7 @@ contents = File.read('test.xml')
|
|
46
48
|
detection = contents.detect_encoding
|
47
49
|
```
|
48
50
|
|
49
|
-
|
51
|
+
### Ruby 1.9 specific
|
50
52
|
|
51
53
|
NOTE: This method only exists on Ruby 1.9+
|
52
54
|
|
@@ -57,18 +59,30 @@ require 'charlock_holmes/string'
|
|
57
59
|
|
58
60
|
contents = File.read('test.xml')
|
59
61
|
|
60
|
-
# this will detect and set the encoding of `contents
|
62
|
+
# this will detect and set the encoding of `contents`, then return self
|
61
63
|
contents.detect_encoding!
|
62
64
|
```
|
63
65
|
|
66
|
+
## Transcoding
|
67
|
+
|
68
|
+
Being able to detect the encoding of some arbitrary content is nice, but what you probably want is to be able to transcode that content into an encoding your application is using.
|
69
|
+
|
70
|
+
``` ruby
|
71
|
+
content = File.read('test2.txt')
|
72
|
+
detection = CharlockHolmes::EncodingDetector.detect(content)
|
73
|
+
utf8_encoded_content = CharlockHolmes::Converter.convert content, detection[:encoding], 'UTF-8'
|
74
|
+
```
|
75
|
+
|
76
|
+
The first parameter is the content to transcode, the second is the source encoding (the encoding the content is assumed to be in), and the third parameter is the destination encoding.
|
77
|
+
|
64
78
|
## Installing
|
65
79
|
|
66
|
-
If the traditional `gem install charlock_holmes` doesn't work, you may need to specify the path to your installation of ICU using the `--with-icu-dir` option during the gem install.
|
80
|
+
If the traditional `gem install charlock_holmes` doesn't work, you may need to specify the path to your installation of ICU and libmagic using the `--with-icu-dir` and/or `--with-magic-dir` options during the gem install.
|
67
81
|
|
68
|
-
At the time of writing,
|
82
|
+
At the time of writing, Homebrew for OSX installs ICU (icu4c is the package name) and libmagic as keg-only installs so you'll have to specify the location during the gem install:
|
69
83
|
|
70
|
-
`gem install charlock_holmes --with-icu-dir=/usr/local/Cellar/icu4c/4.4.1`
|
84
|
+
`gem install charlock_holmes --with-icu-dir=/usr/local/Cellar/icu4c/4.4.1 --with-magic-dir=/usr/local/Cellar/libmagic/5.04`
|
71
85
|
|
72
|
-
If you're using Bundler and need to specify a custom path, you can do so with the `bundle config` command:
|
86
|
+
If you're using Bundler and need to specify one or more custom paths, you can do so with the `bundle config` command:
|
73
87
|
|
74
|
-
`bundle config build.charlock_holmes --with-icu-dir=/usr/local/Cellar/icu4c/4.4.1`
|
88
|
+
`bundle config build.charlock_holmes --with-icu-dir=/usr/local/Cellar/icu4c/4.4.1 --with-magic-dir=/usr/local/Cellar/libmagic/5.04`
|
data/benchmark/detection.rb
CHANGED
@@ -16,14 +16,14 @@ DETECTOR = CharlockHolmes::EncodingDetector.new
|
|
16
16
|
|
17
17
|
Benchmark.bmbm do |x|
|
18
18
|
# new detector every iteration
|
19
|
-
x.report '
|
19
|
+
x.report 'singleton call' do
|
20
20
|
TIMES.times do
|
21
21
|
CharlockHolmes::EncodingDetector.detect CONTENT
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
25
|
# shared detector for all iterations
|
26
|
-
x.report '
|
26
|
+
x.report 'reusing a single detector' do
|
27
27
|
TIMES.times do
|
28
28
|
DETECTOR.detect CONTENT
|
29
29
|
end
|
@@ -49,6 +49,5 @@ static VALUE rb_converter_convert(VALUE self, VALUE rb_txt, VALUE rb_src_enc, VA
|
|
49
49
|
void _init_charlock_converter() {
|
50
50
|
rb_cConverter = rb_define_class_under(rb_mCharlockHolmes, "Converter", rb_cObject);
|
51
51
|
|
52
|
-
// rb_define_alloc_func(rb_cConverter, rb_converter__alloc);
|
53
52
|
rb_define_singleton_method(rb_cConverter, "convert", rb_converter_convert, 3);
|
54
53
|
}
|
@@ -1,9 +1,15 @@
|
|
1
1
|
#include "unicode/ucsdet.h"
|
2
|
+
#include "magic.h"
|
2
3
|
#include "common.h"
|
3
4
|
|
4
5
|
extern VALUE rb_mCharlockHolmes;
|
5
6
|
static VALUE rb_cEncodingDetector;
|
6
7
|
|
8
|
+
typedef struct {
|
9
|
+
UCharsetDetector *csd;
|
10
|
+
magic_t magic;
|
11
|
+
} charlock_detector_t;
|
12
|
+
|
7
13
|
static VALUE rb_encdec_buildmatch(const UCharsetMatch *match)
|
8
14
|
{
|
9
15
|
UErrorCode status = U_ZERO_ERROR;
|
@@ -21,6 +27,7 @@ static VALUE rb_encdec_buildmatch(const UCharsetMatch *match)
|
|
21
27
|
|
22
28
|
rb_match = rb_hash_new();
|
23
29
|
|
30
|
+
rb_hash_aset(rb_match, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("text")));
|
24
31
|
rb_hash_aset(rb_match, ID2SYM(rb_intern("encoding")), charlock_new_str2(mname));
|
25
32
|
rb_hash_aset(rb_match, ID2SYM(rb_intern("confidence")), INT2NUM(mconfidence));
|
26
33
|
|
@@ -30,6 +37,36 @@ static VALUE rb_encdec_buildmatch(const UCharsetMatch *match)
|
|
30
37
|
return rb_match;
|
31
38
|
}
|
32
39
|
|
40
|
+
static VALUE rb_encdec_binarymatch() {
|
41
|
+
VALUE rb_match;
|
42
|
+
|
43
|
+
rb_match = rb_hash_new();
|
44
|
+
|
45
|
+
rb_hash_aset(rb_match, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("binary")));
|
46
|
+
rb_hash_aset(rb_match, ID2SYM(rb_intern("confidence")), INT2NUM(100));
|
47
|
+
|
48
|
+
return rb_match;
|
49
|
+
}
|
50
|
+
|
51
|
+
static int detect_binary_content(charlock_detector_t *detector, VALUE rb_str) {
|
52
|
+
const char *binary_result;
|
53
|
+
|
54
|
+
binary_result = magic_buffer(detector->magic, RSTRING_PTR(rb_str), RSTRING_LEN(rb_str));
|
55
|
+
|
56
|
+
if (binary_result) {
|
57
|
+
if (strstr(binary_result, "library") ||
|
58
|
+
strstr(binary_result, "bundle") ||
|
59
|
+
strstr(binary_result, "archive") ||
|
60
|
+
(!strstr(binary_result, "text") && strstr(binary_result, "executable")) ||
|
61
|
+
strstr(binary_result, "data"))
|
62
|
+
return 1;
|
63
|
+
} else {
|
64
|
+
rb_raise(rb_eStandardError, magic_error(detector->magic));
|
65
|
+
}
|
66
|
+
|
67
|
+
return 0;
|
68
|
+
}
|
69
|
+
|
33
70
|
/*
|
34
71
|
* call-seq: detection_hash = EncodingDetector.detect str[, hint_enc]
|
35
72
|
*
|
@@ -39,28 +76,35 @@ static VALUE rb_encdec_buildmatch(const UCharsetMatch *match)
|
|
39
76
|
* hint_enc - an optional String (like "UTF-8"), the encoding name which will
|
40
77
|
* be used as an additional hint to the charset detector
|
41
78
|
*
|
42
|
-
* Returns: a Hash with :encoding, :language and :confidence
|
79
|
+
* Returns: a Hash with :encoding, :language, :type and :confidence
|
43
80
|
*/
|
44
81
|
static VALUE rb_encdec_detect(int argc, VALUE *argv, VALUE self)
|
45
82
|
{
|
46
83
|
UErrorCode status = U_ZERO_ERROR;
|
47
|
-
|
84
|
+
charlock_detector_t *detector;
|
48
85
|
VALUE rb_str;
|
49
86
|
VALUE rb_enc_hint;
|
50
87
|
|
51
88
|
rb_scan_args(argc, argv, "11", &rb_str, &rb_enc_hint);
|
52
89
|
|
53
90
|
Check_Type(rb_str, T_STRING);
|
54
|
-
Data_Get_Struct(self,
|
91
|
+
Data_Get_Struct(self, charlock_detector_t, detector);
|
55
92
|
|
56
|
-
|
93
|
+
// first lets see if this is binary content
|
94
|
+
if (detect_binary_content(detector, rb_str)) {
|
95
|
+
return rb_encdec_binarymatch();
|
96
|
+
}
|
97
|
+
|
98
|
+
// if we got here - the data doesn't look like binary
|
99
|
+
// lets try to figure out what encoding the text is in
|
100
|
+
ucsdet_setText(detector->csd, RSTRING_PTR(rb_str), (int32_t)RSTRING_LEN(rb_str), &status);
|
57
101
|
|
58
102
|
if (!NIL_P(rb_enc_hint)) {
|
59
103
|
Check_Type(rb_enc_hint, T_STRING);
|
60
|
-
ucsdet_setDeclaredEncoding(csd, RSTRING_PTR(rb_enc_hint), RSTRING_LEN(rb_enc_hint), &status);
|
104
|
+
ucsdet_setDeclaredEncoding(detector->csd, RSTRING_PTR(rb_enc_hint), RSTRING_LEN(rb_enc_hint), &status);
|
61
105
|
}
|
62
106
|
|
63
|
-
return rb_encdec_buildmatch(ucsdet_detect(csd, &status));
|
107
|
+
return rb_encdec_buildmatch(ucsdet_detect(detector->csd, &status));
|
64
108
|
}
|
65
109
|
|
66
110
|
|
@@ -76,38 +120,48 @@ static VALUE rb_encdec_detect(int argc, VALUE *argv, VALUE self)
|
|
76
120
|
* be used as an additional hint to the charset detector
|
77
121
|
*
|
78
122
|
* Returns: an Array with zero or more Hashes,
|
79
|
-
* each one of them with with :encoding, :language and :confidence
|
123
|
+
* each one of them with :encoding, :language, :type and :confidence
|
80
124
|
*/
|
81
125
|
static VALUE rb_encdec_detect_all(int argc, VALUE *argv, VALUE self)
|
82
126
|
{
|
83
127
|
UErrorCode status = U_ZERO_ERROR;
|
84
|
-
|
128
|
+
charlock_detector_t *detector;
|
85
129
|
const UCharsetMatch **csm;
|
86
130
|
VALUE rb_ret;
|
87
131
|
int i, match_count;
|
88
132
|
VALUE rb_str;
|
89
133
|
VALUE rb_enc_hint;
|
134
|
+
VALUE binary_match;
|
90
135
|
|
91
136
|
rb_scan_args(argc, argv, "11", &rb_str, &rb_enc_hint);
|
92
137
|
|
93
138
|
Check_Type(rb_str, T_STRING);
|
94
|
-
Data_Get_Struct(self,
|
139
|
+
Data_Get_Struct(self, charlock_detector_t, detector);
|
95
140
|
|
96
141
|
rb_ret = rb_ary_new();
|
97
142
|
|
98
|
-
|
143
|
+
// first lets see if this is binary content
|
144
|
+
binary_match = Qnil;
|
145
|
+
if (detect_binary_content(detector, rb_str)) {
|
146
|
+
binary_match = rb_encdec_binarymatch();
|
147
|
+
}
|
148
|
+
|
149
|
+
ucsdet_setText(detector->csd, RSTRING_PTR(rb_str), (int32_t)RSTRING_LEN(rb_str), &status);
|
99
150
|
|
100
151
|
if (!NIL_P(rb_enc_hint)) {
|
101
152
|
Check_Type(rb_enc_hint, T_STRING);
|
102
|
-
ucsdet_setDeclaredEncoding(csd, RSTRING_PTR(rb_enc_hint), RSTRING_LEN(rb_enc_hint), &status);
|
153
|
+
ucsdet_setDeclaredEncoding(detector->csd, RSTRING_PTR(rb_enc_hint), RSTRING_LEN(rb_enc_hint), &status);
|
103
154
|
}
|
104
155
|
|
105
|
-
csm = ucsdet_detectAll(csd, &match_count, &status);
|
156
|
+
csm = ucsdet_detectAll(detector->csd, &match_count, &status);
|
106
157
|
|
107
158
|
for (i = 0; i < match_count; ++i) {
|
108
159
|
rb_ary_push(rb_ret, rb_encdec_buildmatch(csm[i]));
|
109
160
|
}
|
110
161
|
|
162
|
+
if (!NIL_P(binary_match))
|
163
|
+
rb_ary_unshift(rb_ret, binary_match);
|
164
|
+
|
111
165
|
return rb_ret;
|
112
166
|
}
|
113
167
|
|
@@ -120,13 +174,13 @@ static VALUE rb_encdec_detect_all(int argc, VALUE *argv, VALUE self)
|
|
120
174
|
*/
|
121
175
|
static VALUE rb_get_strip_tags(VALUE self)
|
122
176
|
{
|
123
|
-
|
177
|
+
charlock_detector_t *detector;
|
124
178
|
UBool val;
|
125
179
|
VALUE rb_val;
|
126
180
|
|
127
|
-
Data_Get_Struct(self,
|
181
|
+
Data_Get_Struct(self, charlock_detector_t, detector);
|
128
182
|
|
129
|
-
val = ucsdet_isInputFilterEnabled(csd);
|
183
|
+
val = ucsdet_isInputFilterEnabled(detector->csd);
|
130
184
|
|
131
185
|
rb_val = val == 1 ? Qtrue : Qfalse;
|
132
186
|
|
@@ -143,14 +197,14 @@ static VALUE rb_get_strip_tags(VALUE self)
|
|
143
197
|
*/
|
144
198
|
static VALUE rb_set_strip_tags(VALUE self, VALUE rb_val)
|
145
199
|
{
|
146
|
-
|
200
|
+
charlock_detector_t *detector;
|
147
201
|
UBool val;
|
148
202
|
|
149
|
-
Data_Get_Struct(self,
|
203
|
+
Data_Get_Struct(self, charlock_detector_t, detector);
|
150
204
|
|
151
205
|
val = rb_val == Qtrue ? 1 : 0;
|
152
206
|
|
153
|
-
ucsdet_enableInputFilter(csd, val);
|
207
|
+
ucsdet_enableInputFilter(detector->csd, val);
|
154
208
|
|
155
209
|
return rb_val;
|
156
210
|
}
|
@@ -195,16 +249,42 @@ static VALUE rb_get_supported_encodings(VALUE klass)
|
|
195
249
|
return rb_encoding_list;
|
196
250
|
}
|
197
251
|
|
198
|
-
static void rb_encdec__free(void *
|
252
|
+
static void rb_encdec__free(void *obj)
|
199
253
|
{
|
200
|
-
|
254
|
+
charlock_detector_t *detector;
|
255
|
+
|
256
|
+
detector = (charlock_detector_t *)obj;
|
257
|
+
|
258
|
+
if (detector->csd)
|
259
|
+
ucsdet_close(detector->csd);
|
260
|
+
|
261
|
+
if (detector->magic)
|
262
|
+
magic_close(detector->magic);
|
201
263
|
}
|
202
264
|
|
203
265
|
static VALUE rb_encdec__alloc(VALUE klass)
|
204
266
|
{
|
267
|
+
charlock_detector_t *detector;
|
205
268
|
UErrorCode status = U_ZERO_ERROR;
|
206
|
-
|
207
|
-
|
269
|
+
VALUE obj;
|
270
|
+
|
271
|
+
obj = Data_Make_Struct(klass, charlock_detector_t, NULL, rb_encdec__free, (void *)detector);
|
272
|
+
|
273
|
+
detector->csd = ucsdet_open(&status);
|
274
|
+
if (U_FAILURE(status)) {
|
275
|
+
rb_raise(rb_eStandardError, u_errorName(status));
|
276
|
+
}
|
277
|
+
|
278
|
+
detector->magic = magic_open(0);
|
279
|
+
if (detector->magic == NULL) {
|
280
|
+
rb_raise(rb_eStandardError, magic_error(detector->magic));
|
281
|
+
}
|
282
|
+
|
283
|
+
// load the libmagic database
|
284
|
+
// NULL means use the default or whatever is specified by the MAGIC env var
|
285
|
+
magic_load(detector->magic, NULL);
|
286
|
+
|
287
|
+
return obj;
|
208
288
|
}
|
209
289
|
|
210
290
|
void _init_charlock_encoding_detector()
|
@@ -3,11 +3,14 @@ require 'mkmf'
|
|
3
3
|
$CFLAGS << ' -Wall -funroll-loops'
|
4
4
|
$CFLAGS << ' -Wextra -O0 -ggdb3' if ENV['DEBUG']
|
5
5
|
|
6
|
+
dir_config 'icu'
|
6
7
|
$CFLAGS << ' -I/usr/local/Cellar/icu4c/4.4.1/include'
|
7
8
|
$LDFLAGS << ' -L/usr/local/Cellar/icu4c/4.4.1/lib'
|
8
|
-
|
9
|
-
dir_config 'icu'
|
10
|
-
|
11
9
|
have_library 'icui18n'
|
12
10
|
|
11
|
+
dir_config 'magic'
|
12
|
+
$LDFLAGS << ' -L/usr/local/Cellar/libmagic/5.04/lib'
|
13
|
+
$CFLAGS << ' -I/usr/local/Cellar/libmagic/5.04/include'
|
14
|
+
have_library 'magic'
|
15
|
+
|
13
16
|
create_makefile 'charlock_holmes'
|
@@ -10,7 +10,7 @@ module CharlockHolmes
|
|
10
10
|
# hint_enc - an optional String (like "UTF-8"), the encoding name which will
|
11
11
|
# be used as an additional hint to the charset detector
|
12
12
|
#
|
13
|
-
# Returns: a Hash with :encoding, :language and :confidence
|
13
|
+
# Returns: a Hash with :encoding, :language, :type and :confidence
|
14
14
|
def self.detect(str, hint_enc=nil)
|
15
15
|
new.detect(str, hint_enc)
|
16
16
|
end
|
@@ -25,7 +25,7 @@ module CharlockHolmes
|
|
25
25
|
# be used as an additional hint to the charset detector
|
26
26
|
#
|
27
27
|
# Returns: an Array with zero or more Hashes,
|
28
|
-
# each one of them with with :encoding, :language and :confidence
|
28
|
+
# each one of them with :encoding, :language, :type and :confidence
|
29
29
|
def self.detect_all(str, hint_enc=nil)
|
30
30
|
new.detect_all(str, hint_enc)
|
31
31
|
end
|
@@ -3,7 +3,7 @@ require 'charlock_holmes' unless defined? CharlockHolmes
|
|
3
3
|
class String
|
4
4
|
# Attempt to detect the encoding of this string
|
5
5
|
#
|
6
|
-
# Returns: a Hash with :encoding, :language and :confidence
|
6
|
+
# Returns: a Hash with :encoding, :language, :type and :confidence
|
7
7
|
def detect_encoding(hint_enc=nil)
|
8
8
|
encoding_detector.detect(self, hint_enc)
|
9
9
|
end
|
@@ -12,7 +12,7 @@ class String
|
|
12
12
|
# a list with all the possible encodings that match it.
|
13
13
|
#
|
14
14
|
# Returns: an Array with zero or more Hashes,
|
15
|
-
# each one of them with with :encoding, :language and :confidence
|
15
|
+
# each one of them with :encoding, :language, :type and :confidence
|
16
16
|
def detect_encodings(hint_enc=nil)
|
17
17
|
encoding_detector.detect_all(self, hint_enc)
|
18
18
|
end
|
@@ -21,12 +21,12 @@ class String
|
|
21
21
|
# Attempt to detect the encoding of this string
|
22
22
|
# then set the encoding to what was detected ala `force_encoding`
|
23
23
|
#
|
24
|
-
# Returns:
|
24
|
+
# Returns: self
|
25
25
|
def detect_encoding!(hint_enc=nil)
|
26
26
|
if detected = self.detect_encoding(hint_enc)
|
27
27
|
self.force_encoding detected[:encoding]
|
28
|
-
detected
|
29
28
|
end
|
29
|
+
self
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
@@ -92,16 +92,17 @@ describe CharlockHolmes::EncodingDetector do
|
|
92
92
|
|
93
93
|
context 'encoding detection' do
|
94
94
|
MAPPING = [
|
95
|
-
['repl2.cljs', 'ISO-8859-1'],
|
96
|
-
['core.rkt', 'UTF-8'],
|
97
|
-
['cl-messagepack.lisp', 'ISO-8859-1'],
|
98
|
-
['TwigExtensionsDate.es.yml', 'UTF-8'],
|
99
|
-
['AnsiGraph.psm1', 'UTF-16LE'],
|
100
|
-
['laholator.py', 'UTF-8']
|
95
|
+
['repl2.cljs', 'ISO-8859-1', :text],
|
96
|
+
['core.rkt', 'UTF-8', :text],
|
97
|
+
['cl-messagepack.lisp', 'ISO-8859-1', :text],
|
98
|
+
['TwigExtensionsDate.es.yml', 'UTF-8', :text],
|
99
|
+
['AnsiGraph.psm1', 'UTF-16LE', :text],
|
100
|
+
['laholator.py', 'UTF-8', :text],
|
101
|
+
['hello_world', nil, :binary]
|
101
102
|
]
|
102
103
|
|
103
104
|
MAPPING.each do |mapping|
|
104
|
-
file, encoding = mapping
|
105
|
+
file, encoding, type = mapping
|
105
106
|
|
106
107
|
test "#{file} should be detected as #{encoding}" do
|
107
108
|
path = File.expand_path "../fixtures/#{file}", __FILE__
|
@@ -109,6 +110,7 @@ describe CharlockHolmes::EncodingDetector do
|
|
109
110
|
guessed = @detector.detect content
|
110
111
|
|
111
112
|
assert_equal encoding, guessed[:encoding]
|
113
|
+
assert_equal type, guessed[:type]
|
112
114
|
|
113
115
|
if content.respond_to? :force_encoding
|
114
116
|
content.force_encoding guessed[:encoding]
|
Binary file
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: charlock_holmes
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 7
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
8
|
+
- 6
|
9
9
|
- 0
|
10
|
-
version: 0.
|
10
|
+
version: 0.6.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Brian Lopez
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2011-08-
|
19
|
+
date: 2011-08-27 00:00:00 -07:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
@@ -99,6 +99,7 @@ files:
|
|
99
99
|
- spec/fixtures/TwigExtensionsDate.es.yml
|
100
100
|
- spec/fixtures/cl-messagepack.lisp
|
101
101
|
- spec/fixtures/core.rkt
|
102
|
+
- spec/fixtures/hello_world
|
102
103
|
- spec/fixtures/laholator.py
|
103
104
|
- spec/fixtures/repl2.cljs
|
104
105
|
- spec/spec_helper.rb
|
@@ -145,6 +146,7 @@ test_files:
|
|
145
146
|
- spec/fixtures/TwigExtensionsDate.es.yml
|
146
147
|
- spec/fixtures/cl-messagepack.lisp
|
147
148
|
- spec/fixtures/core.rkt
|
149
|
+
- spec/fixtures/hello_world
|
148
150
|
- spec/fixtures/laholator.py
|
149
151
|
- spec/fixtures/repl2.cljs
|
150
152
|
- spec/spec_helper.rb
|