uchardet 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/README.rdoc +2 -0
- data/ext/uchardet/uchardet.c +18 -19
- data/lib/uchardet.rb +1 -1
- metadata +3 -3
data/History.txt
CHANGED
data/README.rdoc
CHANGED
data/ext/uchardet/uchardet.c
CHANGED
@@ -10,7 +10,7 @@ static VALUE cUChardetError;
|
|
10
10
|
static VALUE cUCharsetDetector;
|
11
11
|
|
12
12
|
static void
|
13
|
-
|
13
|
+
ensure(UErrorCode status)
|
14
14
|
{
|
15
15
|
if (U_FAILURE(status)) {
|
16
16
|
VALUE ex = rb_exc_new2(cUChardetError, u_errorName(status));
|
@@ -30,7 +30,7 @@ UCharsetDetector_alloc(VALUE klass)
|
|
30
30
|
{
|
31
31
|
UErrorCode status = U_ZERO_ERROR;
|
32
32
|
UCharsetDetector* detector = ucsdet_open(&status);
|
33
|
-
|
33
|
+
ensure(status);
|
34
34
|
|
35
35
|
return Data_Wrap_Struct(klass, NULL, UCharsetDetector_free, detector);
|
36
36
|
}
|
@@ -39,7 +39,7 @@ UCharsetDetector_alloc(VALUE klass)
|
|
39
39
|
* call-seq:
|
40
40
|
* input_filtered
|
41
41
|
*
|
42
|
-
*
|
42
|
+
* Return filtering flag value this charset detector.
|
43
43
|
*/
|
44
44
|
static VALUE
|
45
45
|
UCharsetDetector_get_input_filtered(VALUE self)
|
@@ -90,7 +90,6 @@ static VALUE
|
|
90
90
|
UCharsetDetector_set_text(VALUE self, VALUE text)
|
91
91
|
{
|
92
92
|
return rb_iv_set(self, "@text", text);
|
93
|
-
return text;
|
94
93
|
}
|
95
94
|
|
96
95
|
/*
|
@@ -118,7 +117,6 @@ static VALUE
|
|
118
117
|
UCharsetDetector_set_declared_encoding(VALUE self, VALUE declared_encoding)
|
119
118
|
{
|
120
119
|
return rb_iv_set(self, "@declared_encoding", declared_encoding);
|
121
|
-
return declared_encoding;
|
122
120
|
}
|
123
121
|
|
124
122
|
static void
|
@@ -132,7 +130,7 @@ set_text(VALUE self, VALUE text)
|
|
132
130
|
Data_Get_Struct(self, UCharsetDetector, detector);
|
133
131
|
|
134
132
|
ucsdet_setText(detector, StringValuePtr(text), RSTRING_LEN(text), &status);
|
135
|
-
|
133
|
+
ensure(status);
|
136
134
|
|
137
135
|
UCharsetDetector_set_text(self, text);
|
138
136
|
}
|
@@ -149,7 +147,7 @@ set_declared_encoding(VALUE self, VALUE declared_encoding)
|
|
149
147
|
Data_Get_Struct(self, UCharsetDetector, detector);
|
150
148
|
|
151
149
|
ucsdet_setDeclaredEncoding(detector, StringValuePtr(declared_encoding), RSTRING_LEN(declared_encoding), &status);
|
152
|
-
|
150
|
+
ensure(status);
|
153
151
|
|
154
152
|
UCharsetDetector_set_declared_encoding(self, declared_encoding);
|
155
153
|
}
|
@@ -211,16 +209,16 @@ UCharsetDetector_detect(int argc, VALUE *argv, VALUE self)
|
|
211
209
|
Data_Get_Struct(self, UCharsetDetector, detector);
|
212
210
|
|
213
211
|
const UCharsetMatch *match = ucsdet_detect(detector, &status);
|
214
|
-
|
212
|
+
ensure(status);
|
215
213
|
|
216
214
|
const char *encoding_name = ucsdet_getName(match, &status);
|
217
|
-
|
215
|
+
ensure(status);
|
218
216
|
|
219
217
|
int32_t encoding_confidence = ucsdet_getConfidence(match, &status);
|
220
|
-
|
218
|
+
ensure(status);
|
221
219
|
|
222
220
|
const char *encoding_language = ucsdet_getLanguage(match, &status);
|
223
|
-
|
221
|
+
ensure(status);
|
224
222
|
|
225
223
|
VALUE hash = rb_hash_new();
|
226
224
|
rb_hash_aset(hash, ID2SYM(rb_intern("encoding")), rb_str_new2(encoding_name));
|
@@ -262,20 +260,20 @@ UCharsetDetector_detect_all(int argc, VALUE *argv, VALUE self)
|
|
262
260
|
int32_t matches_found;
|
263
261
|
|
264
262
|
const UCharsetMatch **matches = ucsdet_detectAll(detector, &matches_found, &status);
|
265
|
-
|
263
|
+
ensure(status);
|
266
264
|
|
267
265
|
VALUE ary = rb_ary_new();
|
268
266
|
int i = 0;
|
269
267
|
|
270
268
|
for (i = 0; i < matches_found; i++) {
|
271
269
|
const char *encoding_name = ucsdet_getName(matches[i], &status);
|
272
|
-
|
270
|
+
ensure(status);
|
273
271
|
|
274
272
|
int32_t encoding_confidence = ucsdet_getConfidence(matches[i], &status);
|
275
|
-
|
273
|
+
ensure(status);
|
276
274
|
|
277
275
|
const char *encoding_language = ucsdet_getLanguage(matches[i], &status);
|
278
|
-
|
276
|
+
ensure(status);
|
279
277
|
|
280
278
|
VALUE hash = rb_hash_new();
|
281
279
|
rb_hash_aset(hash, ID2SYM(rb_intern("encoding")), rb_str_new2(encoding_name));
|
@@ -292,8 +290,7 @@ UCharsetDetector_detect_all(int argc, VALUE *argv, VALUE self)
|
|
292
290
|
* call-seq:
|
293
291
|
* detectable_charsets
|
294
292
|
*
|
295
|
-
* Get array of names of all detectable charsets that are known to the
|
296
|
-
* charset detection service.
|
293
|
+
* Get array of names of all detectable charsets that are known to the charset detection service.
|
297
294
|
*/
|
298
295
|
static VALUE
|
299
296
|
UCharsetDetector_get_detectable_charsets(VALUE self)
|
@@ -303,14 +300,14 @@ UCharsetDetector_get_detectable_charsets(VALUE self)
|
|
303
300
|
UErrorCode status = U_ZERO_ERROR;
|
304
301
|
|
305
302
|
UEnumeration *charsets = ucsdet_getAllDetectableCharsets(detector, &status);
|
306
|
-
|
303
|
+
ensure(status);
|
307
304
|
|
308
305
|
VALUE ary = rb_ary_new();
|
309
306
|
int32_t result_length;
|
310
307
|
const char *charset_name;
|
311
308
|
|
312
309
|
while (charset_name = uenum_next(charsets, &result_length, &status)) {
|
313
|
-
|
310
|
+
ensure(status);
|
314
311
|
rb_ary_push(ary, rb_str_new2(charset_name));
|
315
312
|
}
|
316
313
|
uenum_close(charsets);
|
@@ -318,6 +315,8 @@ UCharsetDetector_get_detectable_charsets(VALUE self)
|
|
318
315
|
return ary;
|
319
316
|
}
|
320
317
|
|
318
|
+
/*
|
319
|
+
*/
|
321
320
|
void
|
322
321
|
Init_uchardet()
|
323
322
|
{
|
data/lib/uchardet.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uchardet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitri Goutnik
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-12-
|
12
|
+
date: 2009-12-20 00:00:00 +03:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -53,7 +53,7 @@ files:
|
|
53
53
|
- test/test_uchardet_cli.rb
|
54
54
|
- test/test_uchardet_extn.rb
|
55
55
|
has_rdoc: true
|
56
|
-
homepage: http://
|
56
|
+
homepage: http://rubyforge.org/projects/uchardet/
|
57
57
|
licenses: []
|
58
58
|
|
59
59
|
post_install_message:
|