uchardet 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/README.rdoc +2 -0
- data/ext/uchardet/uchardet.c +18 -19
- data/lib/uchardet.rb +1 -1
- metadata +3 -3
data/History.txt
CHANGED
data/README.rdoc
CHANGED
data/ext/uchardet/uchardet.c
CHANGED
@@ -10,7 +10,7 @@ static VALUE cUChardetError;
|
|
10
10
|
static VALUE cUCharsetDetector;
|
11
11
|
|
12
12
|
static void
|
13
|
-
|
13
|
+
ensure(UErrorCode status)
|
14
14
|
{
|
15
15
|
if (U_FAILURE(status)) {
|
16
16
|
VALUE ex = rb_exc_new2(cUChardetError, u_errorName(status));
|
@@ -30,7 +30,7 @@ UCharsetDetector_alloc(VALUE klass)
|
|
30
30
|
{
|
31
31
|
UErrorCode status = U_ZERO_ERROR;
|
32
32
|
UCharsetDetector* detector = ucsdet_open(&status);
|
33
|
-
|
33
|
+
ensure(status);
|
34
34
|
|
35
35
|
return Data_Wrap_Struct(klass, NULL, UCharsetDetector_free, detector);
|
36
36
|
}
|
@@ -39,7 +39,7 @@ UCharsetDetector_alloc(VALUE klass)
|
|
39
39
|
* call-seq:
|
40
40
|
* input_filtered
|
41
41
|
*
|
42
|
-
*
|
42
|
+
* Return filtering flag value this charset detector.
|
43
43
|
*/
|
44
44
|
static VALUE
|
45
45
|
UCharsetDetector_get_input_filtered(VALUE self)
|
@@ -90,7 +90,6 @@ static VALUE
|
|
90
90
|
UCharsetDetector_set_text(VALUE self, VALUE text)
|
91
91
|
{
|
92
92
|
return rb_iv_set(self, "@text", text);
|
93
|
-
return text;
|
94
93
|
}
|
95
94
|
|
96
95
|
/*
|
@@ -118,7 +117,6 @@ static VALUE
|
|
118
117
|
UCharsetDetector_set_declared_encoding(VALUE self, VALUE declared_encoding)
|
119
118
|
{
|
120
119
|
return rb_iv_set(self, "@declared_encoding", declared_encoding);
|
121
|
-
return declared_encoding;
|
122
120
|
}
|
123
121
|
|
124
122
|
static void
|
@@ -132,7 +130,7 @@ set_text(VALUE self, VALUE text)
|
|
132
130
|
Data_Get_Struct(self, UCharsetDetector, detector);
|
133
131
|
|
134
132
|
ucsdet_setText(detector, StringValuePtr(text), RSTRING_LEN(text), &status);
|
135
|
-
|
133
|
+
ensure(status);
|
136
134
|
|
137
135
|
UCharsetDetector_set_text(self, text);
|
138
136
|
}
|
@@ -149,7 +147,7 @@ set_declared_encoding(VALUE self, VALUE declared_encoding)
|
|
149
147
|
Data_Get_Struct(self, UCharsetDetector, detector);
|
150
148
|
|
151
149
|
ucsdet_setDeclaredEncoding(detector, StringValuePtr(declared_encoding), RSTRING_LEN(declared_encoding), &status);
|
152
|
-
|
150
|
+
ensure(status);
|
153
151
|
|
154
152
|
UCharsetDetector_set_declared_encoding(self, declared_encoding);
|
155
153
|
}
|
@@ -211,16 +209,16 @@ UCharsetDetector_detect(int argc, VALUE *argv, VALUE self)
|
|
211
209
|
Data_Get_Struct(self, UCharsetDetector, detector);
|
212
210
|
|
213
211
|
const UCharsetMatch *match = ucsdet_detect(detector, &status);
|
214
|
-
|
212
|
+
ensure(status);
|
215
213
|
|
216
214
|
const char *encoding_name = ucsdet_getName(match, &status);
|
217
|
-
|
215
|
+
ensure(status);
|
218
216
|
|
219
217
|
int32_t encoding_confidence = ucsdet_getConfidence(match, &status);
|
220
|
-
|
218
|
+
ensure(status);
|
221
219
|
|
222
220
|
const char *encoding_language = ucsdet_getLanguage(match, &status);
|
223
|
-
|
221
|
+
ensure(status);
|
224
222
|
|
225
223
|
VALUE hash = rb_hash_new();
|
226
224
|
rb_hash_aset(hash, ID2SYM(rb_intern("encoding")), rb_str_new2(encoding_name));
|
@@ -262,20 +260,20 @@ UCharsetDetector_detect_all(int argc, VALUE *argv, VALUE self)
|
|
262
260
|
int32_t matches_found;
|
263
261
|
|
264
262
|
const UCharsetMatch **matches = ucsdet_detectAll(detector, &matches_found, &status);
|
265
|
-
|
263
|
+
ensure(status);
|
266
264
|
|
267
265
|
VALUE ary = rb_ary_new();
|
268
266
|
int i = 0;
|
269
267
|
|
270
268
|
for (i = 0; i < matches_found; i++) {
|
271
269
|
const char *encoding_name = ucsdet_getName(matches[i], &status);
|
272
|
-
|
270
|
+
ensure(status);
|
273
271
|
|
274
272
|
int32_t encoding_confidence = ucsdet_getConfidence(matches[i], &status);
|
275
|
-
|
273
|
+
ensure(status);
|
276
274
|
|
277
275
|
const char *encoding_language = ucsdet_getLanguage(matches[i], &status);
|
278
|
-
|
276
|
+
ensure(status);
|
279
277
|
|
280
278
|
VALUE hash = rb_hash_new();
|
281
279
|
rb_hash_aset(hash, ID2SYM(rb_intern("encoding")), rb_str_new2(encoding_name));
|
@@ -292,8 +290,7 @@ UCharsetDetector_detect_all(int argc, VALUE *argv, VALUE self)
|
|
292
290
|
* call-seq:
|
293
291
|
* detectable_charsets
|
294
292
|
*
|
295
|
-
* Get array of names of all detectable charsets that are known to the
|
296
|
-
* charset detection service.
|
293
|
+
* Get array of names of all detectable charsets that are known to the charset detection service.
|
297
294
|
*/
|
298
295
|
static VALUE
|
299
296
|
UCharsetDetector_get_detectable_charsets(VALUE self)
|
@@ -303,14 +300,14 @@ UCharsetDetector_get_detectable_charsets(VALUE self)
|
|
303
300
|
UErrorCode status = U_ZERO_ERROR;
|
304
301
|
|
305
302
|
UEnumeration *charsets = ucsdet_getAllDetectableCharsets(detector, &status);
|
306
|
-
|
303
|
+
ensure(status);
|
307
304
|
|
308
305
|
VALUE ary = rb_ary_new();
|
309
306
|
int32_t result_length;
|
310
307
|
const char *charset_name;
|
311
308
|
|
312
309
|
while (charset_name = uenum_next(charsets, &result_length, &status)) {
|
313
|
-
|
310
|
+
ensure(status);
|
314
311
|
rb_ary_push(ary, rb_str_new2(charset_name));
|
315
312
|
}
|
316
313
|
uenum_close(charsets);
|
@@ -318,6 +315,8 @@ UCharsetDetector_get_detectable_charsets(VALUE self)
|
|
318
315
|
return ary;
|
319
316
|
}
|
320
317
|
|
318
|
+
/*
|
319
|
+
*/
|
321
320
|
void
|
322
321
|
Init_uchardet()
|
323
322
|
{
|
data/lib/uchardet.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uchardet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitri Goutnik
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-12-
|
12
|
+
date: 2009-12-20 00:00:00 +03:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -53,7 +53,7 @@ files:
|
|
53
53
|
- test/test_uchardet_cli.rb
|
54
54
|
- test/test_uchardet_extn.rb
|
55
55
|
has_rdoc: true
|
56
|
-
homepage: http://
|
56
|
+
homepage: http://rubyforge.org/projects/uchardet/
|
57
57
|
licenses: []
|
58
58
|
|
59
59
|
post_install_message:
|