compact_enc_det 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/compact_enc_det/compact_enc_det.cc +12 -11
- data/lib/compact_enc_det/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 323e8bf5836cb4039fd07828e416c23013b517056d62fea716bb7a9c09b68da4
|
4
|
+
data.tar.gz: e8f0954b74b1013805c6d60d639f5fc16e546b4820c8eae7ab9305cba9f175e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 54e49df974d7b5f800aa847f1e5ac721ba1e4a0b8aeff09af801720711f4b086a0a2c020e3079d3417a95b2b329b751fac2689253b9ce5116f66ea462ea83a77
|
7
|
+
data.tar.gz: 700275aea11d8f576e05169894916b22b84186b60cba3036fdc3080b6e13b8f7946f39a729ac49bb6dfbf02ec4428c90289043812b405d920f7b49e89c13b3b8
|
data/README.md
CHANGED
@@ -32,7 +32,7 @@ which is a thin wrapper around `CompactEncDet::DetectEncoding` and `MimeEncoding
|
|
32
32
|
|
33
33
|
> ```ruby
|
34
34
|
> file = File.read("unknown-encoding.txt")
|
35
|
-
> result = CompactEncDet.detect_encoding(file
|
35
|
+
> result = CompactEncDet.detect_encoding(file)
|
36
36
|
> result.encoding
|
37
37
|
> # => #<Encoding:Windows-1250>
|
38
38
|
> result.bytes_consumed
|
data/ext/compact_enc_det/compact_enc_det.cc
CHANGED
@@ -34,8 +34,8 @@ void Init_detect_encoding_result(VALUE rb_mCompactEncDet)
|
|
34
34
|
// for the CompactEncDet::DetectEncoding C++ function
|
35
35
|
static VALUE detect_encoding(int argc, VALUE *argv, VALUE self)
|
36
36
|
{
|
37
|
-
VALUE
|
38
|
-
|
37
|
+
VALUE text,
|
38
|
+
text_length,
|
39
39
|
url_hint,
|
40
40
|
http_charset_hint,
|
41
41
|
meta_charset_hint,
|
@@ -45,9 +45,9 @@ static VALUE detect_encoding(int argc, VALUE *argv, VALUE self)
|
|
45
45
|
ignore_7bit_mail_encodings;
|
46
46
|
|
47
47
|
// Parse the Ruby arguments
|
48
|
-
rb_scan_args(argc, argv, "
|
49
|
-
&
|
50
|
-
&
|
48
|
+
rb_scan_args(argc, argv, "17",
|
49
|
+
&text,
|
50
|
+
&text_length,
|
51
51
|
&url_hint,
|
52
52
|
&http_charset_hint,
|
53
53
|
&meta_charset_hint,
|
@@ -56,9 +56,9 @@ static VALUE detect_encoding(int argc, VALUE *argv, VALUE self)
|
|
56
56
|
&corpus_type,
|
57
57
|
&ignore_7bit_mail_encodings);
|
58
58
|
|
59
|
-
// Convert the Ruby
|
60
|
-
const char
|
61
|
-
const int
|
59
|
+
// Convert the Ruby arguments to C++ types
|
60
|
+
const char* c_text = StringValueCStr(text);
|
61
|
+
const int c_text_length = NIL_P(text_length) ? strlen(c_text) : NUM2INT(text_length);
|
62
62
|
|
63
63
|
// Declare the output variables
|
64
64
|
int bytes_consumed;
|
@@ -66,7 +66,8 @@ static VALUE detect_encoding(int argc, VALUE *argv, VALUE self)
|
|
66
66
|
|
67
67
|
// Detect the encoding using CompactEncDet::DetectEncoding
|
68
68
|
Encoding encoding = CompactEncDet::DetectEncoding(
|
69
|
-
|
69
|
+
c_text,
|
70
|
+
c_text_length,
|
70
71
|
NIL_P(url_hint) ? nullptr : StringValueCStr(url_hint),
|
71
72
|
NIL_P(http_charset_hint) ? nullptr : StringValueCStr(http_charset_hint),
|
72
73
|
NIL_P(meta_charset_hint) ? nullptr : StringValueCStr(meta_charset_hint),
|
@@ -76,11 +77,11 @@ static VALUE detect_encoding(int argc, VALUE *argv, VALUE self)
|
|
76
77
|
NIL_P(ignore_7bit_mail_encodings) ? false : RTEST(ignore_7bit_mail_encodings),
|
77
78
|
&bytes_consumed,
|
78
79
|
&is_reliable);
|
79
|
-
|
80
|
+
|
80
81
|
// Convert the encoding enum to string using MimeEncodingName
|
81
82
|
const char* encoding_mime_name = MimeEncodingName(encoding);
|
82
83
|
VALUE rb_encoding_mime_name = rb_str_new_cstr(encoding_mime_name);
|
83
|
-
|
84
|
+
|
84
85
|
// Find the Ruby Encoding class
|
85
86
|
VALUE rb_encoding = rb_funcall(rb_cEncoding, rb_intern("find"), 1, rb_encoding_mime_name);
|
86
87
|
|