charlock_holmes 0.4.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- charlock_holmes (0.4.0)
4
+ charlock_holmes (0.5.0)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
@@ -0,0 +1,41 @@
1
+ #ifndef CHARLOCK_COMMON_H
2
+ #define CHARLOCK_COMMON_H
3
+
4
+ // tell rbx not to use its caching compat layer
5
+ // by doing this we're making a promise to RBX that
6
+ // we'll never modify the pointers we get back from RSTRING_PTR
7
+ #define RSTRING_NOT_MODIFIED
8
+
9
+ #include <ruby.h>
10
+ #ifdef HAVE_RUBY_ENCODING_H
11
+ #include <ruby/encoding.h>
12
+ #endif
13
+
14
+ static VALUE charlock_new_enc_str(const char *str, size_t len, void *encoding)
15
+ {
16
+ #ifdef HAVE_RUBY_ENCODING_H
17
+ return rb_external_str_new_with_enc(str, len, (rb_encoding *)encoding);
18
+ #else
19
+ return rb_str_new(str, len);
20
+ #endif
21
+ }
22
+
23
+ static VALUE charlock_new_str(const char *str, size_t len)
24
+ {
25
+ #ifdef HAVE_RUBY_ENCODING_H
26
+ return rb_external_str_new_with_enc(str, len, rb_utf8_encoding());
27
+ #else
28
+ return rb_str_new(str, len);
29
+ #endif
30
+ }
31
+
32
+ static VALUE charlock_new_str2(const char *str)
33
+ {
34
+ #ifdef HAVE_RUBY_ENCODING_H
35
+ return rb_external_str_new_with_enc(str, strlen(str), rb_utf8_encoding());
36
+ #else
37
+ return rb_str_new2(str);
38
+ #endif
39
+ }
40
+
41
+ #endif
@@ -0,0 +1,54 @@
1
+ #include "unicode/ucnv.h"
2
+ #include "common.h"
3
+
4
+ extern VALUE rb_mCharlockHolmes;
5
+ static VALUE rb_cConverter;
6
+
7
+ static VALUE rb_converter_convert(VALUE self, VALUE rb_txt, VALUE rb_src_enc, VALUE rb_dst_enc) {
8
+ VALUE rb_out;
9
+ const char *src_enc;
10
+ const char *dst_enc;
11
+ const char *src_txt;
12
+ char *out_buf;
13
+ void *rb_enc = NULL;
14
+ int32_t src_len;
15
+ int32_t out_len;
16
+ UErrorCode status = U_ZERO_ERROR;
17
+
18
+ src_txt = RSTRING_PTR(rb_txt);
19
+ src_len = RSTRING_LEN(rb_txt);
20
+ src_enc = RSTRING_PTR(rb_src_enc);
21
+ dst_enc = RSTRING_PTR(rb_dst_enc);
22
+
23
+ // first determine the size of the output buffer
24
+ out_len = ucnv_convert(dst_enc, src_enc, NULL, 0, src_txt, src_len, &status);
25
+ if (status != U_BUFFER_OVERFLOW_ERROR) {
26
+ rb_raise(rb_eArgError, u_errorName(status));
27
+ }
28
+ out_buf = xmalloc(out_len);
29
+
30
+ // now do the actual conversion
31
+ status = U_ZERO_ERROR;
32
+ out_len = ucnv_convert(dst_enc, src_enc, out_buf, out_len, src_txt, src_len, &status);
33
+ if (U_FAILURE(status)) {
34
+ xfree(out_buf);
35
+ rb_raise(rb_eArgError, u_errorName(status));
36
+ }
37
+
38
+ xfree(out_buf);
39
+
40
+ #ifdef HAVE_RUBY_ENCODING_H
41
+ (rb_encoding *)rb_enc = rb_enc_find(dst_enc);
42
+ #endif
43
+
44
+ rb_out = charlock_new_enc_str(out_buf, out_len, rb_enc);
45
+
46
+ return rb_out;
47
+ }
48
+
49
+ void _init_charlock_converter() {
50
+ rb_cConverter = rb_define_class_under(rb_mCharlockHolmes, "Converter", rb_cObject);
51
+
52
+ // rb_define_alloc_func(rb_cConverter, rb_converter__alloc);
53
+ rb_define_singleton_method(rb_cConverter, "convert", rb_converter_convert, 3);
54
+ }
@@ -1,31 +1,9 @@
1
1
  #include "unicode/ucsdet.h"
2
+ #include "common.h"
2
3
 
3
- #include <ruby.h>
4
- #ifdef HAVE_RUBY_ENCODING_H
5
- #include <ruby/encoding.h>
6
- #endif
7
-
8
- static VALUE rb_mCharlockHolmes;
4
+ extern VALUE rb_mCharlockHolmes;
9
5
  static VALUE rb_cEncodingDetector;
10
6
 
11
- static VALUE charlock_new_str(const char *str, size_t len)
12
- {
13
- #ifdef HAVE_RUBY_ENCODING_H
14
- return rb_external_str_new_with_enc(str, len, rb_utf8_encoding());
15
- #else
16
- return rb_str_new(str, len);
17
- #endif
18
- }
19
-
20
- static VALUE charlock_new_str2(const char *str)
21
- {
22
- #ifdef HAVE_RUBY_ENCODING_H
23
- return rb_external_str_new_with_enc(str, strlen(str), rb_utf8_encoding());
24
- #else
25
- return rb_str_new2(str);
26
- #endif
27
- }
28
-
29
7
  static VALUE rb_encdec_buildmatch(const UCharsetMatch *match)
30
8
  {
31
9
  UErrorCode status = U_ZERO_ERROR;
@@ -229,10 +207,8 @@ static VALUE rb_encdec__alloc(VALUE klass)
229
207
  return Data_Wrap_Struct(klass, NULL, rb_encdec__free, (void *)csd);
230
208
  }
231
209
 
232
- void Init_charlock_holmes()
210
+ void _init_charlock_encoding_detector()
233
211
  {
234
- rb_mCharlockHolmes = rb_define_module("CharlockHolmes");
235
-
236
212
  rb_cEncodingDetector = rb_define_class_under(rb_mCharlockHolmes, "EncodingDetector", rb_cObject);
237
213
  rb_define_alloc_func(rb_cEncodingDetector, rb_encdec__alloc);
238
214
  rb_define_method(rb_cEncodingDetector, "detect", rb_encdec_detect, -1);
@@ -0,0 +1,13 @@
1
+ #include "common.h"
2
+
3
+ extern void _init_charlock_encoding_detector();
4
+ extern void _init_charlock_converter();
5
+
6
+ VALUE rb_mCharlockHolmes;
7
+
8
+ void Init_charlock_holmes() {
9
+ rb_mCharlockHolmes = rb_define_module("CharlockHolmes");
10
+
11
+ _init_charlock_encoding_detector();
12
+ _init_charlock_converter();
13
+ }
@@ -1,3 +1,3 @@
1
1
  module CharlockHolmes
2
- VERSION = "0.4.1"
2
+ VERSION = "0.5.0"
3
3
  end
@@ -0,0 +1,29 @@
1
+ # encoding: utf-8
2
+
3
+ require 'spec_helper'
4
+
5
+ describe CharlockHolmes::Converter do
6
+ test 'is able to convert regular ascii content from ISO-8859-1 to UTF-16, and back again' do
7
+ input = 'test'
8
+
9
+ output = CharlockHolmes::Converter.convert input, 'ISO-8859-1', 'UTF-16'
10
+ assert input.bytesize < output.bytesize
11
+ assert input != output
12
+
13
+ output = CharlockHolmes::Converter.convert output, 'UTF-16', 'ISO-8859-1'
14
+ assert input.bytesize == output.bytesize
15
+ assert input == output
16
+ end
17
+
18
+ test 'is able to convert UTF-8 content from UTF-8 to UTF-16, and back again' do
19
+ input = 'λ, λ, λ'
20
+
21
+ output = CharlockHolmes::Converter.convert input, 'UTF-8', 'UTF-16'
22
+ assert input.bytesize < output.bytesize
23
+ assert input != output
24
+
25
+ output = CharlockHolmes::Converter.convert output, 'UTF-16', 'UTF-8'
26
+ assert input.bytesize == output.bytesize
27
+ assert input == output
28
+ end
29
+ end
metadata CHANGED
@@ -1,58 +1,79 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: charlock_holmes
3
- version: !ruby/object:Gem::Version
4
- version: 0.4.1
3
+ version: !ruby/object:Gem::Version
4
+ hash: 11
5
5
  prerelease:
6
+ segments:
7
+ - 0
8
+ - 5
9
+ - 0
10
+ version: 0.5.0
6
11
  platform: ruby
7
- authors:
12
+ authors:
8
13
  - Brian Lopez
9
- - Vicent Martí
14
+ - "Vicent Mart\xC3\xAD"
10
15
  autorequire:
11
16
  bindir: bin
12
17
  cert_chain: []
13
- date: 2011-08-25 00:00:00.000000000 -07:00
18
+
19
+ date: 2011-08-26 00:00:00 -07:00
14
20
  default_executable:
15
- dependencies:
16
- - !ruby/object:Gem::Dependency
21
+ dependencies:
22
+ - !ruby/object:Gem::Dependency
17
23
  name: rake-compiler
18
- requirement: &70218132232740 !ruby/object:Gem::Requirement
24
+ prerelease: false
25
+ requirement: &id001 !ruby/object:Gem::Requirement
19
26
  none: false
20
- requirements:
21
- - - ! '>='
22
- - !ruby/object:Gem::Version
27
+ requirements:
28
+ - - ">="
29
+ - !ruby/object:Gem::Version
30
+ hash: 9
31
+ segments:
32
+ - 0
33
+ - 7
34
+ - 5
23
35
  version: 0.7.5
24
36
  type: :development
25
- prerelease: false
26
- version_requirements: *70218132232740
27
- - !ruby/object:Gem::Dependency
37
+ version_requirements: *id001
38
+ - !ruby/object:Gem::Dependency
28
39
  name: rspec
29
- requirement: &70218114472220 !ruby/object:Gem::Requirement
40
+ prerelease: false
41
+ requirement: &id002 !ruby/object:Gem::Requirement
30
42
  none: false
31
- requirements:
32
- - - ! '>='
33
- - !ruby/object:Gem::Version
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ hash: 15
47
+ segments:
48
+ - 2
49
+ - 0
50
+ - 0
34
51
  version: 2.0.0
35
52
  type: :development
36
- prerelease: false
37
- version_requirements: *70218114472220
38
- - !ruby/object:Gem::Dependency
53
+ version_requirements: *id002
54
+ - !ruby/object:Gem::Dependency
39
55
  name: chardet
40
- requirement: &70218114471840 !ruby/object:Gem::Requirement
56
+ prerelease: false
57
+ requirement: &id003 !ruby/object:Gem::Requirement
41
58
  none: false
42
- requirements:
43
- - - ! '>='
44
- - !ruby/object:Gem::Version
45
- version: '0'
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ hash: 3
63
+ segments:
64
+ - 0
65
+ version: "0"
46
66
  type: :development
47
- prerelease: false
48
- version_requirements: *70218114471840
67
+ version_requirements: *id003
49
68
  description:
50
69
  email: seniorlopez@gmail.com
51
70
  executables: []
52
- extensions:
71
+
72
+ extensions:
53
73
  - ext/charlock_holmes/extconf.rb
54
74
  extra_rdoc_files: []
55
- files:
75
+
76
+ files:
56
77
  - .gitignore
57
78
  - .rspec
58
79
  - Gemfile
@@ -63,12 +84,16 @@ files:
63
84
  - benchmark/detection.rb
64
85
  - benchmark/test.txt
65
86
  - charlock_holmes.gemspec
66
- - ext/charlock_holmes/charlock_holmes.c
87
+ - ext/charlock_holmes/common.h
88
+ - ext/charlock_holmes/converter.c
89
+ - ext/charlock_holmes/encoding_detector.c
90
+ - ext/charlock_holmes/ext.c
67
91
  - ext/charlock_holmes/extconf.rb
68
92
  - lib/charlock_holmes.rb
69
93
  - lib/charlock_holmes/encoding_detector.rb
70
94
  - lib/charlock_holmes/string.rb
71
95
  - lib/charlock_holmes/version.rb
96
+ - spec/converter_spec.rb
72
97
  - spec/encoding_detector_spec.rb
73
98
  - spec/fixtures/AnsiGraph.psm1
74
99
  - spec/fixtures/TwigExtensionsDate.es.yml
@@ -81,31 +106,40 @@ files:
81
106
  has_rdoc: true
82
107
  homepage: http://github.com/brianmario/charlock_holmes
83
108
  licenses: []
109
+
84
110
  post_install_message:
85
- rdoc_options:
111
+ rdoc_options:
86
112
  - --charset=UTF-8
87
- require_paths:
113
+ require_paths:
88
114
  - lib
89
115
  - ext
90
- required_ruby_version: !ruby/object:Gem::Requirement
116
+ required_ruby_version: !ruby/object:Gem::Requirement
91
117
  none: false
92
- requirements:
93
- - - ! '>='
94
- - !ruby/object:Gem::Version
95
- version: '0'
96
- required_rubygems_version: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ hash: 3
122
+ segments:
123
+ - 0
124
+ version: "0"
125
+ required_rubygems_version: !ruby/object:Gem::Requirement
97
126
  none: false
98
- requirements:
99
- - - ! '>='
100
- - !ruby/object:Gem::Version
101
- version: '0'
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ hash: 3
131
+ segments:
132
+ - 0
133
+ version: "0"
102
134
  requirements: []
135
+
103
136
  rubyforge_project:
104
137
  rubygems_version: 1.6.2
105
138
  signing_key:
106
139
  specification_version: 3
107
140
  summary: Character encoding detection, brought to you by ICU
108
- test_files:
141
+ test_files:
142
+ - spec/converter_spec.rb
109
143
  - spec/encoding_detector_spec.rb
110
144
  - spec/fixtures/AnsiGraph.psm1
111
145
  - spec/fixtures/TwigExtensionsDate.es.yml