icu 0.9.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.travis.yml +11 -0
  4. data/Gemfile +3 -0
  5. data/LICENSE +20 -0
  6. data/README.md +69 -0
  7. data/Rakefile +38 -0
  8. data/benchmark/normalization.rb +106 -0
  9. data/benchmark/normalization_phrases.txt +1031 -0
  10. data/benchmark/normalization_result.txt +45 -0
  11. data/benchmark/normalization_wikip.txt +2838 -0
  12. data/ext/icu/extconf.rb +242 -0
  13. data/ext/icu/icu.c +18 -0
  14. data/ext/icu/icu.h +78 -0
  15. data/ext/icu/icu_charset_detector.c +192 -0
  16. data/ext/icu/icu_collator.c +138 -0
  17. data/ext/icu/icu_locale.c +852 -0
  18. data/ext/icu/icu_normalizer.c +122 -0
  19. data/ext/icu/icu_number_format.c +0 -0
  20. data/ext/icu/icu_spoof_checker.c +194 -0
  21. data/ext/icu/icu_transliterator.c +159 -0
  22. data/ext/icu/internal_encoding.c +38 -0
  23. data/ext/icu/internal_ustring.c +304 -0
  24. data/ext/icu/internal_utils.c +50 -0
  25. data/ext/icu/rb_errors.c +14 -0
  26. data/icu.gemspec +22 -0
  27. data/lib/icu.rb +6 -18
  28. data/lib/icu/charset_detector.rb +5 -0
  29. data/lib/icu/collator.rb +24 -0
  30. data/lib/icu/locale.rb +19 -0
  31. data/lib/icu/transliterator.rb +8 -0
  32. data/lib/icu/version.rb +3 -0
  33. data/spec/charset_detector_spec.rb +47 -0
  34. data/spec/collator_spec.rb +73 -0
  35. data/spec/locale_spec.rb +312 -0
  36. data/spec/normalizer_spec.rb +35 -0
  37. data/spec/spec_helper.rb +8 -0
  38. data/spec/spoof_checker_spec.rb +56 -0
  39. data/spec/transliterator_spec.rb +41 -0
  40. metadata +132 -55
  41. data/COPYING +0 -674
  42. data/COPYING.LESSER +0 -165
  43. data/README +0 -81
  44. data/ext/extconf.rb +0 -31
  45. data/ext/icu.c +0 -128
  46. data/ext/icu.h +0 -34
  47. data/ext/icu_locale.c +0 -330
  48. data/ext/icu_locale_country.c +0 -99
  49. data/ext/icu_locale_language.c +0 -99
  50. data/ext/icu_numeric.c +0 -161
  51. data/ext/icu_time.c +0 -391
  52. data/test/test_locale.rb +0 -73
  53. data/test/test_numeric.rb +0 -78
  54. data/test/test_time.rb +0 -75
@@ -0,0 +1,242 @@
1
+ require 'mkmf'
2
+
3
+ require 'rubygems'
4
+
5
+ ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
6
+
7
+ # Utility functions
8
+
9
# Reports whether the build was asked to link against a system-provided ICU.
#
# The `--use-system-libraries` flag is looked up through `arg_config`; the
# default handed to it is true exactly when the ICU_USE_SYSTEM_LIBRARIES
# environment variable is set.
def using_system_libraries?
  env_default = ENV.key?('ICU_USE_SYSTEM_LIBRARIES')
  arg_config('--use-system-libraries', env_default)
end
12
+
13
+ # Building with system ICU
14
+
15
+ if using_system_libraries?
16
+ message "Building ICU using system libraries.\n Not supported yet, PR welcome!"
17
+ exit 1
18
+
19
+ unless dir_config('icu').any?
20
+ base = if !`which brew`.empty?
21
+ `brew --prefix`.strip
22
+ elsif File.exists?("/usr/local/Cellar/icu4c")
23
+ '/usr/local/Cellar'
24
+ end
25
+
26
+ if base and icu4c = Dir[File.join(base, 'Cellar/icu4c/*')].sort.last
27
+ $INCFLAGS << " -I#{icu4c}/include "
28
+ $LDFLAGS << " -L#{icu4c}/lib "
29
+ end
30
+ end
31
+
32
+ unless have_library 'icui18n' and have_header 'unicode/ucnv.h'
33
+ STDERR.puts <<-EOS
34
+ ************************************************************************
35
+ icu not found.
36
+ install by brew install icu4c or apt-get install libicu-dev)
37
+ ************************************************************************ww
38
+ EOS
39
+
40
+ exit(1)
41
+ end
42
+
43
+ have_library 'icuuc' or abort 'libicuuc missing'
44
+ have_library 'icudata' or abort 'libicudata missing'
45
+ else
46
+ message "Building ICU from source.\n"
47
+
48
+ # The gem version constraint in the Rakefile is not respected at install time.
49
+ # Keep this version in sync with the one in the Rakefile !
50
+ require 'rubygems'
51
+ gem 'mini_portile2', '~> 2.2.0'
52
+ require 'mini_portile2'
53
+
54
+ # Checkout the source code of ICU.
55
+ # http://site.icu-project.org/download/
56
+ # http://userguide.icu-project.org/howtouseicu
57
+ # Also check the readme of ICU release file.
58
# mini_portile recipe for building ICU4C from its source archive.
#
# Reference material for the ICU source build:
#   http://site.icu-project.org/download/
#   http://userguide.icu-project.org/howtouseicu
# Also check the readme shipped in the ICU release archive.
class ICURecipe < MiniPortile
  # Sets up the recipe and the environment passed to ICU's configure step.
  #
  # @param name [String] recipe name (also selects ROOT/patches/<name>/*.patch)
  # @param version [String] ICU version
  # @param static_p [Boolean] when truthy, static libraries are built as well
  #   and position-independent / static-implementation flags are added
  # @yield [self] lets the caller set files and extra configure options
  def initialize(name, version, static_p)
    super(name, version)
    self.target = File.join(ROOT, "ports")
    # Prefer host_alias over host in order to use i586-mingw32msvc as
    # correct compiler prefix for cross build, but use host if not set.
    host_alias = RbConfig::CONFIG["host_alias"].to_s
    self.host = host_alias.empty? ? RbConfig::CONFIG["host"] : host_alias
    self.patch_files = Dir[File.join(ROOT, "patches", name, "*.patch")].sort
    self.configure_options << "--libdir=#{File.join(self.path, "lib")}"

    yield self

    # Build environment; each key is lazily seeded with a mutable copy of the
    # process environment value, or '' when the variable is unset.
    env = Hash.new do |hash, key|
      hash[key] = ENV[key].to_s.dup
    end

    self.configure_options.flatten!

    # Options of the form VAR=value are environment settings, not configure
    # switches: move them into env and drop them from the option list.
    self.configure_options.delete_if do |option|
      case option
      when /\A(\w+)=(.*)\z/
        env[$1] = $2
        true
      else
        false
      end
    end

    if static_p
      self.configure_options += [
        "--enable-shared",
        "--enable-static",
        "--disable-renaming"
      ]
      env['CFLAGS'] = "-fPIC #{env['CFLAGS']}"
      env['CPPFLAGS'] = "-DU_CHARSET_IS_UTF8=1 -DU_USING_ICU_NAMESPACE=0 -DU_STATIC_IMPLEMENTATION #{env['CPPFLAGS']}"
      env['CXXFLAGS'] = "-fPIC -fno-exceptions #{env['CXXFLAGS']}"
      # Extend the caller's LDFLAGS (not CFLAGS) so user linker flags survive.
      env['LDFLAGS'] = "-fPIC -static-libstdc++ #{env['LDFLAGS']}"
    else
      self.configure_options += [
        "--enable-shared",
        "--disable-static",
      ]
    end

    # Universal Ruby builds: pass Ruby's ARCH_FLAG along unless the caller
    # already specified an -arch flag.
    if RbConfig::CONFIG['target_cpu'] == 'universal'
      %w[CFLAGS LDFLAGS].each do |key|
        unless env[key].include?('-arch')
          env[key] += " #{RbConfig::CONFIG['ARCH_FLAG']}"
        end
      end
    end

    @env = env
  end

  # Prints a notice describing the packaged ICU version and any patches that
  # will be applied, then delegates to the superclass implementation.
  def cook
    message <<-"EOS"
************************************************************************
IMPORTANT NOTICE:

Building ICU with a packaged version of #{name}-#{version}#{'.' if self.patch_files.empty?}
    EOS

    unless self.patch_files.empty?
      message "with the following patches applied:\n"

      self.patch_files.each do |patch|
        message "\t- %s\n" % File.basename(patch)
      end
    end

    message <<-"EOS"

gem install icu -- --use-system-libraries

If you are using Bundler, tell it to use the option:

bundle config build.icu --use-system-libraries
bundle install
    EOS

    message <<-"EOS"
************************************************************************
    EOS
    super
  end

  # Runs ICU's runConfigureICU wrapper with the platform name derived from
  # RUBY_PLATFORM and the prepared environment, then calls the superclass
  # configure step.
  def configure
    platform = if RUBY_PLATFORM =~ /mingw|mswin/
                 'MSYS/MSVC'
               elsif RUBY_PLATFORM =~ /darwin/
                 'MacOSX'
               else
                 'Linux'
               end
    execute('ICU Configure', [@env] + ['./runConfigureICU', platform] + computed_options)
    super
  end

  # Build directory: the `source` subdirectory of the first directory found
  # under tmp_path.
  def work_path
    File.join(Dir.glob("#{tmp_path}/*").find { |d| File.directory?(d) }, 'source')
  end
end
165
+
166
+ message "Using mini_portile version #{MiniPortile::VERSION}\n"
167
+
168
+ static_p = enable_config('static', true) or
169
+ message "Static linking is disabled.\n"
170
+ recipes = []
171
+
172
+ libicu_recipe = ICURecipe.new("libicu", "59.1", static_p) do |recipe|
173
+ recipe.files = [{
174
+ url: "https://downloads.sourceforge.net/project/icu/ICU4C/59.1/icu4c-59_1-src.tgz?r=&ts=1501595646",
175
+ sha256: "7132fdaf9379429d004005217f10e00b7d2319d0fea22bdfddef8991c45b75fe"
176
+ # gpg: Signature made Fri Apr 14 21:00:23 2017 CEST using RSA key ID 4FB419E3
177
+ # gpg: requesting key 4FB419E3 from hkps server hkps.pool.sks-keyservers.net
178
+ # gpg: key 4FB419E3: public key "Steven R. Loomis (filfla-signing) <srloomis@us.ibm.com>" imported
179
+ # gpg: 3 marginal(s) needed, 1 complete(s) needed, PGP trust model
180
+ # gpg: depth: 0 valid: 2 signed: 1 trust: 0-, 0q, 0n, 0m, 0f, 2u
181
+ # gpg: depth: 1 valid: 1 signed: 0 trust: 1-, 0q, 0n, 0m, 0f, 0u
182
+ # gpg: next trustdb check due at 2018-08-19
183
+ # gpg: Total number processed: 1
184
+ # gpg: imported: 1 (RSA: 1)
185
+ # gpg: Good signature from "Steven R. Loomis (filfla-signing) <srloomis@us.ibm.com>" [unknown]
186
+ # gpg: aka "Steven R. Loomis (filfla-signing) <srl295@gmail.com>" [unknown]
187
+ # gpg: aka "Steven R. Loomis (filfla-signing) <srl@icu-project.org>" [unknown]
188
+ # gpg: aka "[jpeg image of size 4680]" [unknown]
189
+ # gpg: WARNING: This key is not certified with a trusted signature!
190
+ # gpg: There is no indication that the signature belongs to the owner.
191
+ # Primary key fingerprint: BA90 283A 60D6 7BA0 DD91 0A89 3932 080F 4FB4 19E3
192
+ }]
193
+ end
194
+ recipes.push libicu_recipe
195
+
196
+ recipes.each do |recipe|
197
+ checkpoint = "#{recipe.target}/#{recipe.name}-#{recipe.version}-#{recipe.host}.installed"
198
+ unless File.exist?(checkpoint)
199
+ recipe.cook
200
+ FileUtils.touch checkpoint
201
+ end
202
+
203
+ recipe.activate
204
+ end
205
+
206
+ $libs = $libs.shellsplit.tap do |libs|
207
+ [libicu_recipe].each do |recipe|
208
+ libname = recipe.name[/\Alib(.+)\z/, 1]
209
+ # TODO: build with pkg-config
210
+ # Should do like PKG_CONFIG_PATH=/root/icu4r/ports/x86_64-pc-linux-gnu/libicu/59.1/lib/pkgconfig/ pkg-config --static icu-uc
211
+ File.join(recipe.path, "bin", "#{libname}-config").tap do |config|
212
+ # call config scripts explicit with 'sh' for compat with Windows
213
+ $CPPFLAGS = '-DU_DISABLE_RENAMING=1 -DU_CHARSET_IS_UTF8=1 -DU_USING_ICU_NAMESPACE=0 -DU_STATIC_IMPLEMENTATION' << ' ' << $CPPFLAGS
214
+ `sh #{config} --ldflags`.strip.shellsplit.each do |arg|
215
+ case arg
216
+ when /\A-L(.+)\z/
217
+ # Prioritize ports' directories
218
+ if $1.start_with?(ROOT + '/')
219
+ $LIBPATH = [$1] | $LIBPATH
220
+ else
221
+ $LIBPATH = $LIBPATH | [$1]
222
+ end
223
+ when /\A-l./
224
+ libs.unshift(arg)
225
+ else
226
+ $LDFLAGS << ' ' << arg.shellescape
227
+ end
228
+ end
229
+ $INCFLAGS = `sh #{config} --cppflags-searchpath `.strip << ' ' << $INCFLAGS
230
+ $CFLAGS = '-DU_DISABLE_RENAMING=1 -DU_CHARSET_IS_UTF8=1 -DU_USING_ICU_NAMESPACE=0 -DU_STATIC_IMPLEMENTATION' << ' ' << `sh #{config} --cflags`.strip << $CFLAGS
231
+ end
232
+ end
233
+ end.shelljoin
234
+
235
+ end
236
+
237
# Default compiler flags for the extension: optimised C99 build.
$CFLAGS << ' -O3 -funroll-loops -std=c99'
# When DEBUG is set in the environment, append extra warnings, -O0 and gdb
# debug info (appended after the flags above).
$CFLAGS << ' -Wextra -O0 -ggdb3' if ENV['DEBUG']

# Echo the final flag sets to the build output.
puts $CFLAGS, $CPPFLAGS, $CXXFLAGS

# Generate the Makefile for the 'icu/icu' extension target.
create_makefile('icu/icu')
@@ -0,0 +1,18 @@
1
+ #include "icu.h"
2
+
3
+ VALUE rb_mICU;
4
+
5
/*
 * Extension entry point (named after Ruby's Init_<name> convention).
 * Defines the top-level ICU module, then runs each component's init
 * function in the order listed below.
 */
void Init_icu(void)
{
    rb_mICU = rb_define_module("ICU");
    init_internal_encoding();
    init_rb_errors();
    init_icu_collator();
    init_icu_normalizer();
    init_icu_spoof_checker();
    init_icu_transliterator();
    init_icu_charset_detector();
    init_icu_locale();
}
17
+
18
+ /* vim: set expandtab sws=4 sw=4: */
@@ -0,0 +1,78 @@
1
+ #ifndef RUBY_EXTENSION_ICU_H_
2
+ #define RUBY_EXTENSION_ICU_H_
3
+
4
+ /* System libraries */
5
+ #include <stdlib.h>
6
+
7
+ /* Ruby headers */
8
+ #define ONIG_ESCAPE_UCHAR_COLLISION 1 // ruby.h defines UChar macro
9
+ #include <ruby.h>
10
+ #include <ruby/encoding.h>
11
+ #ifdef UChar // fail-safe
12
+ #undef UChar
13
+ #endif
14
+ #include "unicode/ustring.h"
15
+ #include "unicode/uenum.h"
16
+ #include "unicode/parseerr.h"
17
+
18
+ /* Globals */
19
+
20
+ extern VALUE rb_mICU;
21
+ extern VALUE rb_eICU_Error;
22
+ extern VALUE rb_eICU_InvalidParameterError;
23
+ extern VALUE rb_cICU_UString;
24
+ extern VALUE rb_cICU_Collator;
25
+ extern VALUE rb_cICU_Normalizer;
26
+ extern VALUE rb_cICU_SpoofChecker;
27
+ extern VALUE rb_cICU_Transliterator;
28
+ extern VALUE rb_cICU_CharsetDetector;
29
+ extern VALUE rb_cICU_CharsetDetector_Match;
30
+ extern VALUE rb_cICU_Locale;
31
+
32
+ /* Prototypes */
33
+ void Init_icu _(( void ));
34
+ void init_internal_encoding _(( void ));
35
+ void init_rb_errors _(( void ));
36
+ void init_icu_collator _(( void ));
37
+ void init_icu_normalizer _(( void ));
38
+ void init_icu_spoof_checker _(( void ));
39
+ void init_icu_transliterator _(( void ));
40
+ void init_icu_charset_detector _(( void ));
41
+ void init_icu_locale _(( void ));
42
+
43
+ int icu_is_rb_enc_idx_as_utf_8 _(( int ));
44
+ int icu_is_rb_str_as_utf_8 _(( VALUE ));
45
+ const char* icu_rb_str_enc_name _(( int ));
46
+ VALUE rb_str_enc_to_ascii_as_utf8 _(( VALUE ));
47
+ int icu_rb_str_enc_idx _(( VALUE ));
48
+ VALUE icu_enum_to_rb_ary _(( UEnumeration*, UErrorCode, long ));
49
+ extern void icu_rb_raise_icu_error _(( UErrorCode ));
50
+ extern void icu_rb_raise_icu_parse_error _(( const UParseError* ));
51
+ extern void icu_rb_raise_icu_invalid_parameter _(( const char*, const char* ));
52
+
53
+ VALUE icu_ustring_init_with_capa_enc _(( int32_t, int ));
54
+ VALUE icu_ustring_from_rb_str _(( VALUE ));
55
+ VALUE icu_ustring_from_uchar_str _(( const UChar*, int32_t ));
56
+ void icu_ustring_clear_ptr _(( VALUE ));
57
+ void icu_ustring_resize _(( VALUE, int32_t ));
58
+ void icu_ustring_set_enc _(( VALUE, int ));
59
+ VALUE icu_ustring_to_rb_enc_str_with_len _(( VALUE, int32_t ));
60
+ VALUE icu_ustring_to_rb_enc_str _(( VALUE ));
61
+ UChar* icu_ustring_ptr _(( VALUE ));
62
+ int32_t icu_ustring_len _(( VALUE ));
63
+ int32_t icu_ustring_capa _(( VALUE ));
64
+ VALUE char_buffer_to_rb_str _(( const char* ));
65
+ char* char_buffer_new _(( int32_t ));
66
+ void char_buffer_resize _(( const char*, int32_t ));
67
+ void char_buffer_free _(( const char* ));
68
+
69
+ /* Constants */
70
+ #define RUBY_C_STRING_TERMINATOR_SIZE 1
71
+
72
+ /* Macros */
73
/*
 * Encoding index to use for strings handed back to Ruby: the default
 * internal encoding's index when that is non-zero, otherwise the locale
 * encoding's index. (A C `||` would collapse the result to 0 or 1 instead
 * of yielding an index; rb_enc_to_index() returns 0 for a NULL encoding.)
 */
#define ICU_RUBY_ENCODING_INDEX \
    (rb_enc_to_index(rb_default_internal_encoding()) != 0 \
         ? rb_enc_to_index(rb_default_internal_encoding()) \
         : rb_locale_encindex())
/*
 * Name of the encoding with index _idx, or "" when no such encoding exists.
 * Fully parenthesised so it can be embedded in larger expressions.
 */
#define ICU_RB_STRING_ENC_NAME_IDX(_idx) \
    (rb_enc_from_index((_idx)) != NULL ? rb_enc_from_index((_idx))->name : "")
75
+
76
+ #endif // RUBY_EXTENSION_ICU_H_
77
+
78
+ /* vim: set expandtab sws=4 sw=4: */
@@ -0,0 +1,192 @@
1
+ #include "icu.h"
2
+ #include "unicode/ucsdet.h"
3
+
4
/*
 * Declares a local `icu_detector_data*` named _data and fills it from the
 * typed-data object held in `self`; a VALUE called `self` must therefore be
 * in scope wherever the macro is used.
 */
#define GET_DETECTOR(_data) icu_detector_data* _data; \
                            TypedData_Get_Struct(self, icu_detector_data, &icu_detector_type, _data)
6
+
7
+ VALUE rb_cICU_CharsetDetector;
8
+ VALUE rb_cICU_CharsetDetector_Match;
9
+
10
/* Native state wrapped by each ICU::CharsetDetector instance. */
typedef struct {
    VALUE rb_instance;          /* the wrapping Ruby object; assigned in detector_initialize */
    UCharsetDetector* service;  /* ICU detector handle returned by ucsdet_open */
    char* dummy_str; // used for reset
} icu_detector_data;
15
+
16
+ static void detector_free(void* _this)
17
+ {
18
+ icu_detector_data* this = _this;
19
+ if (this->dummy_str != NULL) {
20
+ ruby_xfree(this->dummy_str);
21
+ }
22
+ ucsdet_close(this->service);
23
+ }
24
+
25
+ static size_t detector_memsize(const void* _)
26
+ {
27
+ return sizeof(icu_detector_data);
28
+ }
29
+
30
/*
 * Typed-data descriptor for icu_detector_data: no mark function,
 * detector_free as the free hook and detector_memsize as the size hook.
 */
static const rb_data_type_t icu_detector_type = {
    "icu/charset_detector",
    {NULL, detector_free, detector_memsize,},
    0, 0,
    RUBY_TYPED_FREE_IMMEDIATELY,
};
36
+
37
+ static VALUE detector_populate_match_struct(const UCharsetMatch* match)
38
+ {
39
+ UErrorCode status = U_ZERO_ERROR;
40
+ int32_t confidence = ucsdet_getConfidence(match, &status);
41
+ if (U_FAILURE(status)) {
42
+ icu_rb_raise_icu_error(status);
43
+ }
44
+ status = U_ZERO_ERROR;
45
+ const char* name = ucsdet_getName(match, &status);
46
+ if (U_FAILURE(status)) {
47
+ icu_rb_raise_icu_error(status);
48
+ }
49
+ status = U_ZERO_ERROR;
50
+ const char* language = ucsdet_getLanguage(match, &status);
51
+ if (U_FAILURE(status)) {
52
+ icu_rb_raise_icu_error(status);
53
+ }
54
+ return rb_struct_new(rb_cICU_CharsetDetector_Match,
55
+ rb_str_new_cstr(name),
56
+ INT2NUM(confidence),
57
+ rb_str_new_cstr(language));
58
+ }
59
+
60
+ VALUE detector_alloc(VALUE self)
61
+ {
62
+ icu_detector_data* this;
63
+ return TypedData_Make_Struct(self, icu_detector_data, &icu_detector_type, this);
64
+ }
65
+
66
+ VALUE detector_initialize(int argc, VALUE* argv, VALUE self)
67
+ {
68
+ GET_DETECTOR(this);
69
+ this->rb_instance = self;
70
+ this->service = NULL;
71
+
72
+ UErrorCode status = U_ZERO_ERROR;
73
+ this->service = ucsdet_open(&status);
74
+ if (U_FAILURE(status)) {
75
+ icu_rb_raise_icu_error(status);
76
+ }
77
+ this->dummy_str = ALLOC_N(char, 1);
78
+ this->dummy_str[0] = '\0';
79
+
80
+ return self;
81
+ }
82
+
83
+ static inline void detector_reset_text(const icu_detector_data* this)
84
+ {
85
+ UErrorCode status = U_ZERO_ERROR;
86
+ ucsdet_setText(this->service, this->dummy_str, 0, &status);
87
+ if (U_FAILURE(status)) {
88
+ icu_rb_raise_icu_error(status);
89
+ }
90
+ }
91
+
92
+ // rb_str must be a ruby String
93
+ static inline void detector_set_text(const icu_detector_data* this, VALUE rb_str)
94
+ {
95
+ UErrorCode status = U_ZERO_ERROR;
96
+ ucsdet_setText(this->service, RSTRING_PTR(rb_str), RSTRING_LENINT(rb_str), &status);
97
+ if (U_FAILURE(status)) {
98
+ icu_rb_raise_icu_error(status);
99
+ }
100
+ }
101
+
102
+ //
103
+ // no charset appears to match the data.
104
+ // no input text has been provided
105
+ VALUE detector_detect(VALUE self, VALUE str)
106
+ {
107
+ StringValue(str);
108
+ GET_DETECTOR(this);
109
+
110
+ detector_set_text(this, str);
111
+ UErrorCode status = U_ZERO_ERROR;
112
+ const UCharsetMatch* match = ucsdet_detect(this->service, &status);
113
+ if (U_FAILURE(status)) {
114
+ icu_rb_raise_icu_error(status);
115
+ }
116
+
117
+ VALUE rb_match = detector_populate_match_struct(match);
118
+ detector_reset_text(this);
119
+ return rb_match;
120
+ }
121
+
122
+ VALUE detector_detect_all(VALUE self, VALUE str)
123
+ {
124
+ StringValue(str);
125
+ GET_DETECTOR(this);
126
+
127
+ detector_set_text(this, str);
128
+
129
+ UErrorCode status = U_ZERO_ERROR;
130
+ int32_t len_matches = 0;
131
+ const UCharsetMatch** matches = ucsdet_detectAll(this->service, &len_matches, &status);
132
+ if (U_FAILURE(status)) {
133
+ icu_rb_raise_icu_error(status);
134
+ }
135
+
136
+ VALUE result = rb_ary_new2(3); // pre-allocate some slots
137
+ for (int32_t i = 0; i < len_matches; ++i) {
138
+ rb_ary_push(result, detector_populate_match_struct(matches[i]));
139
+ }
140
+ detector_reset_text(this);
141
+ return result;
142
+ }
143
+
144
+ static inline VALUE detector_get_input_filter_internal(const icu_detector_data* this)
145
+ {
146
+ return ucsdet_isInputFilterEnabled(this->service) != 0 ? Qtrue : Qfalse;
147
+ }
148
+
149
+ VALUE detector_get_input_filter(VALUE self)
150
+ {
151
+ GET_DETECTOR(this);
152
+ return detector_get_input_filter_internal(this);
153
+ }
154
+
155
+ VALUE detector_set_input_filter(VALUE self, VALUE flag)
156
+ {
157
+ GET_DETECTOR(this);
158
+ ucsdet_enableInputFilter(this->service, flag == Qtrue ? TRUE : FALSE);
159
+ return detector_get_input_filter_internal(this);
160
+ }
161
+
162
+ VALUE detector_detectable_charsets(VALUE self)
163
+ {
164
+ GET_DETECTOR(this);
165
+ UErrorCode status = U_ZERO_ERROR;
166
+ UEnumeration* charsets = ucsdet_getAllDetectableCharsets(this->service, &status);
167
+ return icu_enum_to_rb_ary(charsets, status, 28);
168
+ }
169
+
170
+ void init_icu_charset_detector(void)
171
+ {
172
+ rb_cICU_CharsetDetector = rb_define_class_under(rb_mICU, "CharsetDetector", rb_cObject);
173
+ rb_define_alloc_func(rb_cICU_CharsetDetector, detector_alloc);
174
+ rb_define_method(rb_cICU_CharsetDetector, "initialize", detector_initialize, -1);
175
+ rb_define_method(rb_cICU_CharsetDetector, "detect", detector_detect, 1);
176
+ rb_define_method(rb_cICU_CharsetDetector, "detect_all", detector_detect_all, 1);
177
+ rb_define_method(rb_cICU_CharsetDetector, "input_filter", detector_get_input_filter, 0);
178
+ rb_define_method(rb_cICU_CharsetDetector, "input_filter=", detector_set_input_filter, 1);
179
+ rb_define_method(rb_cICU_CharsetDetector, "detectable_charsets", detector_detectable_charsets, 0);
180
+
181
+ // define a Match struct in Ruby
182
+ rb_cICU_CharsetDetector_Match = rb_struct_define_under(rb_cICU_CharsetDetector,
183
+ "Match",
184
+ "name",
185
+ "confidence",
186
+ "language",
187
+ NULL);
188
+ }
189
+
190
+ #undef GET_DETECTOR
191
+
192
+ /* vim: set expandtab sws=4 sw=4: */