icu 0.9.1 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.travis.yml +11 -0
  4. data/Gemfile +3 -0
  5. data/LICENSE +20 -0
  6. data/README.md +69 -0
  7. data/Rakefile +38 -0
  8. data/benchmark/normalization.rb +106 -0
  9. data/benchmark/normalization_phrases.txt +1031 -0
  10. data/benchmark/normalization_result.txt +45 -0
  11. data/benchmark/normalization_wikip.txt +2838 -0
  12. data/ext/icu/extconf.rb +242 -0
  13. data/ext/icu/icu.c +18 -0
  14. data/ext/icu/icu.h +78 -0
  15. data/ext/icu/icu_charset_detector.c +192 -0
  16. data/ext/icu/icu_collator.c +138 -0
  17. data/ext/icu/icu_locale.c +852 -0
  18. data/ext/icu/icu_normalizer.c +122 -0
  19. data/ext/icu/icu_number_format.c +0 -0
  20. data/ext/icu/icu_spoof_checker.c +194 -0
  21. data/ext/icu/icu_transliterator.c +159 -0
  22. data/ext/icu/internal_encoding.c +38 -0
  23. data/ext/icu/internal_ustring.c +304 -0
  24. data/ext/icu/internal_utils.c +50 -0
  25. data/ext/icu/rb_errors.c +14 -0
  26. data/icu.gemspec +22 -0
  27. data/lib/icu.rb +6 -18
  28. data/lib/icu/charset_detector.rb +5 -0
  29. data/lib/icu/collator.rb +24 -0
  30. data/lib/icu/locale.rb +19 -0
  31. data/lib/icu/transliterator.rb +8 -0
  32. data/lib/icu/version.rb +3 -0
  33. data/spec/charset_detector_spec.rb +47 -0
  34. data/spec/collator_spec.rb +73 -0
  35. data/spec/locale_spec.rb +312 -0
  36. data/spec/normalizer_spec.rb +35 -0
  37. data/spec/spec_helper.rb +8 -0
  38. data/spec/spoof_checker_spec.rb +56 -0
  39. data/spec/transliterator_spec.rb +41 -0
  40. metadata +132 -55
  41. data/COPYING +0 -674
  42. data/COPYING.LESSER +0 -165
  43. data/README +0 -81
  44. data/ext/extconf.rb +0 -31
  45. data/ext/icu.c +0 -128
  46. data/ext/icu.h +0 -34
  47. data/ext/icu_locale.c +0 -330
  48. data/ext/icu_locale_country.c +0 -99
  49. data/ext/icu_locale_language.c +0 -99
  50. data/ext/icu_numeric.c +0 -161
  51. data/ext/icu_time.c +0 -391
  52. data/test/test_locale.rb +0 -73
  53. data/test/test_numeric.rb +0 -78
  54. data/test/test_time.rb +0 -75
@@ -0,0 +1,242 @@
1
+ require 'mkmf'
2
+
3
+ require 'rubygems'
4
+
5
+ ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
6
+
7
+ # Utility functions
8
+
9
# Reports whether the extension should be linked against a system-wide ICU.
#
# The answer comes from the +--use-system-libraries+ configure argument
# (looked up through mkmf's +arg_config+). The value passed as its default
# is whether the ICU_USE_SYSTEM_LIBRARIES environment variable is set,
# regardless of the variable's content.
def using_system_libraries?
  requested_by_env = ENV.key?('ICU_USE_SYSTEM_LIBRARIES')
  arg_config('--use-system-libraries', requested_by_env)
end
12
+
13
+ # Building with system ICU
14
+
15
+ if using_system_libraries?
16
+ message "Building ICU using system libraries.\n Not supported yet, PR welcome!"
17
+ exit 1
18
+
19
# When dir_config('icu') reports no include/lib directory, fall back to a
# Homebrew-installed icu4c keg and add it to the compiler and linker search
# paths.
unless dir_config('icu').any?
  brew_prefix = if !`which brew`.empty?
                  `brew --prefix`.strip
                elsif File.exist?('/usr/local/Cellar/icu4c')
                  # The glob below appends "Cellar/icu4c" itself, so the base
                  # has to be the Homebrew prefix, not the Cellar directory.
                  '/usr/local'
                end

  # Keg directories are named after the installed version; pick the last one
  # in (lexicographic) sort order.
  icu4c = brew_prefix && Dir[File.join(brew_prefix, 'Cellar/icu4c/*')].sort.last

  if icu4c
    $INCFLAGS << " -I#{icu4c}/include "
    $LDFLAGS << " -L#{icu4c}/lib "
  end
end
31
+
32
# Probe for the ICU i18n library and the converter header first, so that a
# missing installation produces one readable hint instead of a bare failure.
unless have_library('icui18n') && have_header('unicode/ucnv.h')
  STDERR.puts <<-EOS
************************************************************************
icu not found.
install by `brew install icu4c` or `apt-get install libicu-dev`
************************************************************************
  EOS

  exit(1)
end

# The common and data libraries are required as well.
have_library('icuuc') or abort 'libicuuc missing'
have_library('icudata') or abort 'libicudata missing'
45
+ else
46
+ message "Building ICU from source.\n"
47
+
48
+ # The gem version constraint in the Rakefile is not respected at install time.
49
+ # Keep this version in sync with the one in the Rakefile !
50
+ require 'rubygems'
51
+ gem 'mini_portile2', '~> 2.2.0'
52
+ require 'mini_portile2'
53
+
54
+ # Checkout the source code of ICU.
55
+ # http://site.icu-project.org/download/
56
+ # http://userguide.icu-project.org/howtouseicu
57
+ # Also check the readme of ICU release file.
58
# MiniPortile recipe that builds ICU from source below <ROOT>/ports.
#
# Configure options of the form NAME=value are not handed to the configure
# script as options; they are collected into an environment hash (@env)
# which is passed along when ICU's runConfigureICU wrapper is executed.
class ICURecipe < MiniPortile
  # name     - recipe name; also the sub-directory of ROOT/patches that is
  #            searched for *.patch files.
  # version  - version string of the packaged ICU release.
  # static_p - truthy to additionally build static, position-independent
  #            libraries with symbol renaming disabled.
  #
  # Yields the recipe so the caller can set +files+ (and extra configure
  # options) before the options are post-processed.
  def initialize(name, version, static_p)
    super(name, version)
    self.target = File.join(ROOT, "ports")
    # Prefer host_alias over host in order to use i586-mingw32msvc as
    # correct compiler prefix for cross build, but use host if not set.
    # host_alias can be missing from RbConfig altogether, hence the to_s.
    host_alias = RbConfig::CONFIG["host_alias"].to_s
    self.host = host_alias.empty? ? RbConfig::CONFIG["host"] : host_alias
    self.patch_files = Dir[File.join(ROOT, "patches", name, "*.patch")].sort
    self.configure_options << "--libdir=#{File.join(self.path, "lib")}"

    yield self

    # Each key is seeded on first access with a mutable copy of the process
    # environment value, or an empty string when the variable is unset.
    env = Hash.new do |hash, key|
      hash[key] = ENV[key].to_s.dup
    end

    self.configure_options.flatten!

    # Move NAME=value entries out of the option list and into env.
    self.configure_options.delete_if do |option|
      case option
      when /\A(\w+)=(.*)\z/
        env[$1] = $2
        true
      else
        false
      end
    end

    if static_p
      self.configure_options += [
        "--enable-shared",
        "--enable-static",
        "--disable-renaming"
      ]
      env['CFLAGS'] = "-fPIC #{env['CFLAGS']}"
      env['CPPFLAGS'] = "-DU_CHARSET_IS_UTF8=1 -DU_USING_ICU_NAMESPACE=0 -DU_STATIC_IMPLEMENTATION #{env['CPPFLAGS']}"
      env['CXXFLAGS'] = "-fPIC -fno-exceptions #{env['CXXFLAGS']}"
      # Extend the caller's LDFLAGS. Previously this line interpolated
      # CFLAGS, which silently discarded any LDFLAGS supplied by the user.
      env['LDFLAGS'] = "-fPIC -static-libstdc++ #{env['LDFLAGS']}"
    else
      self.configure_options += [
        "--enable-shared",
        "--disable-static",
      ]
    end

    if RbConfig::CONFIG['target_cpu'] == 'universal'
      %w[CFLAGS LDFLAGS].each do |key|
        next if env[key].include?('-arch')

        # Interpolation keeps this safe when ARCH_FLAG is not defined.
        env[key] = "#{env[key]} #{RbConfig::CONFIG['ARCH_FLAG']}"
      end
    end

    @env = env
  end

  # Prints a notice naming the packaged ICU version and any patches that
  # will be applied, then delegates to MiniPortile#cook.
  def cook
    message <<-"EOS"
************************************************************************
IMPORTANT NOTICE:

Building ICU with a packaged version of #{name}-#{version}#{'.' if self.patch_files.empty?}
    EOS

    unless self.patch_files.empty?
      message "with the following patches applied:\n"

      self.patch_files.each do |patch|
        message "\t- %s\n" % File.basename(patch)
      end
    end

    message <<-"EOS"

gem install icu -- --use-system-libraries

If you are using Bundler, tell it to use the option:

bundle config build.icu --use-system-libraries
bundle install
    EOS

    message <<-"EOS"
************************************************************************
    EOS
    super
  end

  # Runs ICU's runConfigureICU wrapper for the current platform with the
  # collected environment and computed options, then calls
  # MiniPortile#configure.
  def configure
    # run as recommend, basically set up compiler and flags
    platform = case RUBY_PLATFORM
               when /mingw|mswin/ then 'MSYS/MSVC'
               when /darwin/ then 'MacOSX'
               else 'Linux'
               end
    execute('ICU Configure', [@env] + ['./runConfigureICU', platform] + computed_options)
    super
  end

  # The build runs in the "source" sub-directory of the first directory
  # found directly below tmp_path.
  def work_path
    extracted_dir = Dir.glob("#{tmp_path}/*").find { |d| File.directory?(d) }
    File.join(extracted_dir, 'source')
  end
end
165
+
166
message "Using mini_portile version #{MiniPortile::VERSION}\n"

# Static linking defaults to on; enable_config lets the configure arguments
# override that default.
static_p = enable_config('static', true)
message "Static linking is disabled.\n" unless static_p

recipes = []

# Source tarball of the packaged ICU release together with its checksum.
icu_tarball = {
  url: "https://downloads.sourceforge.net/project/icu/ICU4C/59.1/icu4c-59_1-src.tgz?r=&ts=1501595646",
  sha256: "7132fdaf9379429d004005217f10e00b7d2319d0fea22bdfddef8991c45b75fe"
  # gpg: Signature made Fri Apr 14 21:00:23 2017 CEST using RSA key ID 4FB419E3
  # gpg: requesting key 4FB419E3 from hkps server hkps.pool.sks-keyservers.net
  # gpg: key 4FB419E3: public key "Steven R. Loomis (filfla-signing) <srloomis@us.ibm.com>" imported
  # gpg: 3 marginal(s) needed, 1 complete(s) needed, PGP trust model
  # gpg: depth: 0 valid: 2 signed: 1 trust: 0-, 0q, 0n, 0m, 0f, 2u
  # gpg: depth: 1 valid: 1 signed: 0 trust: 1-, 0q, 0n, 0m, 0f, 0u
  # gpg: next trustdb check due at 2018-08-19
  # gpg: Total number processed: 1
  # gpg: imported: 1 (RSA: 1)
  # gpg: Good signature from "Steven R. Loomis (filfla-signing) <srloomis@us.ibm.com>" [unknown]
  # gpg: aka "Steven R. Loomis (filfla-signing) <srl295@gmail.com>" [unknown]
  # gpg: aka "Steven R. Loomis (filfla-signing) <srl@icu-project.org>" [unknown]
  # gpg: aka "[jpeg image of size 4680]" [unknown]
  # gpg: WARNING: This key is not certified with a trusted signature!
  # gpg: There is no indication that the signature belongs to the owner.
  # Primary key fingerprint: BA90 283A 60D6 7BA0 DD91 0A89 3932 080F 4FB4 19E3
}

libicu_recipe = ICURecipe.new("libicu", "59.1", static_p) do |recipe|
  recipe.files = [icu_tarball]
end
recipes << libicu_recipe

# Cook every recipe once. A marker file next to the ports records a finished
# build so later runs skip straight to activation.
recipes.each do |recipe|
  marker = format('%s/%s-%s-%s.installed', recipe.target, recipe.name, recipe.version, recipe.host)
  already_built = File.exist?(marker)

  recipe.cook unless already_built
  FileUtils.touch(marker) unless already_built

  recipe.activate
end
205
+
206
# Preprocessor defines that must match the way the packaged ICU was built.
icu_defines = '-DU_DISABLE_RENAMING=1 -DU_CHARSET_IS_UTF8=1 -DU_USING_ICU_NAMESPACE=0 -DU_STATIC_IMPLEMENTATION'

# Ask the freshly built <lib>-config script for its flags and merge them
# into mkmf's globals: -L entries go to $LIBPATH, -l entries to $libs,
# anything else to $LDFLAGS.
$libs = $libs.shellsplit.tap do |libs|
  [libicu_recipe].each do |recipe|
    libname = recipe.name[/\Alib(.+)\z/, 1]
    # TODO: build with pkg-config
    # Should do like PKG_CONFIG_PATH=/root/icu4r/ports/x86_64-pc-linux-gnu/libicu/59.1/lib/pkgconfig/ pkg-config --static icu-uc
    config = File.join(recipe.path, "bin", "#{libname}-config")
    # call config scripts explicit with 'sh' for compat with Windows;
    # the path is escaped so directories containing spaces keep working.
    config_cmd = "sh #{config.shellescape}"

    $CPPFLAGS = "#{icu_defines} #{$CPPFLAGS}"

    `#{config_cmd} --ldflags`.strip.shellsplit.each do |arg|
      case arg
      when /\A-L(.+)\z/
        # Prioritize ports' directories
        if $1.start_with?(ROOT + '/')
          $LIBPATH = [$1] | $LIBPATH
        else
          $LIBPATH = $LIBPATH | [$1]
        end
      when /\A-l./
        libs.unshift(arg)
      else
        $LDFLAGS << ' ' << arg.shellescape
      end
    end

    $INCFLAGS = "#{`#{config_cmd} --cppflags-searchpath`.strip} #{$INCFLAGS}"
    # A separating space is required here: without it the last flag reported
    # by the config script fuses with the first flag already in $CFLAGS.
    $CFLAGS = "#{icu_defines} #{`#{config_cmd} --cflags`.strip} #{$CFLAGS}"
  end
end.shelljoin
234
+
235
+ end
236
+
237
# Flags appended to whatever $CFLAGS holds so far; the debugging set is
# added after the defaults only when the DEBUG environment variable is set.
extra_cflags = [' -O3 -funroll-loops -std=c99']
extra_cflags << ' -Wextra -O0 -ggdb3' if ENV['DEBUG']
extra_cflags.each { |flags| $CFLAGS << flags }

# Echo the final flag sets, one per line.
[$CFLAGS, $CPPFLAGS, $CXXFLAGS].each { |flags| puts flags }

create_makefile('icu/icu')
@@ -0,0 +1,18 @@
1
+ #include "icu.h"
2
+
3
+ VALUE rb_mICU;
4
+
5
+ void Init_icu(void)
6
+ {
7
+ rb_mICU = rb_define_module("ICU");
8
+ init_internal_encoding();
9
+ init_rb_errors();
10
+ init_icu_collator();
11
+ init_icu_normalizer();
12
+ init_icu_spoof_checker();
13
+ init_icu_transliterator();
14
+ init_icu_charset_detector();
15
+ init_icu_locale();
16
+ }
17
+
18
+ /* vim: set expandtab sws=4 sw=4: */
@@ -0,0 +1,78 @@
1
+ #ifndef RUBY_EXTENSION_ICU_H_
2
+ #define RUBY_EXTENSION_ICU_H_
3
+
4
+ /* System libraries */
5
+ #include <stdlib.h>
6
+
7
+ /* Ruby headers */
8
+ #define ONIG_ESCAPE_UCHAR_COLLISION 1 // ruby.h defines UChar macro
9
+ #include <ruby.h>
10
+ #include <ruby/encoding.h>
11
+ #ifdef UChar // fail-safe
12
+ #undef UChar
13
+ #endif
14
+ #include "unicode/ustring.h"
15
+ #include "unicode/uenum.h"
16
+ #include "unicode/parseerr.h"
17
+
18
+ /* Globals */
19
+
20
+ extern VALUE rb_mICU;
21
+ extern VALUE rb_eICU_Error;
22
+ extern VALUE rb_eICU_InvalidParameterError;
23
+ extern VALUE rb_cICU_UString;
24
+ extern VALUE rb_cICU_Collator;
25
+ extern VALUE rb_cICU_Normalizer;
26
+ extern VALUE rb_cICU_SpoofChecker;
27
+ extern VALUE rb_cICU_Transliterator;
28
+ extern VALUE rb_cICU_CharsetDetector;
29
+ extern VALUE rb_cICU_CharsetDetector_Match;
30
+ extern VALUE rb_cICU_Locale;
31
+
32
+ /* Prototypes */
33
+ void Init_icu _(( void ));
34
+ void init_internal_encoding _(( void ));
35
+ void init_rb_errors _(( void ));
36
+ void init_icu_collator _(( void ));
37
+ void init_icu_normalizer _(( void ));
38
+ void init_icu_spoof_checker _(( void ));
39
+ void init_icu_transliterator _(( void ));
40
+ void init_icu_charset_detector _(( void ));
41
+ void init_icu_locale _(( void ));
42
+
43
+ int icu_is_rb_enc_idx_as_utf_8 _(( int ));
44
+ int icu_is_rb_str_as_utf_8 _(( VALUE ));
45
+ const char* icu_rb_str_enc_name _(( int ));
46
+ VALUE rb_str_enc_to_ascii_as_utf8 _(( VALUE ));
47
+ int icu_rb_str_enc_idx _(( VALUE ));
48
+ VALUE icu_enum_to_rb_ary _(( UEnumeration*, UErrorCode, long ));
49
+ extern void icu_rb_raise_icu_error _(( UErrorCode ));
50
+ extern void icu_rb_raise_icu_parse_error _(( const UParseError* ));
51
+ extern void icu_rb_raise_icu_invalid_parameter _(( const char*, const char* ));
52
+
53
+ VALUE icu_ustring_init_with_capa_enc _(( int32_t, int ));
54
+ VALUE icu_ustring_from_rb_str _(( VALUE ));
55
+ VALUE icu_ustring_from_uchar_str _(( const UChar*, int32_t ));
56
+ void icu_ustring_clear_ptr _(( VALUE ));
57
+ void icu_ustring_resize _(( VALUE, int32_t ));
58
+ void icu_ustring_set_enc _(( VALUE, int ));
59
+ VALUE icu_ustring_to_rb_enc_str_with_len _(( VALUE, int32_t ));
60
+ VALUE icu_ustring_to_rb_enc_str _(( VALUE ));
61
+ UChar* icu_ustring_ptr _(( VALUE ));
62
+ int32_t icu_ustring_len _(( VALUE ));
63
+ int32_t icu_ustring_capa _(( VALUE ));
64
+ VALUE char_buffer_to_rb_str _(( const char* ));
65
+ char* char_buffer_new _(( int32_t ));
66
+ void char_buffer_resize _(( const char*, int32_t ));
67
+ void char_buffer_free _(( const char* ));
68
+
69
+ /* Constants */
70
+ #define RUBY_C_STRING_TERMINATOR_SIZE 1
71
+
72
+ /* Macros */
73
/*
 * NOTE(review): in C, `a || b` evaluates to 0 or 1 rather than to one of its
 * operands, so this expression yields a boolean, not an encoding index.
 * It is left unchanged because its callers are outside this header; confirm
 * the intended fallback (default internal encoding, else locale encoding)
 * before changing it.
 */
#define ICU_RUBY_ENCODING_INDEX (rb_enc_to_index(rb_default_internal_encoding()) || rb_locale_encindex())

/*
 * Name of the encoding registered at index _idx, or "" when
 * rb_enc_from_index() returns NULL for it. The expansion is fully
 * parenthesized so that operators at the use site cannot split the
 * conditional expression.
 */
#define ICU_RB_STRING_ENC_NAME_IDX(_idx) (rb_enc_from_index(_idx) != NULL ? (rb_enc_from_index(_idx))->name : "")
75
+
76
+ #endif // RUBY_EXTENSION_ICU_H_
77
+
78
+ /* vim: set expandtab sws=4 sw=4: */
@@ -0,0 +1,192 @@
1
+ #include "icu.h"
2
+ #include "unicode/ucsdet.h"
3
+
4
+ #define GET_DETECTOR(_data) icu_detector_data* _data; \
5
+ TypedData_Get_Struct(self, icu_detector_data, &icu_detector_type, _data)
6
+
7
+ VALUE rb_cICU_CharsetDetector;
8
+ VALUE rb_cICU_CharsetDetector_Match;
9
+
10
+ typedef struct {
11
+ VALUE rb_instance;
12
+ UCharsetDetector* service;
13
+ char* dummy_str; // used for reset
14
+ } icu_detector_data;
15
+
16
+ static void detector_free(void* _this)
17
+ {
18
+ icu_detector_data* this = _this;
19
+ if (this->dummy_str != NULL) {
20
+ ruby_xfree(this->dummy_str);
21
+ }
22
+ ucsdet_close(this->service);
23
+ }
24
+
25
+ static size_t detector_memsize(const void* _)
26
+ {
27
+ return sizeof(icu_detector_data);
28
+ }
29
+
30
+ static const rb_data_type_t icu_detector_type = {
31
+ "icu/charset_detector",
32
+ {NULL, detector_free, detector_memsize,},
33
+ 0, 0,
34
+ RUBY_TYPED_FREE_IMMEDIATELY,
35
+ };
36
+
37
+ static VALUE detector_populate_match_struct(const UCharsetMatch* match)
38
+ {
39
+ UErrorCode status = U_ZERO_ERROR;
40
+ int32_t confidence = ucsdet_getConfidence(match, &status);
41
+ if (U_FAILURE(status)) {
42
+ icu_rb_raise_icu_error(status);
43
+ }
44
+ status = U_ZERO_ERROR;
45
+ const char* name = ucsdet_getName(match, &status);
46
+ if (U_FAILURE(status)) {
47
+ icu_rb_raise_icu_error(status);
48
+ }
49
+ status = U_ZERO_ERROR;
50
+ const char* language = ucsdet_getLanguage(match, &status);
51
+ if (U_FAILURE(status)) {
52
+ icu_rb_raise_icu_error(status);
53
+ }
54
+ return rb_struct_new(rb_cICU_CharsetDetector_Match,
55
+ rb_str_new_cstr(name),
56
+ INT2NUM(confidence),
57
+ rb_str_new_cstr(language));
58
+ }
59
+
60
+ VALUE detector_alloc(VALUE self)
61
+ {
62
+ icu_detector_data* this;
63
+ return TypedData_Make_Struct(self, icu_detector_data, &icu_detector_type, this);
64
+ }
65
+
66
+ VALUE detector_initialize(int argc, VALUE* argv, VALUE self)
67
+ {
68
+ GET_DETECTOR(this);
69
+ this->rb_instance = self;
70
+ this->service = NULL;
71
+
72
+ UErrorCode status = U_ZERO_ERROR;
73
+ this->service = ucsdet_open(&status);
74
+ if (U_FAILURE(status)) {
75
+ icu_rb_raise_icu_error(status);
76
+ }
77
+ this->dummy_str = ALLOC_N(char, 1);
78
+ this->dummy_str[0] = '\0';
79
+
80
+ return self;
81
+ }
82
+
83
+ static inline void detector_reset_text(const icu_detector_data* this)
84
+ {
85
+ UErrorCode status = U_ZERO_ERROR;
86
+ ucsdet_setText(this->service, this->dummy_str, 0, &status);
87
+ if (U_FAILURE(status)) {
88
+ icu_rb_raise_icu_error(status);
89
+ }
90
+ }
91
+
92
+ // rb_str must be a ruby String
93
+ static inline void detector_set_text(const icu_detector_data* this, VALUE rb_str)
94
+ {
95
+ UErrorCode status = U_ZERO_ERROR;
96
+ ucsdet_setText(this->service, RSTRING_PTR(rb_str), RSTRING_LENINT(rb_str), &status);
97
+ if (U_FAILURE(status)) {
98
+ icu_rb_raise_icu_error(status);
99
+ }
100
+ }
101
+
102
+ //
103
+ // no charset appears to match the data.
104
+ // no input text has been provided
105
+ VALUE detector_detect(VALUE self, VALUE str)
106
+ {
107
+ StringValue(str);
108
+ GET_DETECTOR(this);
109
+
110
+ detector_set_text(this, str);
111
+ UErrorCode status = U_ZERO_ERROR;
112
+ const UCharsetMatch* match = ucsdet_detect(this->service, &status);
113
+ if (U_FAILURE(status)) {
114
+ icu_rb_raise_icu_error(status);
115
+ }
116
+
117
+ VALUE rb_match = detector_populate_match_struct(match);
118
+ detector_reset_text(this);
119
+ return rb_match;
120
+ }
121
+
122
+ VALUE detector_detect_all(VALUE self, VALUE str)
123
+ {
124
+ StringValue(str);
125
+ GET_DETECTOR(this);
126
+
127
+ detector_set_text(this, str);
128
+
129
+ UErrorCode status = U_ZERO_ERROR;
130
+ int32_t len_matches = 0;
131
+ const UCharsetMatch** matches = ucsdet_detectAll(this->service, &len_matches, &status);
132
+ if (U_FAILURE(status)) {
133
+ icu_rb_raise_icu_error(status);
134
+ }
135
+
136
+ VALUE result = rb_ary_new2(3); // pre-allocate some slots
137
+ for (int32_t i = 0; i < len_matches; ++i) {
138
+ rb_ary_push(result, detector_populate_match_struct(matches[i]));
139
+ }
140
+ detector_reset_text(this);
141
+ return result;
142
+ }
143
+
144
+ static inline VALUE detector_get_input_filter_internal(const icu_detector_data* this)
145
+ {
146
+ return ucsdet_isInputFilterEnabled(this->service) != 0 ? Qtrue : Qfalse;
147
+ }
148
+
149
+ VALUE detector_get_input_filter(VALUE self)
150
+ {
151
+ GET_DETECTOR(this);
152
+ return detector_get_input_filter_internal(this);
153
+ }
154
+
155
+ VALUE detector_set_input_filter(VALUE self, VALUE flag)
156
+ {
157
+ GET_DETECTOR(this);
158
+ ucsdet_enableInputFilter(this->service, flag == Qtrue ? TRUE : FALSE);
159
+ return detector_get_input_filter_internal(this);
160
+ }
161
+
162
+ VALUE detector_detectable_charsets(VALUE self)
163
+ {
164
+ GET_DETECTOR(this);
165
+ UErrorCode status = U_ZERO_ERROR;
166
+ UEnumeration* charsets = ucsdet_getAllDetectableCharsets(this->service, &status);
167
+ return icu_enum_to_rb_ary(charsets, status, 28);
168
+ }
169
+
170
+ void init_icu_charset_detector(void)
171
+ {
172
+ rb_cICU_CharsetDetector = rb_define_class_under(rb_mICU, "CharsetDetector", rb_cObject);
173
+ rb_define_alloc_func(rb_cICU_CharsetDetector, detector_alloc);
174
+ rb_define_method(rb_cICU_CharsetDetector, "initialize", detector_initialize, -1);
175
+ rb_define_method(rb_cICU_CharsetDetector, "detect", detector_detect, 1);
176
+ rb_define_method(rb_cICU_CharsetDetector, "detect_all", detector_detect_all, 1);
177
+ rb_define_method(rb_cICU_CharsetDetector, "input_filter", detector_get_input_filter, 0);
178
+ rb_define_method(rb_cICU_CharsetDetector, "input_filter=", detector_set_input_filter, 1);
179
+ rb_define_method(rb_cICU_CharsetDetector, "detectable_charsets", detector_detectable_charsets, 0);
180
+
181
+ // define a Match struct in Ruby
182
+ rb_cICU_CharsetDetector_Match = rb_struct_define_under(rb_cICU_CharsetDetector,
183
+ "Match",
184
+ "name",
185
+ "confidence",
186
+ "language",
187
+ NULL);
188
+ }
189
+
190
+ #undef GET_DETECTOR
191
+
192
+ /* vim: set expandtab sws=4 sw=4: */