charlock_holmes 0.7.5 → 0.7.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 57e54315a83df12c843ea67de7213128f74f2a7e
4
- data.tar.gz: f773e478f37f51fe159787964aadf794c25d347a
2
+ SHA256:
3
+ metadata.gz: 11393fb92c8ecb1d18d4741c9915c0293fde4381ccfeae3e929c84970b6779f4
4
+ data.tar.gz: addcb3bdf4fc04e53f7a287c483d6510205b317d980f3a8b21a26940dc5a198d
5
5
  SHA512:
6
- metadata.gz: cc59faffae9eacb3515fbc653b326f26fde8b607177f11ba64fcbcdd4033f06b03531c2f3e388d4d66ce09433f39164583b024beb189b8b37adc9a26eaf2586d
7
- data.tar.gz: 7e7cbe1bb7d7bc8bfbae5417ce1c8047aa9e324fdfa528952c848f3180ac90c3ea3c81bfe74829352a1e81ac3648ac195d0f02f38ad4935876a5af20615396a3
6
+ metadata.gz: 1729db533d5d41aa4708888763270e3f346552a01a8f29371a010253db3bb0cc4f58a7ed628b735829e7f5d9763d35de20a17855dc9c393d4244d1acf30f0dbe
7
+ data.tar.gz: 5033611bfe43c98fd8d88a7141b94676750a2c7fe49a44f5e621d61a4bfc4eef8e627b478471c0cce7dc128b4fc3ad6922129a73c48274681522ff639e82dd75
@@ -38,4 +38,19 @@ static inline VALUE charlock_new_str2(const char *str)
38
38
  #endif
39
39
  }
40
40
 
41
+
42
+ #ifdef __cplusplus
43
+ extern "C"
44
+ {
45
+ #endif
46
+
47
+ extern void Init_charlock_holmes(void);
48
+ extern void _init_charlock_encoding_detector(void);
49
+ extern void _init_charlock_converter(void);
50
+ extern void _init_charlock_transliterator(void);
51
+
52
+ #ifdef __cplusplus
53
+ }
54
+ #endif
55
+
41
56
  #endif
@@ -20,7 +20,7 @@ static VALUE rb_converter_convert(VALUE self, VALUE rb_txt, VALUE rb_src_enc, VA
20
20
  Check_Type(rb_dst_enc, T_STRING);
21
21
 
22
22
  src_txt = RSTRING_PTR(rb_txt);
23
- src_len = RSTRING_LEN(rb_txt);
23
+ src_len = (int32_t)RSTRING_LEN(rb_txt);
24
24
  src_enc = RSTRING_PTR(rb_src_enc);
25
25
  dst_enc = RSTRING_PTR(rb_dst_enc);
26
26
 
@@ -29,7 +29,7 @@ static VALUE rb_converter_convert(VALUE self, VALUE rb_txt, VALUE rb_src_enc, VA
29
29
  if (status != U_BUFFER_OVERFLOW_ERROR) {
30
30
  rb_raise(rb_eArgError, "%s", u_errorName(status));
31
31
  }
32
- out_buf = malloc(out_len);
32
+ out_buf = (char *) malloc(out_len);
33
33
 
34
34
  // now do the actual conversion
35
35
  status = U_ZERO_ERROR;
@@ -50,7 +50,7 @@ static VALUE rb_converter_convert(VALUE self, VALUE rb_txt, VALUE rb_src_enc, VA
50
50
  return rb_out;
51
51
  }
52
52
 
53
- void _init_charlock_converter() {
53
+ void _init_charlock_converter(void) {
54
54
  rb_cConverter = rb_define_class_under(rb_mCharlockHolmes, "Converter", rb_cObject);
55
55
 
56
56
  rb_define_singleton_method(rb_cConverter, "convert", rb_converter_convert, 3);
@@ -8,6 +8,25 @@ typedef struct {
8
8
  UCharsetDetector *csd;
9
9
  } charlock_detector_t;
10
10
 
11
+ static void rb_encdec__free(void *obj)
12
+ {
13
+ charlock_detector_t *detector;
14
+
15
+ detector = (charlock_detector_t *)obj;
16
+
17
+ if (detector->csd)
18
+ ucsdet_close(detector->csd);
19
+
20
+ free(detector);
21
+ }
22
+
23
+ static const rb_data_type_t charlock_detector_type = {
24
+ "Charlock/Detector",
25
+ { 0, rb_encdec__free, 0, },
26
+ 0, 0,
27
+ RUBY_TYPED_FREE_IMMEDIATELY,
28
+ };
29
+
11
30
  static VALUE rb_encdec_buildmatch(const UCharsetMatch *match)
12
31
  {
13
32
  UErrorCode status = U_ZERO_ERROR;
@@ -47,7 +66,7 @@ static VALUE rb_encdec_buildmatch(const UCharsetMatch *match)
47
66
  return rb_match;
48
67
  }
49
68
 
50
- static VALUE rb_encdec_binarymatch() {
69
+ static VALUE rb_encdec_binarymatch(void) {
51
70
  VALUE rb_match;
52
71
 
53
72
  rb_match = rb_hash_new();
@@ -167,7 +186,7 @@ static VALUE rb_encdec_detect(int argc, VALUE *argv, VALUE self)
167
186
  rb_scan_args(argc, argv, "11", &rb_str, &rb_enc_hint);
168
187
 
169
188
  Check_Type(rb_str, T_STRING);
170
- Data_Get_Struct(self, charlock_detector_t, detector);
189
+ TypedData_Get_Struct(self, charlock_detector_t, &charlock_detector_type, detector);
171
190
 
172
191
  // first lets see if this is binary content
173
192
  if (detect_binary_content(self, rb_str)) {
@@ -180,7 +199,7 @@ static VALUE rb_encdec_detect(int argc, VALUE *argv, VALUE self)
180
199
 
181
200
  if (!NIL_P(rb_enc_hint)) {
182
201
  Check_Type(rb_enc_hint, T_STRING);
183
- ucsdet_setDeclaredEncoding(detector->csd, RSTRING_PTR(rb_enc_hint), RSTRING_LEN(rb_enc_hint), &status);
202
+ ucsdet_setDeclaredEncoding(detector->csd, RSTRING_PTR(rb_enc_hint), (int32_t)RSTRING_LEN(rb_enc_hint), &status);
184
203
  }
185
204
 
186
205
  return rb_encdec_buildmatch(ucsdet_detect(detector->csd, &status));
@@ -215,7 +234,7 @@ static VALUE rb_encdec_detect_all(int argc, VALUE *argv, VALUE self)
215
234
  rb_scan_args(argc, argv, "11", &rb_str, &rb_enc_hint);
216
235
 
217
236
  Check_Type(rb_str, T_STRING);
218
- Data_Get_Struct(self, charlock_detector_t, detector);
237
+ TypedData_Get_Struct(self, charlock_detector_t, &charlock_detector_type, detector);
219
238
 
220
239
  rb_ret = rb_ary_new();
221
240
 
@@ -229,7 +248,7 @@ static VALUE rb_encdec_detect_all(int argc, VALUE *argv, VALUE self)
229
248
 
230
249
  if (!NIL_P(rb_enc_hint)) {
231
250
  Check_Type(rb_enc_hint, T_STRING);
232
- ucsdet_setDeclaredEncoding(detector->csd, RSTRING_PTR(rb_enc_hint), RSTRING_LEN(rb_enc_hint), &status);
251
+ ucsdet_setDeclaredEncoding(detector->csd, RSTRING_PTR(rb_enc_hint), (int32_t)RSTRING_LEN(rb_enc_hint), &status);
233
252
  }
234
253
 
235
254
  csm = ucsdet_detectAll(detector->csd, &match_count, &status);
@@ -257,7 +276,7 @@ static VALUE rb_get_strip_tags(VALUE self)
257
276
  UBool val;
258
277
  VALUE rb_val;
259
278
 
260
- Data_Get_Struct(self, charlock_detector_t, detector);
279
+ TypedData_Get_Struct(self, charlock_detector_t, &charlock_detector_type, detector);
261
280
 
262
281
  val = ucsdet_isInputFilterEnabled(detector->csd);
263
282
 
@@ -279,7 +298,7 @@ static VALUE rb_set_strip_tags(VALUE self, VALUE rb_val)
279
298
  charlock_detector_t *detector;
280
299
  UBool val;
281
300
 
282
- Data_Get_Struct(self, charlock_detector_t, detector);
301
+ TypedData_Get_Struct(self, charlock_detector_t, &charlock_detector_type, detector);
283
302
 
284
303
  val = rb_val == Qtrue ? 1 : 0;
285
304
 
@@ -334,26 +353,14 @@ static VALUE rb_get_supported_encodings(VALUE klass)
334
353
  return rb_encoding_list;
335
354
  }
336
355
 
337
- static void rb_encdec__free(void *obj)
338
- {
339
- charlock_detector_t *detector;
340
-
341
- detector = (charlock_detector_t *)obj;
342
-
343
- if (detector->csd)
344
- ucsdet_close(detector->csd);
345
-
346
- free(detector);
347
- }
348
-
349
356
  static VALUE rb_encdec__alloc(VALUE klass)
350
357
  {
351
358
  charlock_detector_t *detector;
352
359
  UErrorCode status = U_ZERO_ERROR;
353
360
  VALUE obj;
354
361
 
355
- detector = calloc(1, sizeof(charlock_detector_t));
356
- obj = Data_Wrap_Struct(klass, NULL, rb_encdec__free, (void *)detector);
362
+ detector = (charlock_detector_t *) calloc(1, sizeof(charlock_detector_t));
363
+ obj = TypedData_Wrap_Struct(klass, &charlock_detector_type, (void *)detector);
357
364
 
358
365
  detector->csd = ucsdet_open(&status);
359
366
  if (U_FAILURE(status)) {
@@ -363,7 +370,7 @@ static VALUE rb_encdec__alloc(VALUE klass)
363
370
  return obj;
364
371
  }
365
372
 
366
- void _init_charlock_encoding_detector()
373
+ void _init_charlock_encoding_detector(void)
367
374
  {
368
375
  rb_cEncodingDetector = rb_define_class_under(rb_mCharlockHolmes, "EncodingDetector", rb_cObject);
369
376
  rb_define_alloc_func(rb_cEncodingDetector, rb_encdec__alloc);
@@ -1,15 +1,11 @@
1
1
  #include "common.h"
2
2
 
3
- extern void _init_charlock_encoding_detector();
4
- extern void _init_charlock_converter();
5
- extern void _init_charlock_transliterator();
6
-
7
3
  VALUE rb_mCharlockHolmes;
8
4
 
9
- void Init_charlock_holmes() {
5
+ void Init_charlock_holmes(void) {
10
6
  rb_mCharlockHolmes = rb_define_module("CharlockHolmes");
11
7
 
12
8
  _init_charlock_encoding_detector();
13
9
  _init_charlock_converter();
14
10
  _init_charlock_transliterator();
15
- }
11
+ }
@@ -1,14 +1,5 @@
1
1
  require 'mkmf'
2
2
 
3
- CWD = File.expand_path(File.dirname(__FILE__))
4
- def sys(cmd)
5
- puts " -- #{cmd}"
6
- unless ret = xsystem(cmd)
7
- raise "#{cmd} failed, please report issue on https://github.com/brianmario/charlock_holmes"
8
- end
9
- ret
10
- end
11
-
12
3
  if `which make`.strip.empty?
13
4
  STDERR.puts "\n\n"
14
5
  STDERR.puts "***************************************************************************************"
@@ -21,25 +12,28 @@ end
21
12
  # ICU dependency
22
13
  #
23
14
 
24
- dir_config 'icu'
25
-
26
- rubyopt = ENV.delete("RUBYOPT")
27
-
28
- icu4c = "/usr"
29
- # detect homebrew installs
30
- if !have_library 'icui18n'
31
- base = if !`which brew`.empty?
32
- `brew --prefix`.strip
33
- elsif File.exists?("/usr/local/Cellar/icu4c")
34
- '/usr/local/Cellar'
35
- end
15
+ ldflags = cppflags = nil
36
16
 
37
- if base and icu4c = Dir[File.join(base, 'Cellar/icu4c/*')].sort.last
38
- $INCFLAGS << " -I#{icu4c}/include "
39
- $LDFLAGS << " -L#{icu4c}/lib "
17
+ if RbConfig::CONFIG["host_os"] =~ /darwin/
18
+ begin
19
+ brew_prefix = `brew --prefix icu4c`.chomp
20
+ ldflags = "#{brew_prefix}/lib"
21
+ cppflags = "#{brew_prefix}/include"
22
+ pkg_conf = "#{brew_prefix}/lib/pkgconfig"
23
+ # pkg_config should be less error prone than parsing compiler
24
+ # commandline options, but we need to set default ldflags and cpp flags
25
+ # in case the user doesn't have pkg-config installed
26
+ ENV['PKG_CONFIG_PATH'] ||= pkg_conf
27
+ rescue
40
28
  end
41
29
  end
42
30
 
31
+ dir_config 'icu', cppflags, ldflags
32
+
33
+ pkg_config("icu-i18n")
34
+ pkg_config("icu-io")
35
+ pkg_config("icu-uc")
36
+
43
37
  unless have_library 'icui18n' and have_header 'unicode/ucnv.h'
44
38
  STDERR.puts "\n\n"
45
39
  STDERR.puts "***************************************************************************************"
@@ -52,15 +46,96 @@ have_library 'z' or abort 'libz missing'
52
46
  have_library 'icuuc' or abort 'libicuuc missing'
53
47
  have_library 'icudata' or abort 'libicudata missing'
54
48
 
55
- # icu4c might be built in C++11 mode, but it also might not have been
56
- icuconfig = `which icu-config`.chomp
57
- icuconfig = "#{icu4c}/bin/icu-config" if icuconfig.empty?
58
- if File.exist?(icuconfig) && `#{icuconfig} --cxxflags`.include?("c++11")
59
- $CXXFLAGS << ' -std=c++11'
60
- end
61
-
62
49
  $CFLAGS << ' -Wall -funroll-loops'
63
50
  $CFLAGS << ' -Wextra -O0 -ggdb3' if ENV['DEBUG']
64
51
 
65
- ENV['RUBYOPT'] = rubyopt
52
+ minimal_program = <<~SRC
53
+ #include <unicode/translit.h>
54
+ int main() { return 0; }
55
+ SRC
56
+
57
+ # Pass -x c++ to force gcc to compile the test program
58
+ # as C++ (as it will end in .c by default).
59
+ compile_options = +"-x c++"
60
+
61
+ icu_requires_version_flag = checking_for("icu that requires explicit C++ version flag") do
62
+ !try_compile(minimal_program, compile_options)
63
+ end
64
+
65
+ if icu_requires_version_flag
66
+ abort "Cannot compile icu with your compiler: recent versions require C++17 support." unless %w[c++20 c++17 c++11 c++0x].any? do |std|
67
+ checking_for("icu that compiles with #{std} standard") do
68
+ flags = compile_options + " -std=#{std}"
69
+ if try_compile(minimal_program, flags)
70
+ $CPPFLAGS << flags
71
+
72
+ true
73
+ end
74
+ end
75
+ end
76
+ end
77
+
78
+ def libflag_to_filename(ldflag)
79
+ case ldflag
80
+ when /\A-l(.+)/
81
+ "lib#{Regexp.last_match(1)}.#{$LIBEXT}"
82
+ end
83
+ end
84
+
85
+ def resolve_static_library(libflag, dirs)
86
+ filename = libflag_to_filename(libflag)
87
+
88
+ dir = dirs.find { |path| File.exist?(File.join(path, filename)) }
89
+
90
+ raise "Unable to find #{filename} in #{dirs}" unless dir
91
+
92
+ File.join(dir, filename)
93
+ end
94
+
95
+ def substitute_static_libs(packages)
96
+ packages.each do |pkg|
97
+ unless pkg_config(pkg)
98
+ message = <<~MSG
99
+ Unable to run `pkg-config #{pkg}`.
100
+
101
+ Check that PKG_CONFIG_PATH includes #{pkg}.pc (or unset it if it's already set).
102
+
103
+ Current environment:
104
+ PKG_CONFIG_PATH=#{ENV['PKG_CONFIG_PATH']}
105
+ MSG
106
+
107
+ raise message
108
+ end
109
+ end
110
+
111
+ # First, find all the -l<lib> flags added by pkg-config. We want to drop
112
+ # these dynamically linked libraries and substitute them with the static libraries.
113
+ libflags = packages.map do |pkg|
114
+ pkg_config(pkg, 'libs-only-l')&.strip&.split(' ')
115
+ end.flatten.uniq
116
+
117
+ # To find where the static libraries live, we need to search the
118
+ # library paths given by the -L flag from pkg-config.
119
+ lib_paths = packages.map do |pkg|
120
+ include_path = pkg_config(pkg, 'libs-only-L')&.strip
121
+ include_path&.split(' ')&.map { |lib| lib.gsub(/^-L/, '') }
122
+ end.flatten.uniq
123
+
124
+ # Drop the -l<lib> flags and add in the static libraries.
125
+ new_libs = $libs.shellsplit
126
+ new_libs.reject! { |arg| libflags.include?(arg) }
127
+ libflags.each { |flag| new_libs << resolve_static_library(flag, lib_paths) }
128
+ $libs = new_libs.uniq.shelljoin
129
+ end
130
+
131
+ static_p = enable_config('static', false)
132
+ message "Static linking is #{static_p ? 'enabled' : 'disabled'}.\n"
133
+
134
+ if static_p
135
+ $CXXFLAGS << ' -fPIC'
136
+ ENV['PKG_CONFIG_ALLOW_SYSTEM_LIBS'] = '1'
137
+
138
+ substitute_static_libs(%w[icu-i18n icu-io icu-uc])
139
+ end
140
+
66
141
  create_makefile 'charlock_holmes/charlock_holmes'
@@ -36,7 +36,7 @@ static VALUE rb_cTransliterator;
36
36
 
37
37
  static VALUE rb_transliterator_id_list(VALUE self) {
38
38
  UErrorCode status = U_ZERO_ERROR;
39
- StringEnumeration *id_list;
39
+ icu::StringEnumeration *id_list;
40
40
  int32_t id_list_size;
41
41
  const char *curr_id;
42
42
  int32_t curr_id_len;
@@ -44,7 +44,7 @@ static VALUE rb_transliterator_id_list(VALUE self) {
44
44
  VALUE rb_curr_id;
45
45
 
46
46
  id_list_size = 0;
47
- id_list = Transliterator::getAvailableIDs(status);
47
+ id_list = icu::Transliterator::getAvailableIDs(status);
48
48
  if(!U_SUCCESS(status)) {
49
49
  rb_raise(rb_eArgError, "%s", u_errorName(status));
50
50
  }
@@ -78,12 +78,12 @@ static VALUE rb_transliterator_id_list(VALUE self) {
78
78
  static VALUE rb_transliterator_transliterate(VALUE self, VALUE rb_txt, VALUE rb_id) {
79
79
  UErrorCode status = U_ZERO_ERROR;
80
80
  UParseError p_error;
81
- Transliterator *trans;
81
+ icu::Transliterator *trans;
82
82
  const char *txt;
83
83
  size_t txt_len;
84
84
  const char *id;
85
85
  size_t id_len;
86
- UnicodeString *u_txt;
86
+ icu::UnicodeString *u_txt;
87
87
  std::string result;
88
88
  VALUE rb_out;
89
89
 
@@ -98,14 +98,14 @@ static VALUE rb_transliterator_transliterate(VALUE self, VALUE rb_txt, VALUE rb_
98
98
  id = RSTRING_PTR(rb_id);
99
99
  id_len = RSTRING_LEN(rb_id);
100
100
 
101
- trans = Transliterator::createInstance(UnicodeString(id, id_len), UTRANS_FORWARD, p_error, status);
101
+ trans = icu::Transliterator::createInstance(icu::UnicodeString(id, id_len), UTRANS_FORWARD, p_error, status);
102
102
  if(!U_SUCCESS(status)) {
103
103
  rb_raise(rb_eArgError, "%s", u_errorName(status));
104
104
  }
105
105
 
106
- u_txt = new UnicodeString(txt, txt_len);
106
+ u_txt = new icu::UnicodeString(txt, txt_len);
107
107
  trans->transliterate(*u_txt);
108
- StringByteSink<std::string> sink(&result);
108
+ icu::StringByteSink<std::string> sink(&result);
109
109
  u_txt->toUTF8(sink);
110
110
 
111
111
  delete u_txt;
@@ -116,7 +116,7 @@ static VALUE rb_transliterator_transliterate(VALUE self, VALUE rb_txt, VALUE rb_
116
116
  return rb_out;
117
117
  }
118
118
 
119
- void _init_charlock_transliterator() {
119
+ void _init_charlock_transliterator(void) {
120
120
  #ifdef HAVE_RUBY_ENCODING_H
121
121
  rb_eEncodingCompatibilityError = rb_const_get(rb_cEncoding, rb_intern("CompatibilityError"));
122
122
  #endif
@@ -1,3 +1,3 @@
1
1
  module CharlockHolmes
2
- VERSION = "0.7.5"
2
+ VERSION = "0.7.9"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: charlock_holmes
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.5
4
+ version: 0.7.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Lopez
@@ -9,50 +9,50 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-08-14 00:00:00.000000000 Z
12
+ date: 2024-07-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake-compiler
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
- - - ">="
18
+ - - "~>"
19
19
  - !ruby/object:Gem::Version
20
- version: 0.7.5
20
+ version: '1.0'
21
21
  type: :development
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
- - - ">="
25
+ - - "~>"
26
26
  - !ruby/object:Gem::Version
27
- version: 0.7.5
27
+ version: '1.0'
28
28
  - !ruby/object:Gem::Dependency
29
29
  name: minitest
30
30
  requirement: !ruby/object:Gem::Requirement
31
31
  requirements:
32
- - - ">="
32
+ - - "~>"
33
33
  - !ruby/object:Gem::Version
34
- version: '0'
34
+ version: '5.11'
35
35
  type: :development
36
36
  prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
- - - ">="
39
+ - - "~>"
40
40
  - !ruby/object:Gem::Version
41
- version: '0'
41
+ version: '5.11'
42
42
  - !ruby/object:Gem::Dependency
43
43
  name: chardet
44
44
  requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
- - - ">="
46
+ - - "~>"
47
47
  - !ruby/object:Gem::Version
48
- version: '0'
48
+ version: '0.9'
49
49
  type: :development
50
50
  prerelease: false
51
51
  version_requirements: !ruby/object:Gem::Requirement
52
52
  requirements:
53
- - - ">="
53
+ - - "~>"
54
54
  - !ruby/object:Gem::Version
55
- version: '0'
55
+ version: '0.9'
56
56
  description: charlock_holmes provides binary and text detection as well as text transcoding
57
57
  using libicu
58
58
  email: seniorlopez@gmail.com
@@ -91,8 +91,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
91
91
  - !ruby/object:Gem::Version
92
92
  version: '0'
93
93
  requirements: []
94
- rubyforge_project:
95
- rubygems_version: 2.6.11
94
+ rubygems_version: 3.0.3.1
96
95
  signing_key:
97
96
  specification_version: 4
98
97
  summary: Character encoding detection, brought to you by ICU