charlock_holmes 0.7.8 → 0.7.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 63323878cbd6ae16be524e46cd02c593003b954a26f8b381595fdd8e4355d651
4
- data.tar.gz: 7f8a02a33b2d9c3d0e3800ff2728a9f95b7a2e07692b2ca01b44953ae9cb54ed
3
+ metadata.gz: 11393fb92c8ecb1d18d4741c9915c0293fde4381ccfeae3e929c84970b6779f4
4
+ data.tar.gz: addcb3bdf4fc04e53f7a287c483d6510205b317d980f3a8b21a26940dc5a198d
5
5
  SHA512:
6
- metadata.gz: d1e8235025a81e5dd289e52bcf1696c86ceaa7f39e6fa627fb2625a24a613e86868247bf2caedd5e837b1aea1db73f2259031093c668710cdd7498fea9b93b65
7
- data.tar.gz: 7e60283edcbd9795185ea95717407361e02ec7b98114759a32924217231439edb45048890e210d2bda7a5408559ab05d65af9df13569be4a3699858aa2b1fcf8
6
+ metadata.gz: 1729db533d5d41aa4708888763270e3f346552a01a8f29371a010253db3bb0cc4f58a7ed628b735829e7f5d9763d35de20a17855dc9c393d4244d1acf30f0dbe
7
+ data.tar.gz: 5033611bfe43c98fd8d88a7141b94676750a2c7fe49a44f5e621d61a4bfc4eef8e627b478471c0cce7dc128b4fc3ad6922129a73c48274681522ff639e82dd75
@@ -38,4 +38,19 @@ static inline VALUE charlock_new_str2(const char *str)
38
38
  #endif
39
39
  }
40
40
 
41
+
42
+ #ifdef __cplusplus
43
+ extern "C"
44
+ {
45
+ #endif
46
+
47
+ extern void Init_charlock_holmes(void);
48
+ extern void _init_charlock_encoding_detector(void);
49
+ extern void _init_charlock_converter(void);
50
+ extern void _init_charlock_transliterator(void);
51
+
52
+ #ifdef __cplusplus
53
+ }
54
+ #endif
55
+
41
56
  #endif
@@ -20,7 +20,7 @@ static VALUE rb_converter_convert(VALUE self, VALUE rb_txt, VALUE rb_src_enc, VA
20
20
  Check_Type(rb_dst_enc, T_STRING);
21
21
 
22
22
  src_txt = RSTRING_PTR(rb_txt);
23
- src_len = RSTRING_LEN(rb_txt);
23
+ src_len = (int32_t)RSTRING_LEN(rb_txt);
24
24
  src_enc = RSTRING_PTR(rb_src_enc);
25
25
  dst_enc = RSTRING_PTR(rb_dst_enc);
26
26
 
@@ -29,7 +29,7 @@ static VALUE rb_converter_convert(VALUE self, VALUE rb_txt, VALUE rb_src_enc, VA
29
29
  if (status != U_BUFFER_OVERFLOW_ERROR) {
30
30
  rb_raise(rb_eArgError, "%s", u_errorName(status));
31
31
  }
32
- out_buf = malloc(out_len);
32
+ out_buf = (char *) malloc(out_len);
33
33
 
34
34
  // now do the actual conversion
35
35
  status = U_ZERO_ERROR;
@@ -50,7 +50,7 @@ static VALUE rb_converter_convert(VALUE self, VALUE rb_txt, VALUE rb_src_enc, VA
50
50
  return rb_out;
51
51
  }
52
52
 
53
- void _init_charlock_converter() {
53
+ void _init_charlock_converter(void) {
54
54
  rb_cConverter = rb_define_class_under(rb_mCharlockHolmes, "Converter", rb_cObject);
55
55
 
56
56
  rb_define_singleton_method(rb_cConverter, "convert", rb_converter_convert, 3);
@@ -8,6 +8,25 @@ typedef struct {
8
8
  UCharsetDetector *csd;
9
9
  } charlock_detector_t;
10
10
 
11
+ static void rb_encdec__free(void *obj)
12
+ {
13
+ charlock_detector_t *detector;
14
+
15
+ detector = (charlock_detector_t *)obj;
16
+
17
+ if (detector->csd)
18
+ ucsdet_close(detector->csd);
19
+
20
+ free(detector);
21
+ }
22
+
23
+ static const rb_data_type_t charlock_detector_type = {
24
+ "Charlock/Detector",
25
+ { 0, rb_encdec__free, 0, },
26
+ 0, 0,
27
+ RUBY_TYPED_FREE_IMMEDIATELY,
28
+ };
29
+
11
30
  static VALUE rb_encdec_buildmatch(const UCharsetMatch *match)
12
31
  {
13
32
  UErrorCode status = U_ZERO_ERROR;
@@ -47,7 +66,7 @@ static VALUE rb_encdec_buildmatch(const UCharsetMatch *match)
47
66
  return rb_match;
48
67
  }
49
68
 
50
- static VALUE rb_encdec_binarymatch() {
69
+ static VALUE rb_encdec_binarymatch(void) {
51
70
  VALUE rb_match;
52
71
 
53
72
  rb_match = rb_hash_new();
@@ -167,7 +186,7 @@ static VALUE rb_encdec_detect(int argc, VALUE *argv, VALUE self)
167
186
  rb_scan_args(argc, argv, "11", &rb_str, &rb_enc_hint);
168
187
 
169
188
  Check_Type(rb_str, T_STRING);
170
- Data_Get_Struct(self, charlock_detector_t, detector);
189
+ TypedData_Get_Struct(self, charlock_detector_t, &charlock_detector_type, detector);
171
190
 
172
191
  // first lets see if this is binary content
173
192
  if (detect_binary_content(self, rb_str)) {
@@ -180,7 +199,7 @@ static VALUE rb_encdec_detect(int argc, VALUE *argv, VALUE self)
180
199
 
181
200
  if (!NIL_P(rb_enc_hint)) {
182
201
  Check_Type(rb_enc_hint, T_STRING);
183
- ucsdet_setDeclaredEncoding(detector->csd, RSTRING_PTR(rb_enc_hint), RSTRING_LEN(rb_enc_hint), &status);
202
+ ucsdet_setDeclaredEncoding(detector->csd, RSTRING_PTR(rb_enc_hint), (int32_t)RSTRING_LEN(rb_enc_hint), &status);
184
203
  }
185
204
 
186
205
  return rb_encdec_buildmatch(ucsdet_detect(detector->csd, &status));
@@ -215,7 +234,7 @@ static VALUE rb_encdec_detect_all(int argc, VALUE *argv, VALUE self)
215
234
  rb_scan_args(argc, argv, "11", &rb_str, &rb_enc_hint);
216
235
 
217
236
  Check_Type(rb_str, T_STRING);
218
- Data_Get_Struct(self, charlock_detector_t, detector);
237
+ TypedData_Get_Struct(self, charlock_detector_t, &charlock_detector_type, detector);
219
238
 
220
239
  rb_ret = rb_ary_new();
221
240
 
@@ -229,7 +248,7 @@ static VALUE rb_encdec_detect_all(int argc, VALUE *argv, VALUE self)
229
248
 
230
249
  if (!NIL_P(rb_enc_hint)) {
231
250
  Check_Type(rb_enc_hint, T_STRING);
232
- ucsdet_setDeclaredEncoding(detector->csd, RSTRING_PTR(rb_enc_hint), RSTRING_LEN(rb_enc_hint), &status);
251
+ ucsdet_setDeclaredEncoding(detector->csd, RSTRING_PTR(rb_enc_hint), (int32_t)RSTRING_LEN(rb_enc_hint), &status);
233
252
  }
234
253
 
235
254
  csm = ucsdet_detectAll(detector->csd, &match_count, &status);
@@ -257,7 +276,7 @@ static VALUE rb_get_strip_tags(VALUE self)
257
276
  UBool val;
258
277
  VALUE rb_val;
259
278
 
260
- Data_Get_Struct(self, charlock_detector_t, detector);
279
+ TypedData_Get_Struct(self, charlock_detector_t, &charlock_detector_type, detector);
261
280
 
262
281
  val = ucsdet_isInputFilterEnabled(detector->csd);
263
282
 
@@ -279,7 +298,7 @@ static VALUE rb_set_strip_tags(VALUE self, VALUE rb_val)
279
298
  charlock_detector_t *detector;
280
299
  UBool val;
281
300
 
282
- Data_Get_Struct(self, charlock_detector_t, detector);
301
+ TypedData_Get_Struct(self, charlock_detector_t, &charlock_detector_type, detector);
283
302
 
284
303
  val = rb_val == Qtrue ? 1 : 0;
285
304
 
@@ -334,26 +353,14 @@ static VALUE rb_get_supported_encodings(VALUE klass)
334
353
  return rb_encoding_list;
335
354
  }
336
355
 
337
- static void rb_encdec__free(void *obj)
338
- {
339
- charlock_detector_t *detector;
340
-
341
- detector = (charlock_detector_t *)obj;
342
-
343
- if (detector->csd)
344
- ucsdet_close(detector->csd);
345
-
346
- free(detector);
347
- }
348
-
349
356
  static VALUE rb_encdec__alloc(VALUE klass)
350
357
  {
351
358
  charlock_detector_t *detector;
352
359
  UErrorCode status = U_ZERO_ERROR;
353
360
  VALUE obj;
354
361
 
355
- detector = calloc(1, sizeof(charlock_detector_t));
356
- obj = Data_Wrap_Struct(klass, NULL, rb_encdec__free, (void *)detector);
362
+ detector = (charlock_detector_t *) calloc(1, sizeof(charlock_detector_t));
363
+ obj = TypedData_Wrap_Struct(klass, &charlock_detector_type, (void *)detector);
357
364
 
358
365
  detector->csd = ucsdet_open(&status);
359
366
  if (U_FAILURE(status)) {
@@ -363,7 +370,7 @@ static VALUE rb_encdec__alloc(VALUE klass)
363
370
  return obj;
364
371
  }
365
372
 
366
- void _init_charlock_encoding_detector()
373
+ void _init_charlock_encoding_detector(void)
367
374
  {
368
375
  rb_cEncodingDetector = rb_define_class_under(rb_mCharlockHolmes, "EncodingDetector", rb_cObject);
369
376
  rb_define_alloc_func(rb_cEncodingDetector, rb_encdec__alloc);
@@ -1,15 +1,11 @@
1
1
  #include "common.h"
2
2
 
3
- extern void _init_charlock_encoding_detector();
4
- extern void _init_charlock_converter();
5
- extern void _init_charlock_transliterator();
6
-
7
3
  VALUE rb_mCharlockHolmes;
8
4
 
9
- void Init_charlock_holmes() {
5
+ void Init_charlock_holmes(void) {
10
6
  rb_mCharlockHolmes = rb_define_module("CharlockHolmes");
11
7
 
12
8
  _init_charlock_encoding_detector();
13
9
  _init_charlock_converter();
14
10
  _init_charlock_transliterator();
15
- }
11
+ }
@@ -49,4 +49,93 @@ have_library 'icudata' or abort 'libicudata missing'
49
49
  $CFLAGS << ' -Wall -funroll-loops'
50
50
  $CFLAGS << ' -Wextra -O0 -ggdb3' if ENV['DEBUG']
51
51
 
52
+ minimal_program = <<~SRC
53
+ #include <unicode/translit.h>
54
+ int main() { return 0; }
55
+ SRC
56
+
57
+ # Pass -x c++ to force gcc to compile the test program
58
+ # as C++ (as it will end in .c by default).
59
+ compile_options = +"-x c++"
60
+
61
+ icu_requires_version_flag = checking_for("icu that requires explicit C++ version flag") do
62
+ !try_compile(minimal_program, compile_options)
63
+ end
64
+
65
+ if icu_requires_version_flag
66
+ abort "Cannot compile icu with your compiler: recent versions require C++17 support." unless %w[c++20 c++17 c++11 c++0x].any? do |std|
67
+ checking_for("icu that compiles with #{std} standard") do
68
+ flags = compile_options + " -std=#{std}"
69
+ if try_compile(minimal_program, flags)
70
+ $CPPFLAGS << flags
71
+
72
+ true
73
+ end
74
+ end
75
+ end
76
+ end
77
+
78
+ def libflag_to_filename(ldflag)
79
+ case ldflag
80
+ when /\A-l(.+)/
81
+ "lib#{Regexp.last_match(1)}.#{$LIBEXT}"
82
+ end
83
+ end
84
+
85
+ def resolve_static_library(libflag, dirs)
86
+ filename = libflag_to_filename(libflag)
87
+
88
+ dir = dirs.find { |path| File.exist?(File.join(path, filename)) }
89
+
90
+ raise "Unable to find #{filename} in #{dirs}" unless dir
91
+
92
+ File.join(dir, filename)
93
+ end
94
+
95
+ def substitute_static_libs(packages)
96
+ packages.each do |pkg|
97
+ unless pkg_config(pkg)
98
+ message = <<~MSG
99
+ Unable to run `pkg-config #{pkg}`.
100
+
101
+ Check that PKG_CONFIG_PATH includes #{pkg}.pc (or unset it if it's already set).
102
+
103
+ Current environment:
104
+ PKG_CONFIG_PATH=#{ENV['PKG_CONFIG_PATH']}
105
+ MSG
106
+
107
+ raise message
108
+ end
109
+ end
110
+
111
+ # First, find all the -l<lib> flags added by pkg-config. We want to drop
112
+ # these dynamically linked libraries and substitute them with the static libraries.
113
+ libflags = packages.map do |pkg|
114
+ pkg_config(pkg, 'libs-only-l')&.strip&.split(' ')
115
+ end.flatten.uniq
116
+
117
+ # To find where the static libraries live, we need to search the
118
+ # library paths given by the -L flag from pkg-config.
119
+ lib_paths = packages.map do |pkg|
120
+ include_path = pkg_config(pkg, 'libs-only-L')&.strip
121
+ include_path&.split(' ')&.map { |lib| lib.gsub(/^-L/, '') }
122
+ end.flatten.uniq
123
+
124
+ # Drop the -l<lib> flags and add in the static libraries.
125
+ new_libs = $libs.shellsplit
126
+ new_libs.reject! { |arg| libflags.include?(arg) }
127
+ libflags.each { |flag| new_libs << resolve_static_library(flag, lib_paths) }
128
+ $libs = new_libs.uniq.shelljoin
129
+ end
130
+
131
+ static_p = enable_config('static', false)
132
+ message "Static linking is #{static_p ? 'enabled' : 'disabled'}.\n"
133
+
134
+ if static_p
135
+ $CXXFLAGS << ' -fPIC'
136
+ ENV['PKG_CONFIG_ALLOW_SYSTEM_LIBS'] = '1'
137
+
138
+ substitute_static_libs(%w[icu-i18n icu-io icu-uc])
139
+ end
140
+
52
141
  create_makefile 'charlock_holmes/charlock_holmes'
@@ -116,7 +116,7 @@ static VALUE rb_transliterator_transliterate(VALUE self, VALUE rb_txt, VALUE rb_
116
116
  return rb_out;
117
117
  }
118
118
 
119
- void _init_charlock_transliterator() {
119
+ void _init_charlock_transliterator(void) {
120
120
  #ifdef HAVE_RUBY_ENCODING_H
121
121
  rb_eEncodingCompatibilityError = rb_const_get(rb_cEncoding, rb_intern("CompatibilityError"));
122
122
  #endif
@@ -1,3 +1,3 @@
1
1
  module CharlockHolmes
2
- VERSION = "0.7.8"
2
+ VERSION = "0.7.9"
3
3
  end
metadata CHANGED
@@ -1,14 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: charlock_holmes
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.8
4
+ version: 0.7.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Lopez
8
8
  - Vicent Martí
9
+ autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2024-06-28 00:00:00.000000000 Z
12
+ date: 2024-07-10 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: rake-compiler
@@ -74,6 +75,7 @@ homepage: https://github.com/brianmario/charlock_holmes
74
75
  licenses:
75
76
  - MIT
76
77
  metadata: {}
78
+ post_install_message:
77
79
  rdoc_options:
78
80
  - "--charset=UTF-8"
79
81
  require_paths:
@@ -89,7 +91,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
89
91
  - !ruby/object:Gem::Version
90
92
  version: '0'
91
93
  requirements: []
92
- rubygems_version: 3.6.0.dev
94
+ rubygems_version: 3.0.3.1
95
+ signing_key:
93
96
  specification_version: 4
94
97
  summary: Character encoding detection, brought to you by ICU
95
98
  test_files: []