charlock_holmes_heroku 0.6.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +3 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +30 -0
- data/MIT-LICENSE +20 -0
- data/README.md +111 -0
- data/Rakefile +29 -0
- data/benchmark/detection.rb +39 -0
- data/benchmark/test.txt +693 -0
- data/charlock_holmes.gemspec +25 -0
- data/ext/charlock_holmes/common.h +41 -0
- data/ext/charlock_holmes/converter.c +53 -0
- data/ext/charlock_holmes/encoding_detector.c +295 -0
- data/ext/charlock_holmes/ext.c +13 -0
- data/ext/charlock_holmes/extconf.rb +86 -0
- data/ext/charlock_holmes/src/icu4c-49_1_2-src.tgz +0 -0
- data/ext/charlock_holmes/src/icu4c-52_1-src.tgz +0 -0
- data/lib/charlock_holmes.rb +6 -0
- data/lib/charlock_holmes/encoding_detector.rb +33 -0
- data/lib/charlock_holmes/string.rb +34 -0
- data/lib/charlock_holmes/version.rb +3 -0
- data/spec/converter_spec.rb +29 -0
- data/spec/encoding_detector_spec.rb +122 -0
- data/spec/fixtures/AnsiGraph.psm1 +0 -0
- data/spec/fixtures/TwigExtensionsDate.es.yml +8 -0
- data/spec/fixtures/cl-messagepack.lisp +264 -0
- data/spec/fixtures/core.rkt +254 -0
- data/spec/fixtures/hello_world +0 -0
- data/spec/fixtures/laholator.py +131 -0
- data/spec/fixtures/repl2.cljs +109 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/string_method_spec.rb +52 -0
- metadata +133 -0
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require './lib/charlock_holmes/version' unless defined? CharlockHolmes::VERSION
|
4
|
+
|
5
|
+
# Gem metadata for the Heroku-oriented build of charlock_holmes.
Gem::Specification.new do |spec|
  # identity
  spec.name     = 'charlock_holmes_heroku'
  spec.version  = CharlockHolmes::VERSION
  spec.summary  = 'Character encoding detection, brought to you by ICU'
  spec.authors  = ['Brian Lopez', 'Vicent Martí']
  spec.email    = 'seniorlopez@gmail.com'
  spec.homepage = 'http://github.com/brianmario/charlock_holmes'
  spec.date     = Time.now.utc.strftime('%Y-%m-%d')

  # packaged files: whatever git tracks, with the specs doubling as test files
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files spec`.split("\n")
  spec.extensions    = ['ext/charlock_holmes/extconf.rb']
  spec.require_paths = ['lib']

  spec.rdoc_options     = ['--charset=UTF-8']
  spec.rubygems_version = '1.4.2'

  # tests
  spec.add_development_dependency 'rake-compiler', '>= 0.7.5'
  spec.add_development_dependency 'rspec', '>= 2.0.0'
  # benchmarks
  spec.add_development_dependency 'chardet'
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#ifndef CHARLOCK_COMMON_H
|
2
|
+
#define CHARLOCK_COMMON_H
|
3
|
+
|
4
|
+
// tell rbx not to use its caching compat layer
|
5
|
+
// by doing this we're making a promise to RBX that
|
6
|
+
// we'll never modify the pointers we get back from RSTRING_PTR
|
7
|
+
#define RSTRING_NOT_MODIFIED
|
8
|
+
|
9
|
+
#include <ruby.h>
|
10
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
11
|
+
#include <ruby/encoding.h>
|
12
|
+
#endif
|
13
|
+
|
14
|
+
/*
 * Build a Ruby String from `len` bytes starting at `str`.
 *
 * When Ruby's encoding API is available the string is created through
 * rb_external_str_new_with_enc() using `encoding` (passed as an opaque
 * pointer so callers compile without <ruby/encoding.h>); otherwise
 * `encoding` is ignored and a plain string is returned.
 */
static VALUE charlock_new_enc_str(const char *str, size_t len, void *encoding)
{
	VALUE rb_result;

#ifdef HAVE_RUBY_ENCODING_H
	rb_result = rb_external_str_new_with_enc(str, len, (rb_encoding *)encoding);
#else
	rb_result = rb_str_new(str, len);
#endif

	return rb_result;
}
|
22
|
+
|
23
|
+
/*
 * Build a Ruby String from `len` bytes starting at `str`, using the UTF-8
 * encoding when Ruby's encoding API is available.
 */
static VALUE charlock_new_str(const char *str, size_t len)
{
	VALUE rb_result;

#ifdef HAVE_RUBY_ENCODING_H
	rb_result = rb_external_str_new_with_enc(str, len, rb_utf8_encoding());
#else
	rb_result = rb_str_new(str, len);
#endif

	return rb_result;
}
|
31
|
+
|
32
|
+
/*
 * Build a Ruby String from the NUL-terminated C string `str`, using the
 * UTF-8 encoding when Ruby's encoding API is available.
 */
static VALUE charlock_new_str2(const char *str)
{
	VALUE rb_result;

#ifdef HAVE_RUBY_ENCODING_H
	rb_result = rb_external_str_new_with_enc(str, strlen(str), rb_utf8_encoding());
#else
	rb_result = rb_str_new2(str);
#endif

	return rb_result;
}
|
40
|
+
|
41
|
+
#endif
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#include "unicode/ucnv.h"
|
2
|
+
#include "common.h"
|
3
|
+
|
4
|
+
extern VALUE rb_mCharlockHolmes;
|
5
|
+
static VALUE rb_cConverter;
|
6
|
+
|
7
|
+
/*
 * call-seq: converted = Converter.convert str, src_enc, dst_enc
 *
 * Transcode a string from one encoding to another with ICU's ucnv_convert.
 *
 * rb_txt     - a String, the bytes to convert
 * rb_src_enc - a String, the ICU name of the encoding rb_txt is in
 * rb_dst_enc - a String, the ICU name of the encoding to convert to
 *
 * Returns: a new String holding the converted bytes (empty for empty input)
 * Raises:  TypeError if an argument is not a String; ArgumentError if an
 *          encoding name contains a NUL byte, the input is longer than ICU
 *          can address, or ICU reports a conversion error
 */
static VALUE rb_converter_convert(VALUE self, VALUE rb_txt, VALUE rb_src_enc, VALUE rb_dst_enc) {
	VALUE rb_out;
	const char *src_enc;
	const char *dst_enc;
	const char *src_txt;
	char *out_buf = NULL;
	void *rb_enc = NULL;
	long txt_len;
	int32_t src_len;
	int32_t out_len;
	UErrorCode status = U_ZERO_ERROR;

	// RSTRING_PTR/RSTRING_LEN are only valid on real Strings
	Check_Type(rb_txt, T_STRING);
	Check_Type(rb_src_enc, T_STRING);
	Check_Type(rb_dst_enc, T_STRING);

	// ICU takes an int32_t length; refuse input that would be truncated
	txt_len = RSTRING_LEN(rb_txt);
	if (txt_len > INT32_MAX) {
		rb_raise(rb_eArgError, "string is too long to convert");
	}

	// ICU expects NUL-terminated converter names
	src_enc = StringValueCStr(rb_src_enc);
	dst_enc = StringValueCStr(rb_dst_enc);
	src_txt = RSTRING_PTR(rb_txt);
	src_len = (int32_t)txt_len;

	// first determine the size of the output buffer
	out_len = ucnv_convert(dst_enc, src_enc, NULL, 0, src_txt, src_len, &status);

	if (status == U_BUFFER_OVERFLOW_ERROR) {
		// the expected preflight outcome: out_len bytes are required
		out_buf = malloc((size_t)out_len);
		if (out_buf == NULL) {
			rb_memerror();
		}

		// now do the actual conversion
		status = U_ZERO_ERROR;
		out_len = ucnv_convert(dst_enc, src_enc, out_buf, out_len, src_txt, src_len, &status);
		if (U_FAILURE(status)) {
			free(out_buf);
			rb_raise(rb_eArgError, "%s", u_errorName(status));
		}
	} else if (U_FAILURE(status)) {
		rb_raise(rb_eArgError, "%s", u_errorName(status));
	} else {
		// no overflow with a zero-capacity target (e.g. empty input, which
		// only yields a warning status): the converted output is empty
		out_len = 0;
	}

#ifdef HAVE_RUBY_ENCODING_H
	rb_enc = (void *)rb_enc_find(dst_enc);
#endif

	rb_out = charlock_new_enc_str(out_buf ? out_buf : "", out_len, rb_enc);

	free(out_buf);

	return rb_out;
}
|
48
|
+
|
49
|
+
void _init_charlock_converter() {
|
50
|
+
rb_cConverter = rb_define_class_under(rb_mCharlockHolmes, "Converter", rb_cObject);
|
51
|
+
|
52
|
+
rb_define_singleton_method(rb_cConverter, "convert", rb_converter_convert, 3);
|
53
|
+
}
|
@@ -0,0 +1,295 @@
|
|
1
|
+
#include "unicode/ucsdet.h"
|
2
|
+
#include "magic.h"
|
3
|
+
#include "common.h"
|
4
|
+
|
5
|
+
extern VALUE rb_mCharlockHolmes;
|
6
|
+
static VALUE rb_cEncodingDetector;
|
7
|
+
|
8
|
+
typedef struct {
|
9
|
+
UCharsetDetector *csd;
|
10
|
+
magic_t magic;
|
11
|
+
} charlock_detector_t;
|
12
|
+
|
13
|
+
static VALUE rb_encdec_buildmatch(const UCharsetMatch *match)
|
14
|
+
{
|
15
|
+
UErrorCode status = U_ZERO_ERROR;
|
16
|
+
const char *mname;
|
17
|
+
const char *mlang;
|
18
|
+
int mconfidence;
|
19
|
+
VALUE rb_match;
|
20
|
+
|
21
|
+
if (!match)
|
22
|
+
return Qnil;
|
23
|
+
|
24
|
+
mname = ucsdet_getName(match, &status);
|
25
|
+
mlang = ucsdet_getLanguage(match, &status);
|
26
|
+
mconfidence = ucsdet_getConfidence(match, &status);
|
27
|
+
|
28
|
+
rb_match = rb_hash_new();
|
29
|
+
|
30
|
+
rb_hash_aset(rb_match, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("text")));
|
31
|
+
rb_hash_aset(rb_match, ID2SYM(rb_intern("encoding")), charlock_new_str2(mname));
|
32
|
+
rb_hash_aset(rb_match, ID2SYM(rb_intern("confidence")), INT2NUM(mconfidence));
|
33
|
+
|
34
|
+
if (mlang && mlang[0])
|
35
|
+
rb_hash_aset(rb_match, ID2SYM(rb_intern("language")), charlock_new_str2(mlang));
|
36
|
+
|
37
|
+
return rb_match;
|
38
|
+
}
|
39
|
+
|
40
|
+
/*
 * Build the Hash reported for binary content:
 * { :type => :binary, :confidence => 100 }
 */
static VALUE rb_encdec_binarymatch() {
	VALUE rb_result = rb_hash_new();
	VALUE rb_type_key = ID2SYM(rb_intern("type"));
	VALUE rb_type_val = ID2SYM(rb_intern("binary"));

	rb_hash_aset(rb_result, rb_type_key, rb_type_val);
	rb_hash_aset(rb_result, ID2SYM(rb_intern("confidence")), INT2NUM(100));

	return rb_result;
}
|
50
|
+
|
51
|
+
/*
 * Ask libmagic to describe the bytes of rb_str.
 *
 * Returns 1 when the description does not contain the word "text",
 * 0 when it does. Raises StandardError with libmagic's error message when
 * magic_buffer returns no description.
 */
static int detect_binary_content(charlock_detector_t *detector, VALUE rb_str) {
	const char *description;

	description = magic_buffer(detector->magic, RSTRING_PTR(rb_str), RSTRING_LEN(rb_str));

	if (description == NULL) {
		rb_raise(rb_eStandardError, "%s", magic_error(detector->magic));
	}

	if (strstr(description, "text") == NULL) {
		return 1;
	}

	return 0;
}
|
65
|
+
|
66
|
+
/*
|
67
|
+
* call-seq: detection_hash = EncodingDetector.detect str[, hint_enc]
|
68
|
+
*
|
69
|
+
* Attempt to detect the encoding of this string
|
70
|
+
*
|
71
|
+
* str - a String, what you want to detect the encoding of
|
72
|
+
* hint_enc - an optional String (like "UTF-8"), the encoding name which will
|
73
|
+
* be used as an additional hint to the charset detector
|
74
|
+
*
|
75
|
+
* Returns: a Hash with :encoding, :language, :type and :confidence
|
76
|
+
*/
|
77
|
+
static VALUE rb_encdec_detect(int argc, VALUE *argv, VALUE self)
|
78
|
+
{
|
79
|
+
UErrorCode status = U_ZERO_ERROR;
|
80
|
+
charlock_detector_t *detector;
|
81
|
+
VALUE rb_str;
|
82
|
+
VALUE rb_enc_hint;
|
83
|
+
|
84
|
+
rb_scan_args(argc, argv, "11", &rb_str, &rb_enc_hint);
|
85
|
+
|
86
|
+
Check_Type(rb_str, T_STRING);
|
87
|
+
Data_Get_Struct(self, charlock_detector_t, detector);
|
88
|
+
|
89
|
+
// first lets see if this is binary content
|
90
|
+
if (detect_binary_content(detector, rb_str)) {
|
91
|
+
return rb_encdec_binarymatch();
|
92
|
+
}
|
93
|
+
|
94
|
+
// if we got here - the data doesn't look like binary
|
95
|
+
// lets try to figure out what encoding the text is in
|
96
|
+
ucsdet_setText(detector->csd, RSTRING_PTR(rb_str), (int32_t)RSTRING_LEN(rb_str), &status);
|
97
|
+
|
98
|
+
if (!NIL_P(rb_enc_hint)) {
|
99
|
+
Check_Type(rb_enc_hint, T_STRING);
|
100
|
+
ucsdet_setDeclaredEncoding(detector->csd, RSTRING_PTR(rb_enc_hint), RSTRING_LEN(rb_enc_hint), &status);
|
101
|
+
}
|
102
|
+
|
103
|
+
return rb_encdec_buildmatch(ucsdet_detect(detector->csd, &status));
|
104
|
+
}
|
105
|
+
|
106
|
+
|
107
|
+
/*
|
108
|
+
* call-seq: detection_hash_array = EncodingDetector.detect_all str[, hint_enc]
|
109
|
+
*
|
110
|
+
* Attempt to detect the encoding of this string, and return
|
111
|
+
* a list with all the possible encodings that match it.
|
112
|
+
*
|
113
|
+
*
|
114
|
+
* str - a String, what you want to detect the encoding of
|
115
|
+
* hint_enc - an optional String (like "UTF-8"), the encoding name which will
|
116
|
+
* be used as an additional hint to the charset detector
|
117
|
+
*
|
118
|
+
* Returns: an Array with zero or more Hashes,
|
119
|
+
* each one of them with with :encoding, :language, :type and :confidence
|
120
|
+
*/
|
121
|
+
static VALUE rb_encdec_detect_all(int argc, VALUE *argv, VALUE self)
|
122
|
+
{
|
123
|
+
UErrorCode status = U_ZERO_ERROR;
|
124
|
+
charlock_detector_t *detector;
|
125
|
+
const UCharsetMatch **csm;
|
126
|
+
VALUE rb_ret;
|
127
|
+
int i, match_count;
|
128
|
+
VALUE rb_str;
|
129
|
+
VALUE rb_enc_hint;
|
130
|
+
VALUE binary_match;
|
131
|
+
|
132
|
+
rb_scan_args(argc, argv, "11", &rb_str, &rb_enc_hint);
|
133
|
+
|
134
|
+
Check_Type(rb_str, T_STRING);
|
135
|
+
Data_Get_Struct(self, charlock_detector_t, detector);
|
136
|
+
|
137
|
+
rb_ret = rb_ary_new();
|
138
|
+
|
139
|
+
// first lets see if this is binary content
|
140
|
+
binary_match = Qnil;
|
141
|
+
if (detect_binary_content(detector, rb_str)) {
|
142
|
+
binary_match = rb_encdec_binarymatch();
|
143
|
+
}
|
144
|
+
|
145
|
+
ucsdet_setText(detector->csd, RSTRING_PTR(rb_str), (int32_t)RSTRING_LEN(rb_str), &status);
|
146
|
+
|
147
|
+
if (!NIL_P(rb_enc_hint)) {
|
148
|
+
Check_Type(rb_enc_hint, T_STRING);
|
149
|
+
ucsdet_setDeclaredEncoding(detector->csd, RSTRING_PTR(rb_enc_hint), RSTRING_LEN(rb_enc_hint), &status);
|
150
|
+
}
|
151
|
+
|
152
|
+
csm = ucsdet_detectAll(detector->csd, &match_count, &status);
|
153
|
+
|
154
|
+
for (i = 0; i < match_count; ++i) {
|
155
|
+
rb_ary_push(rb_ret, rb_encdec_buildmatch(csm[i]));
|
156
|
+
}
|
157
|
+
|
158
|
+
if (!NIL_P(binary_match))
|
159
|
+
rb_ary_unshift(rb_ret, binary_match);
|
160
|
+
|
161
|
+
return rb_ret;
|
162
|
+
}
|
163
|
+
|
164
|
+
/*
|
165
|
+
* call-seq: EncodingDetector#strip_tags?
|
166
|
+
*
|
167
|
+
* Returns whether or not the strip_tags flag is set on this detector
|
168
|
+
*
|
169
|
+
* Returns: Boolean
|
170
|
+
*/
|
171
|
+
static VALUE rb_get_strip_tags(VALUE self)
|
172
|
+
{
|
173
|
+
charlock_detector_t *detector;
|
174
|
+
UBool val;
|
175
|
+
VALUE rb_val;
|
176
|
+
|
177
|
+
Data_Get_Struct(self, charlock_detector_t, detector);
|
178
|
+
|
179
|
+
val = ucsdet_isInputFilterEnabled(detector->csd);
|
180
|
+
|
181
|
+
rb_val = val == 1 ? Qtrue : Qfalse;
|
182
|
+
|
183
|
+
return rb_val;
|
184
|
+
}
|
185
|
+
|
186
|
+
/*
|
187
|
+
* call-seq: EncodingDetector#strip_tags = true
|
188
|
+
*
|
189
|
+
* Enable or disable the stripping of HTML/XML tags from the input before
|
190
|
+
* attempting any detection
|
191
|
+
*
|
192
|
+
* Returns: Boolean, the value passed
|
193
|
+
*/
|
194
|
+
static VALUE rb_set_strip_tags(VALUE self, VALUE rb_val)
|
195
|
+
{
|
196
|
+
charlock_detector_t *detector;
|
197
|
+
UBool val;
|
198
|
+
|
199
|
+
Data_Get_Struct(self, charlock_detector_t, detector);
|
200
|
+
|
201
|
+
val = rb_val == Qtrue ? 1 : 0;
|
202
|
+
|
203
|
+
ucsdet_enableInputFilter(detector->csd, val);
|
204
|
+
|
205
|
+
return rb_val;
|
206
|
+
}
|
207
|
+
|
208
|
+
/*
|
209
|
+
* call-seq: detectable_encodings = EncodingDetector.supported_encodings
|
210
|
+
*
|
211
|
+
* The list of detectable encodings supported by this library
|
212
|
+
*
|
213
|
+
* Returns: an Array of Strings
|
214
|
+
*/
|
215
|
+
static VALUE rb_get_supported_encodings(VALUE klass)
|
216
|
+
{
|
217
|
+
UCharsetDetector *csd;
|
218
|
+
UErrorCode status = U_ZERO_ERROR;
|
219
|
+
UEnumeration *encoding_list;
|
220
|
+
VALUE rb_encoding_list;
|
221
|
+
int32_t enc_count;
|
222
|
+
int32_t i;
|
223
|
+
const char *enc_name;
|
224
|
+
int32_t enc_name_len;
|
225
|
+
|
226
|
+
rb_encoding_list = rb_iv_get(klass, "encoding_list");
|
227
|
+
|
228
|
+
// lazily populate the list
|
229
|
+
if (NIL_P(rb_encoding_list)) {
|
230
|
+
csd = ucsdet_open(&status);
|
231
|
+
|
232
|
+
encoding_list = ucsdet_getAllDetectableCharsets(csd, &status);
|
233
|
+
rb_encoding_list = rb_ary_new();
|
234
|
+
enc_count = uenum_count(encoding_list, &status);
|
235
|
+
|
236
|
+
for(i=0; i < enc_count; i++) {
|
237
|
+
enc_name = uenum_next(encoding_list, &enc_name_len, &status);
|
238
|
+
rb_ary_push(rb_encoding_list, charlock_new_str(enc_name, enc_name_len));
|
239
|
+
}
|
240
|
+
|
241
|
+
rb_iv_set(klass, "encoding_list", rb_encoding_list);
|
242
|
+
ucsdet_close(csd);
|
243
|
+
}
|
244
|
+
|
245
|
+
return rb_encoding_list;
|
246
|
+
}
|
247
|
+
|
248
|
+
static void rb_encdec__free(void *obj)
|
249
|
+
{
|
250
|
+
charlock_detector_t *detector;
|
251
|
+
|
252
|
+
detector = (charlock_detector_t *)obj;
|
253
|
+
|
254
|
+
if (detector->csd)
|
255
|
+
ucsdet_close(detector->csd);
|
256
|
+
|
257
|
+
if (detector->magic)
|
258
|
+
magic_close(detector->magic);
|
259
|
+
|
260
|
+
free(detector);
|
261
|
+
}
|
262
|
+
|
263
|
+
/*
 * Allocator for EncodingDetector: creates the native detector struct, wraps
 * it in a Ruby object and opens the ICU and libmagic handles.
 *
 * Raises: NoMemoryError when the struct cannot be allocated; StandardError
 *         when either handle cannot be opened
 */
static VALUE rb_encdec__alloc(VALUE klass)
{
	charlock_detector_t *detector;
	UErrorCode status = U_ZERO_ERROR;
	VALUE obj;

	detector = calloc(1, sizeof(charlock_detector_t));
	if (detector == NULL) {
		// the fields are written below, so a failed allocation must not get that far
		rb_memerror();
	}

	// wrap first: if anything below raises, rb_encdec__free gets to release
	// the zeroed struct and any handle already opened
	obj = Data_Wrap_Struct(klass, NULL, rb_encdec__free, (void *)detector);

	detector->csd = ucsdet_open(&status);
	if (U_FAILURE(status)) {
		rb_raise(rb_eStandardError, "%s", u_errorName(status));
	}

	detector->magic = magic_open(MAGIC_NO_CHECK_SOFT);
	if (detector->magic == NULL) {
		rb_raise(rb_eStandardError, "%s", magic_error(detector->magic));
	}

	return obj;
}
|
284
|
+
|
285
|
+
void _init_charlock_encoding_detector()
|
286
|
+
{
|
287
|
+
rb_cEncodingDetector = rb_define_class_under(rb_mCharlockHolmes, "EncodingDetector", rb_cObject);
|
288
|
+
rb_define_alloc_func(rb_cEncodingDetector, rb_encdec__alloc);
|
289
|
+
rb_define_method(rb_cEncodingDetector, "detect", rb_encdec_detect, -1);
|
290
|
+
rb_define_method(rb_cEncodingDetector, "detect_all", rb_encdec_detect_all, -1);
|
291
|
+
rb_define_method(rb_cEncodingDetector, "strip_tags", rb_get_strip_tags, 0);
|
292
|
+
rb_define_method(rb_cEncodingDetector, "strip_tags=", rb_set_strip_tags, 1);
|
293
|
+
|
294
|
+
rb_define_singleton_method(rb_cEncodingDetector, "supported_encodings", rb_get_supported_encodings, 0);
|
295
|
+
}
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#include "common.h"
|
2
|
+
|
3
|
+
extern void _init_charlock_encoding_detector();
|
4
|
+
extern void _init_charlock_converter();
|
5
|
+
|
6
|
+
VALUE rb_mCharlockHolmes;
|
7
|
+
|
8
|
+
void Init_charlock_holmes() {
|
9
|
+
rb_mCharlockHolmes = rb_define_module("CharlockHolmes");
|
10
|
+
|
11
|
+
_init_charlock_encoding_detector();
|
12
|
+
_init_charlock_converter();
|
13
|
+
}
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'mkmf'
|
2
|
+
|
3
|
+
# Absolute path of the directory holding this extconf.rb.
CWD = File.expand_path(File.dirname(__FILE__))

# Echo a shell command, run it through mkmf's xsystem and return its result.
# Raises RuntimeError when the command fails.
def sys(cmd)
  puts " -- #{cmd}"
  ret = xsystem(cmd)
  raise "#{cmd} failed, please report issue on http://github.com/brianmario/charlock_holmes" unless ret
  ret
end
|
11
|
+
|
12
|
+
# Bail out early with a readable banner when no `make` is on the PATH.
make_path = `which make`.strip
if make_path.empty?
  [
    "\n\n",
    "***************************************************************************************",
    "*************** make required (apt-get install make build-essential) =( ***************",
    "***************************************************************************************"
  ].each { |line| STDERR.puts line }
  exit(1)
end
|
19
|
+
|
20
|
+
##
# ICU dependency
#
# Unpacks the bundled ICU tarball, builds it as static PIC libraries into
# #{CWD}/dst and points the compiler and linker at the result.

src = File.basename('icu4c-52_1-src.tgz')
dir = File.basename('icu')

Dir.chdir("#{CWD}/src") do
  # start from a clean tree; File.exist? is used because File.exists? was
  # removed in Ruby 3.2
  FileUtils.rm_rf(dir) if File.exist?(dir)

  sys("tar zxvf #{src}")
  Dir.chdir(File.join(dir, 'source')) do
    sys("LDFLAGS= CXXFLAGS=\"-O2 -fPIC\" CFLAGS=\"-O2 -fPIC\" ./configure --prefix=#{CWD}/dst/ --disable-tests --disable-samples --disable-icuio --disable-extras --disable-layout --enable-static --disable-shared")
    sys("make install")
  end
end

dir_config 'icu'

$INCFLAGS << " -I#{CWD}/dst/include "
$LDFLAGS << " -L#{CWD}/dst/lib"

unless have_library 'icui18n' and have_library 'icudata' and have_library 'icutu' and have_library 'icuuc' and have_header 'unicode/ucnv.h'
  STDERR.puts "\n\n"
  STDERR.puts "***************************************************************************************"
  STDERR.puts "********* error compiling and linking icu4c. please report issue on github *********"
  STDERR.puts "***************************************************************************************"
  exit(1)
end
|
49
|
+
|
50
|
+
##
# libmagic dependency
#
# Unpacks the bundled `file` tarball, applies file-soft-check.patch, builds
# a static PIC libmagic into #{CWD}/dst and links it under the name
# libmagic_ext.

src = File.basename('file-5.08.tar.gz')
dir = File.basename(src, '.tar.gz')

Dir.chdir("#{CWD}/src") do
  # start from a clean tree; File.exist? is used because File.exists? was
  # removed in Ruby 3.2
  FileUtils.rm_rf(dir) if File.exist?(dir)

  sys("tar zxvf #{src}")
  Dir.chdir(dir) do
    sys("./configure --prefix=#{CWD}/dst/ --disable-shared --enable-static --with-pic")
    sys("patch -p0 < ../file-soft-check.patch")
    sys("make -C src install")
    sys("make -C magic install")
  end
end

FileUtils.cp "#{CWD}/dst/lib/libmagic.a", "#{CWD}/libmagic_ext.a"

# prepend so the bundled headers are searched before anything already listed
$INCFLAGS[0,0] = " -I#{CWD}/dst/include "
$LDFLAGS << " -L#{CWD} "

dir_config 'magic'
unless have_library 'magic_ext' and have_header 'magic.h'
  STDERR.puts "\n\n"
  STDERR.puts "***************************************************************************************"
  STDERR.puts "********* error compiling and linking libmagic. please report issue on github *********"
  STDERR.puts "***************************************************************************************"
  exit(1)
end
|
82
|
+
|
83
|
+
# Compiler and linker settings for the extension itself.
$CFLAGS << ' -Wall -funroll-loops'

# DEBUG in the environment switches to an unoptimised build with extra warnings
if ENV['DEBUG']
  $CFLAGS << ' -Wextra -O0 -ggdb3'
end

$LIBS << " -lstdc++"

create_makefile('charlock_holmes/charlock_holmes')
|