charlock_holmes 0.6.9.1 → 0.6.9.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,24 +1,16 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- charlock_holmes (0.6.9.1)
4
+ charlock_holmes (0.6.9.2)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
8
8
  specs:
9
9
  chardet (0.9.0)
10
- diff-lcs (1.1.2)
10
+ minitest (4.6.2)
11
11
  rake (0.9.2)
12
12
  rake-compiler (0.7.9)
13
13
  rake
14
- rspec (2.6.0)
15
- rspec-core (~> 2.6.0)
16
- rspec-expectations (~> 2.6.0)
17
- rspec-mocks (~> 2.6.0)
18
- rspec-core (2.6.4)
19
- rspec-expectations (2.6.0)
20
- diff-lcs (~> 1.1.2)
21
- rspec-mocks (2.6.0)
22
14
 
23
15
  PLATFORMS
24
16
  ruby
@@ -26,5 +18,5 @@ PLATFORMS
26
18
  DEPENDENCIES
27
19
  chardet
28
20
  charlock_holmes!
21
+ minitest
29
22
  rake-compiler (>= 0.7.5)
30
- rspec (>= 2.0.0)
data/Rakefile CHANGED
@@ -1,23 +1,10 @@
1
- # rspec
2
- begin
3
- require 'rspec'
4
- require 'rspec/core/rake_task'
1
+ require 'rake/testtask'
5
2
 
6
- desc "Run all examples with RCov"
7
- RSpec::Core::RakeTask.new 'spec:rcov' do |t|
8
- t.rcov = true
9
- end
10
- RSpec::Core::RakeTask.new 'spec' do |t|
11
- t.verbose = true
12
- end
13
-
14
- task :default => :spec
15
- rescue LoadError
16
- puts "rspec, or one of its dependencies, is not available. Install it with: sudo gem install rspec"
3
+ Rake::TestTask.new do |t|
4
+ t.pattern = "test/**/*_test.rb"
17
5
  end
18
6
 
19
- # rake-compiler
20
- require 'rake' unless defined? Rake
7
+ task :default => :test
21
8
 
22
9
  gem 'rake-compiler', '>= 0.7.5'
23
10
  require "rake/extensiontask"
@@ -26,4 +13,4 @@ Rake::ExtensionTask.new 'charlock_holmes' do |ext|
26
13
  ext.lib_dir = File.join 'lib', 'charlock_holmes'
27
14
  end
28
15
 
29
- Rake::Task[:spec].prerequisites << :compile
16
+ Rake::Task[:test].prerequisites << :compile
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
19
19
 
20
20
  # tests
21
21
  s.add_development_dependency 'rake-compiler', ">= 0.7.5"
22
- s.add_development_dependency 'rspec', ">= 2.0.0"
22
+ s.add_development_dependency 'minitest'
23
23
  # benchmarks
24
24
  s.add_development_dependency 'chardet'
25
25
  end
@@ -274,15 +274,11 @@ static VALUE rb_encdec__alloc(VALUE klass)
274
274
  rb_raise(rb_eStandardError, "%s", u_errorName(status));
275
275
  }
276
276
 
277
- detector->magic = magic_open(0);
277
+ detector->magic = magic_open(MAGIC_NO_CHECK_SOFT);
278
278
  if (detector->magic == NULL) {
279
279
  rb_raise(rb_eStandardError, "%s", magic_error(detector->magic));
280
280
  }
281
281
 
282
- // load the libmagic database
283
- // NULL means use the default or whatever is specified by the MAGIC env var
284
- magic_load(detector->magic, NULL);
285
-
286
282
  return obj;
287
283
  }
288
284
 
@@ -2,6 +2,7 @@
2
2
 
3
3
  extern void _init_charlock_encoding_detector();
4
4
  extern void _init_charlock_converter();
5
+ extern void _init_charlock_transliterator();
5
6
 
6
7
  VALUE rb_mCharlockHolmes;
7
8
 
@@ -10,4 +11,5 @@ void Init_charlock_holmes() {
10
11
 
11
12
  _init_charlock_encoding_detector();
12
13
  _init_charlock_converter();
14
+ _init_charlock_transliterator();
13
15
  }
@@ -58,6 +58,7 @@ Dir.chdir("#{CWD}/src") do
58
58
  sys("tar zxvf #{src}")
59
59
  Dir.chdir(dir) do
60
60
  sys("./configure --prefix=#{CWD}/dst/ --disable-shared --enable-static --with-pic")
61
+ sys("patch -p0 < ../file-soft-check.patch")
61
62
  sys("make -C src install")
62
63
  sys("make -C magic install")
63
64
  end
@@ -0,0 +1,86 @@
1
+ #include "common.h"
2
+ #undef UChar
3
+
4
+ #include <unicode/translit.h>
5
+
6
+ extern "C" {
7
+
8
+ #ifdef HAVE_RUBY_ENCODING_H
9
+ #include <ruby/encoding.h>
10
+ static VALUE rb_eEncodingCompatibilityError;
11
+
12
+ static void check_utf8_encoding(VALUE str) {
13
+ static rb_encoding *_cached[3] = {NULL, NULL, NULL};
14
+ rb_encoding *enc;
15
+
16
+ if (_cached[0] == NULL) {
17
+ _cached[0] = rb_utf8_encoding();
18
+ _cached[1] = rb_usascii_encoding();
19
+ _cached[2] = rb_ascii8bit_encoding();
20
+ }
21
+
22
+ enc = rb_enc_get(str);
23
+ if (enc != _cached[0] && enc != _cached[1] && enc != _cached[2]) {
24
+ rb_raise(rb_eEncodingCompatibilityError,
25
+ "Input must be UTF-8 or US-ASCII, %s given", rb_enc_name(enc));
26
+ }
27
+ }
28
+
29
+ #else
30
+ static void check_utf8_encoding(VALUE str) {}
31
+ #endif
32
+
33
+ extern VALUE rb_mCharlockHolmes;
34
+ static VALUE rb_cTransliterator;
35
+
36
+ static VALUE rb_transliterator_transliterate(VALUE self, VALUE rb_txt, VALUE rb_id) {
37
+ UErrorCode status = U_ZERO_ERROR;
38
+ UParseError p_error;
39
+ Transliterator *trans;
40
+ const char *txt;
41
+ size_t txt_len;
42
+ const char *id;
43
+ size_t id_len;
44
+ UnicodeString *u_txt;
45
+ std::string result;
46
+ VALUE rb_out;
47
+
48
+ Check_Type(rb_txt, T_STRING);
49
+ Check_Type(rb_id, T_STRING);
50
+
51
+ check_utf8_encoding(rb_txt);
52
+ check_utf8_encoding(rb_id);
53
+
54
+ txt = RSTRING_PTR(rb_txt);
55
+ txt_len = RSTRING_LEN(rb_txt);
56
+ id = RSTRING_PTR(rb_id);
57
+ id_len = RSTRING_LEN(rb_id);
58
+
59
+ trans = Transliterator::createInstance(UnicodeString(id, id_len), UTRANS_FORWARD, p_error, status);
60
+ if(!U_SUCCESS(status)) {
61
+ rb_raise(rb_eArgError, "%s", u_errorName(status));
62
+ }
63
+
64
+ u_txt = new UnicodeString(txt, txt_len);
65
+ trans->transliterate(*u_txt);
66
+ result = u_txt->toUTF8String(result);
67
+
68
+ delete u_txt;
69
+ delete trans;
70
+
71
+ rb_out = charlock_new_str(result.data(), result.length());
72
+
73
+ return rb_out;
74
+ }
75
+
76
+ void _init_charlock_transliterator() {
77
+ #ifdef HAVE_RUBY_ENCODING_H
78
+ rb_eEncodingCompatibilityError = rb_const_get(rb_cEncoding, rb_intern("CompatibilityError"));
79
+ #endif
80
+
81
+ rb_cTransliterator = rb_define_class_under(rb_mCharlockHolmes, "Transliterator", rb_cObject);
82
+
83
+ rb_define_singleton_method(rb_cTransliterator, "transliterate", (VALUE(*)(...))rb_transliterator_transliterate, 2);
84
+ }
85
+
86
+ }
@@ -1,3 +1,3 @@
1
1
  module CharlockHolmes
2
- VERSION = "0.6.9.1"
2
+ VERSION = "0.6.9.2"
3
3
  end
@@ -1,9 +1,8 @@
1
1
  # encoding: utf-8
2
+ require File.expand_path("../helper", __FILE__)
2
3
 
3
- require 'spec_helper'
4
-
5
- describe CharlockHolmes::Converter do
6
- test 'is able to convert regular ascii content from ISO-8859-1 to UTF-16, and back again' do
4
+ class ConverterTest < MiniTest::Unit::TestCase
5
+ def test_convert_ascii_from_iso859_1_to_utf16_and_back
7
6
  input = 'test'
8
7
 
9
8
  output = CharlockHolmes::Converter.convert input, 'ISO-8859-1', 'UTF-16'
@@ -15,7 +14,7 @@ describe CharlockHolmes::Converter do
15
14
  assert input == output
16
15
  end
17
16
 
18
- test 'is able to convert UTF-8 content from UTF-8 to UTF-16, and back again' do
17
+ def test_convert_utf8_to_utf16_and_back
19
18
  input = 'λ, λ, λ'
20
19
 
21
20
  output = CharlockHolmes::Converter.convert input, 'UTF-8', 'UTF-16'
@@ -27,7 +26,7 @@ describe CharlockHolmes::Converter do
27
26
  assert input == output
28
27
  end
29
28
 
30
- test 'all params must be strings' do
29
+ def test_params_must_be_strings
31
30
  assert_raises TypeError do
32
31
  CharlockHolmes::Converter.convert nil, 'UTF-8', 'UTF-16'
33
32
  end
@@ -40,8 +39,10 @@ describe CharlockHolmes::Converter do
40
39
  CharlockHolmes::Converter.convert 'lol', 'UTF-8', nil
41
40
  end
42
41
 
43
- assert_nothing_raised do
42
+ begin
44
43
  CharlockHolmes::Converter.convert 'lol', 'UTF-8', 'UTF-16'
44
+ rescue Exception => e
45
+ assert_nil e, "#{e.class.name} raised, expected nothing"
45
46
  end
46
47
  end
47
48
  end
@@ -1,25 +1,24 @@
1
1
  # encoding: utf-8
2
+ require File.expand_path("../helper", __FILE__)
2
3
 
3
- require 'spec_helper'
4
-
5
- describe CharlockHolmes::EncodingDetector do
6
- before :all do
4
+ class EncodingDetectorTest < MiniTest::Unit::TestCase
5
+ def setup
7
6
  @detector = CharlockHolmes::EncodingDetector.new
8
7
  end
9
8
 
10
- test 'has a class-level detect method' do
9
+ def test_has_class_level_detect_method
11
10
  CharlockHolmes::EncodingDetector.respond_to? :detect
12
11
  detected = CharlockHolmes::EncodingDetector.detect 'test'
13
12
  assert_equal 'ISO-8859-1', detected[:encoding]
14
13
  end
15
14
 
16
- test 'has a class-level detect method that accepts an encoding hint' do
15
+ def test_class_level_detect_accepts_encoding_hint
17
16
  CharlockHolmes::EncodingDetector.respond_to? :detect
18
17
  detected = CharlockHolmes::EncodingDetector.detect 'test', 'UTF-8'
19
18
  assert_equal 'ISO-8859-1', detected[:encoding]
20
19
  end
21
20
 
22
- test 'has a class-level detect_all method' do
21
+ def test_has_class_level_detect_all_method
23
22
  CharlockHolmes::EncodingDetector.respond_to? :detect_all
24
23
  detected_list = CharlockHolmes::EncodingDetector.detect_all 'test'
25
24
  assert detected_list.is_a? Array
@@ -28,7 +27,7 @@ describe CharlockHolmes::EncodingDetector do
28
27
  assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
29
28
  end
30
29
 
31
- test 'has a class-level detect_all method that accepts an encoding hint' do
30
+ def test_class_level_detect_all_method_accepts_encoding_hint
32
31
  CharlockHolmes::EncodingDetector.respond_to? :detect_all
33
32
  detected_list = CharlockHolmes::EncodingDetector.detect_all 'test', 'UTF-8'
34
33
  assert detected_list.is_a? Array
@@ -37,19 +36,19 @@ describe CharlockHolmes::EncodingDetector do
37
36
  assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
38
37
  end
39
38
 
40
- test 'has a detect method' do
39
+ def test_has_detect_method
41
40
  @detector.respond_to? :detect
42
41
  detected = @detector.detect 'test'
43
42
  assert_equal 'ISO-8859-1', detected[:encoding]
44
43
  end
45
44
 
46
- test 'has a detect method that accepts an encoding hint' do
45
+ def test_detect_accepts_encoding_hint
47
46
  @detector.respond_to? :detect
48
47
  detected = @detector.detect 'test', 'UTF-8'
49
48
  assert_equal 'ISO-8859-1', detected[:encoding]
50
49
  end
51
50
 
52
- test 'has a detect_all method' do
51
+ def test_has_detect_all_method
53
52
  @detector.respond_to? :detect_all
54
53
  detected_list = @detector.detect_all 'test'
55
54
  assert detected_list.is_a? Array
@@ -58,7 +57,7 @@ describe CharlockHolmes::EncodingDetector do
58
57
  assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
59
58
  end
60
59
 
61
- test 'has a detect_all method that accepts an encoding hint' do
60
+ def test_detect_all_accepts_encoding_hint
62
61
  @detector.respond_to? :detect_all
63
62
  detected_list = @detector.detect_all 'test', 'UTF-8'
64
63
  assert detected_list.is_a? Array
@@ -67,7 +66,7 @@ describe CharlockHolmes::EncodingDetector do
67
66
  assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
68
67
  end
69
68
 
70
- test 'has a strip_tags flag' do
69
+ def test_strip_tags_flag
71
70
  detector = CharlockHolmes::EncodingDetector.new
72
71
  detector.strip_tags = true
73
72
  assert detector.strip_tags
@@ -82,7 +81,7 @@ describe CharlockHolmes::EncodingDetector do
82
81
  assert_equal 'UTF-8', detection[:encoding]
83
82
  end
84
83
 
85
- test 'has a list of supported encodings' do
84
+ def test_has_list_of_supported_encodings
86
85
  CharlockHolmes::EncodingDetector.respond_to? :supported_encodings
87
86
  supported_encodings = CharlockHolmes::EncodingDetector.supported_encodings
88
87
 
@@ -90,32 +89,30 @@ describe CharlockHolmes::EncodingDetector do
90
89
  assert supported_encodings.include? 'UTF-8'
91
90
  end
92
91
 
93
- context 'encoding detection' do
94
- MAPPING = [
95
- ['repl2.cljs', 'ISO-8859-1', :text],
96
- ['core.rkt', 'UTF-8', :text],
97
- ['cl-messagepack.lisp', 'ISO-8859-1', :text],
98
- ['TwigExtensionsDate.es.yml', 'UTF-8', :text],
99
- ['AnsiGraph.psm1', 'UTF-16LE', :text],
100
- ['laholator.py', 'UTF-8', :text],
101
- ['hello_world', nil, :binary]
102
- ]
103
-
92
+ MAPPING = [
93
+ ['repl2.cljs', 'ISO-8859-1', :text],
94
+ ['core.rkt', 'UTF-8', :text],
95
+ ['cl-messagepack.lisp', 'ISO-8859-1', :text],
96
+ ['TwigExtensionsDate.es.yml', 'UTF-8', :text],
97
+ ['AnsiGraph.psm1', 'UTF-16LE', :text],
98
+ ['laholator.py', 'UTF-8', :text],
99
+ ['hello_world', nil, :binary]
100
+ ]
101
+
102
+ def test_detection_works_as_expected
104
103
  MAPPING.each do |mapping|
105
104
  file, encoding, type = mapping
106
105
 
107
- test "#{file} should be detected as #{encoding || 'binary'}" do
108
- path = File.expand_path "../fixtures/#{file}", __FILE__
109
- content = File.read path
110
- guessed = @detector.detect content
106
+ path = File.expand_path "../fixtures/#{file}", __FILE__
107
+ content = File.read path
108
+ guessed = @detector.detect content
111
109
 
112
- assert_equal encoding, guessed[:encoding]
113
- assert_equal type, guessed[:type]
110
+ assert_equal encoding, guessed[:encoding]
111
+ assert_equal type, guessed[:type]
114
112
 
115
- if content.respond_to?(:force_encoding) && guessed[:type] == :text
116
- content.force_encoding guessed[:encoding]
117
- assert content.valid_encoding?
118
- end
113
+ if content.respond_to?(:force_encoding) && guessed[:type] == :text
114
+ content.force_encoding guessed[:encoding]
115
+ assert content.valid_encoding?
119
116
  end
120
117
  end
121
118
  end
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -0,0 +1,14 @@
1
+ # Basic test environment.
2
+
3
+ # blah fuck this
4
+ require 'rubygems' if !defined?(Gem)
5
+ require 'bundler/setup'
6
+
7
+ require 'charlock_holmes'
8
+
9
+ # bring in minitest
10
+ require 'minitest/autorun'
11
+
12
+ # put lib and test dirs directly on load path
13
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
14
+ $LOAD_PATH.unshift File.expand_path('..', __FILE__)
@@ -1,8 +1,8 @@
1
- require 'spec_helper'
1
+ require File.expand_path("../helper", __FILE__)
2
2
  require 'charlock_holmes/string'
3
3
 
4
- describe String do
5
- test 'has a detect_encoding method' do
4
+ class StringMethodsTest < MiniTest::Unit::TestCase
5
+ def test_adds_detect_encoding_method
6
6
  str = 'test'
7
7
  str.respond_to? :detect_encoding
8
8
 
@@ -10,7 +10,7 @@ describe String do
10
10
  assert_equal 'ISO-8859-1', detected[:encoding]
11
11
  end
12
12
 
13
- test 'has a detect_encoding method that accepts an encoding hint' do
13
+ def test_detect_encoding_accepts_encoding_hint_param
14
14
  str = 'test'
15
15
  str.respond_to? :detect_encoding
16
16
 
@@ -18,7 +18,7 @@ describe String do
18
18
  assert_equal 'ISO-8859-1', detected[:encoding]
19
19
  end
20
20
 
21
- test 'has a detect_encodings method' do
21
+ def test_adds_detect_encodings_method
22
22
  str = 'test'
23
23
  str.respond_to? :detect_encodings
24
24
 
@@ -29,7 +29,7 @@ describe String do
29
29
  assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
30
30
  end
31
31
 
32
- test 'has a detect_encodings method that accepts an encoding hint' do
32
+ def test_detect_encodings_accepts_encoding_hint_param
33
33
  str = 'test'
34
34
  str.respond_to? :detect_encodings
35
35
 
@@ -41,7 +41,7 @@ describe String do
41
41
  end
42
42
 
43
43
  if RUBY_VERSION =~ /1.9/
44
- test 'has a detect_encoding! method' do
44
+ def test_adds_detect_encoding_bang_method
45
45
  str = 'test'
46
46
  str.respond_to? :detect_encoding!
47
47
 
@@ -0,0 +1,119 @@
1
+ # encoding: utf-8
2
+ require File.expand_path("../helper", __FILE__)
3
+
4
+ class TransliteratorTest < MiniTest::Unit::TestCase
5
+ DONT_CONVERT = [
6
+ "Vitrum edere possum; mihi non nocet.", # Latin
7
+ "Je puis mangier del voirre. Ne me nuit.", # Old French
8
+ "Kristala jan dezaket, ez dit minik ematen.", # Basque
9
+ "Kaya kong kumain nang bubog at hindi ako masaktan.", # Tagalog
10
+ "Ich kann Glas essen, ohne mir weh zu tun.", # German
11
+ "I can eat glass and it doesn't hurt me.", # English
12
+ ]
13
+
14
+ CONVERT_PAIRS = {
15
+ "Je peux manger du verre, ça ne me fait pas de mal." => # French
16
+ "Je peux manger du verre, ca ne me fait pas de mal.",
17
+ "Pot să mănânc sticlă și ea nu mă rănește." => # Romanian
18
+ "Pot sa mananc sticla si ea nu ma raneste.",
19
+ "Ég get etið gler án þess að meiða mig." => # Icelandic
20
+ "Eg get etid gler an thess ad meida mig.",
21
+ "Unë mund të ha qelq dhe nuk më gjen gjë." => # Albanian
22
+ "Une mund te ha qelq dhe nuk me gjen gje.",
23
+ "Mogę jeść szkło i mi nie szkodzi." => # Polish
24
+ "Moge jesc szklo i mi nie szkodzi.",
25
+ # "Я могу есть стекло, оно мне не вредит." => # Russian
26
+ # "Ia moghu iest' stieklo, ono mnie nie vriedit.",
27
+ # "Мога да ям стъкло, то не ми вреди." => # Bulgarian
28
+ # "Mogha da iam stklo, to nie mi vriedi.",
29
+ # "ᛁᚳ᛫ᛗᚨᚷ᛫ᚷᛚᚨᛋ᛫ᛖᚩᛏᚪᚾ᛫ᚩᚾᛞ᛫ᚻᛁᛏ᛫ᚾᛖ᛫ᚻᛖᚪᚱᛗᛁᚪᚧ᛫ᛗᛖ᛬" => # Anglo-Saxon
30
+ # "ic.mag.glas.eotacn.ond.hit.ne.heacrmiacth.me:",
31
+ # "ὕαλον ϕαγεῖν δύναμαι· τοῦτο οὔ με βλάπτει" => # Classical Greek
32
+ # "ualon phagein dunamai; touto ou me blaptei",
33
+ # "मैं काँच खा सकता हूँ और मुझे उससे कोई चोट नहीं पहुंचती" => # Hindi
34
+ # "maiN kaaNc khaa sktaa huuN aur mujhe usse koii cott nhiiN phuNctii",
35
+ # "من می توانم بدونِ احساس درد شيشه بخورم" => # Persian
36
+ # "mn my twnm bdwni Hss drd shyshh bkhwrm",
37
+ # "أنا قادر على أكل الزجاج و هذا لا يؤلمن" => # Arabic
38
+ # "'n qdr 'l~ 'kl lzjj w hdh l yw'lmn",
39
+ # "אני יכול לאכול זכוכית וזה לא מזיק לי" => # Hebrew
40
+ # "ny ykvl lkvl zkvkyt vzh l mzyq ly",
41
+ # "ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ" => # Thai
42
+ # "chankinkracchkaid aetmanaimthamaihchanecchb",
43
+ # "我能吞下玻璃而不伤身体。" => # Chinese
44
+ # "Wo Neng Tun Xia Bo Li Er Bu Shang Shen Ti . ",
45
+ # "私はガラスを食べられます。それは私を傷つけません。" => # Japanese
46
+ # "Si hagarasuwoShi beraremasu. sorehaSi woShang tukemasen. ",
47
+ # "⠋⠗⠁⠝⠉⠑" => # Braille
48
+ # "france",
49
+ "Schloß - Assunção - Łódź" =>
50
+ "Schloss - Assuncao - Lodz",
51
+ "TÜM GOLLER Fb 4-1 Bursa Maç Özeti Íƶle" =>
52
+ "TUM GOLLER Fb 4-1 Bursa Mac Ozeti Izle",
53
+ "ßßßßß" => "ssssssssss"
54
+ }
55
+
56
+ def test_transliterate
57
+ trans_id = "Any-NFD; Any-Latin; Latin-ASCII; Any-NFC"
58
+
59
+ DONT_CONVERT.each do |subject|
60
+ assert_equal subject, trans(subject, trans_id)
61
+ end
62
+
63
+ CONVERT_PAIRS.each do |before, after|
64
+ assert_equal after, trans(before, trans_id)
65
+ end
66
+ end
67
+
68
+ if "".respond_to? :force_encoding
69
+ def test_transliterate_id_must_be_utf8_or_ascii
70
+ trans_id = "Any-NFD; Any-Latin; Latin-ASCII; Any-NFC".force_encoding('big5')
71
+ txt = "blah blah blah"
72
+
73
+ assert_raises Encoding::CompatibilityError do
74
+ trans(txt, trans_id)
75
+ end
76
+
77
+ trans_id.force_encoding('UTF-8')
78
+ begin
79
+ trans(txt, trans_id)
80
+ rescue Encoding::CompatibilityError => e
81
+ assert_nil e, "#{e.class.name} raised, expected not to"
82
+ end
83
+
84
+ trans_id.force_encoding('US-ASCII')
85
+ begin
86
+ trans(txt, trans_id)
87
+ rescue Encoding::CompatibilityError => e
88
+ assert_nil e, "#{e.class.name} raised, expected not to"
89
+ end
90
+ end
91
+
92
+ def test_transliterate_text_must_be_utf8_or_ascii
93
+ trans_id = "Any-NFD; Any-Latin; Latin-ASCII; Any-NFC"
94
+ txt = "blah blah blah".force_encoding('big5')
95
+
96
+ assert_raises Encoding::CompatibilityError do
97
+ trans(txt, trans_id)
98
+ end
99
+
100
+ txt.force_encoding('UTF-8')
101
+ begin
102
+ trans(txt, trans_id)
103
+ rescue Encoding::CompatibilityError => e
104
+ assert_nil e, "#{e.class.name} raised, expected not to"
105
+ end
106
+
107
+ txt.force_encoding('US-ASCII')
108
+ begin
109
+ trans(txt, trans_id)
110
+ rescue Encoding::CompatibilityError => e
111
+ assert_nil e, "#{e.class.name} raised, expected not to"
112
+ end
113
+ end
114
+ end
115
+
116
+ def trans(text, id)
117
+ CharlockHolmes::Transliterator.transliterate(text, id)
118
+ end
119
+ end
metadata CHANGED
@@ -1,82 +1,73 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: charlock_holmes
3
- version: !ruby/object:Gem::Version
4
- hash: 89
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.6.9.2
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 6
9
- - 9
10
- - 1
11
- version: 0.6.9.1
12
6
  platform: ruby
13
- authors:
7
+ authors:
14
8
  - Brian Lopez
15
- - "Vicent Mart\xC3\xAD"
9
+ - Vicent Martí
16
10
  autorequire:
17
11
  bindir: bin
18
12
  cert_chain: []
19
-
20
- date: 2013-01-31 00:00:00 -08:00
21
- default_executable:
22
- dependencies:
23
- - !ruby/object:Gem::Dependency
13
+ date: 2013-03-20 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
24
16
  name: rake-compiler
25
- prerelease: false
26
- requirement: &id001 !ruby/object:Gem::Requirement
17
+ requirement: !ruby/object:Gem::Requirement
27
18
  none: false
28
- requirements:
29
- - - ">="
30
- - !ruby/object:Gem::Version
31
- hash: 9
32
- segments:
33
- - 0
34
- - 7
35
- - 5
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
36
22
  version: 0.7.5
37
23
  type: :development
38
- version_requirements: *id001
39
- - !ruby/object:Gem::Dependency
40
- name: rspec
41
24
  prerelease: false
42
- requirement: &id002 !ruby/object:Gem::Requirement
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ! '>='
29
+ - !ruby/object:Gem::Version
30
+ version: 0.7.5
31
+ - !ruby/object:Gem::Dependency
32
+ name: minitest
33
+ requirement: !ruby/object:Gem::Requirement
43
34
  none: false
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- hash: 15
48
- segments:
49
- - 2
50
- - 0
51
- - 0
52
- version: 2.0.0
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
53
39
  type: :development
54
- version_requirements: *id002
55
- - !ruby/object:Gem::Dependency
56
- name: chardet
57
40
  prerelease: false
58
- requirement: &id003 !ruby/object:Gem::Requirement
41
+ version_requirements: !ruby/object:Gem::Requirement
59
42
  none: false
60
- requirements:
61
- - - ">="
62
- - !ruby/object:Gem::Version
63
- hash: 3
64
- segments:
65
- - 0
66
- version: "0"
43
+ requirements:
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ - !ruby/object:Gem::Dependency
48
+ name: chardet
49
+ requirement: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
67
55
  type: :development
68
- version_requirements: *id003
56
+ prerelease: false
57
+ version_requirements: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
69
63
  description:
70
64
  email: seniorlopez@gmail.com
71
65
  executables: []
72
-
73
- extensions:
66
+ extensions:
74
67
  - ext/charlock_holmes/extconf.rb
75
68
  extra_rdoc_files: []
76
-
77
- files:
69
+ files:
78
70
  - .gitignore
79
- - .rspec
80
71
  - Gemfile
81
72
  - Gemfile.lock
82
73
  - MIT-LICENSE
@@ -91,64 +82,47 @@ files:
91
82
  - ext/charlock_holmes/ext.c
92
83
  - ext/charlock_holmes/extconf.rb
93
84
  - ext/charlock_holmes/src/file-5.08.tar.gz
85
+ - ext/charlock_holmes/src/file-soft-check.patch
86
+ - ext/charlock_holmes/transliterator.cpp
94
87
  - lib/charlock_holmes.rb
95
88
  - lib/charlock_holmes/encoding_detector.rb
96
89
  - lib/charlock_holmes/string.rb
97
90
  - lib/charlock_holmes/version.rb
98
- - spec/converter_spec.rb
99
- - spec/encoding_detector_spec.rb
100
- - spec/fixtures/AnsiGraph.psm1
101
- - spec/fixtures/TwigExtensionsDate.es.yml
102
- - spec/fixtures/cl-messagepack.lisp
103
- - spec/fixtures/core.rkt
104
- - spec/fixtures/hello_world
105
- - spec/fixtures/laholator.py
106
- - spec/fixtures/repl2.cljs
107
- - spec/spec_helper.rb
108
- - spec/string_method_spec.rb
109
- has_rdoc: true
91
+ - test/converter_test.rb
92
+ - test/encoding_detector_test.rb
93
+ - test/fixtures/AnsiGraph.psm1
94
+ - test/fixtures/TwigExtensionsDate.es.yml
95
+ - test/fixtures/cl-messagepack.lisp
96
+ - test/fixtures/core.rkt
97
+ - test/fixtures/hello_world
98
+ - test/fixtures/laholator.py
99
+ - test/fixtures/repl2.cljs
100
+ - test/helper.rb
101
+ - test/string_methods_test.rb
102
+ - test/transliterator_test.rb
110
103
  homepage: http://github.com/brianmario/charlock_holmes
111
104
  licenses: []
112
-
113
105
  post_install_message:
114
- rdoc_options:
106
+ rdoc_options:
115
107
  - --charset=UTF-8
116
- require_paths:
108
+ require_paths:
117
109
  - lib
118
- required_ruby_version: !ruby/object:Gem::Requirement
110
+ required_ruby_version: !ruby/object:Gem::Requirement
119
111
  none: false
120
- requirements:
121
- - - ">="
122
- - !ruby/object:Gem::Version
123
- hash: 3
124
- segments:
125
- - 0
126
- version: "0"
127
- required_rubygems_version: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - ! '>='
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
128
117
  none: false
129
- requirements:
130
- - - ">="
131
- - !ruby/object:Gem::Version
132
- hash: 3
133
- segments:
134
- - 0
135
- version: "0"
118
+ requirements:
119
+ - - ! '>='
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
136
122
  requirements: []
137
-
138
123
  rubyforge_project:
139
- rubygems_version: 1.6.2
124
+ rubygems_version: 1.8.23
140
125
  signing_key:
141
126
  specification_version: 3
142
127
  summary: Character encoding detection, brought to you by ICU
143
- test_files:
144
- - spec/converter_spec.rb
145
- - spec/encoding_detector_spec.rb
146
- - spec/fixtures/AnsiGraph.psm1
147
- - spec/fixtures/TwigExtensionsDate.es.yml
148
- - spec/fixtures/cl-messagepack.lisp
149
- - spec/fixtures/core.rkt
150
- - spec/fixtures/hello_world
151
- - spec/fixtures/laholator.py
152
- - spec/fixtures/repl2.cljs
153
- - spec/spec_helper.rb
154
- - spec/string_method_spec.rb
128
+ test_files: []
data/.rspec DELETED
@@ -1,3 +0,0 @@
1
- --color
2
- --format=documentation
3
- --fail-fast
@@ -1,9 +0,0 @@
1
- $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
-
3
- require 'charlock_holmes'
4
- require 'rspec'
5
-
6
- RSpec.configure do |config|
7
- config.expect_with :stdlib
8
- config.alias_example_to :test
9
- end