charlock_holmes 0.6.9.1 → 0.6.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,24 +1,16 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- charlock_holmes (0.6.9.1)
4
+ charlock_holmes (0.6.9.2)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
8
8
  specs:
9
9
  chardet (0.9.0)
10
- diff-lcs (1.1.2)
10
+ minitest (4.6.2)
11
11
  rake (0.9.2)
12
12
  rake-compiler (0.7.9)
13
13
  rake
14
- rspec (2.6.0)
15
- rspec-core (~> 2.6.0)
16
- rspec-expectations (~> 2.6.0)
17
- rspec-mocks (~> 2.6.0)
18
- rspec-core (2.6.4)
19
- rspec-expectations (2.6.0)
20
- diff-lcs (~> 1.1.2)
21
- rspec-mocks (2.6.0)
22
14
 
23
15
  PLATFORMS
24
16
  ruby
@@ -26,5 +18,5 @@ PLATFORMS
26
18
  DEPENDENCIES
27
19
  chardet
28
20
  charlock_holmes!
21
+ minitest
29
22
  rake-compiler (>= 0.7.5)
30
- rspec (>= 2.0.0)
data/Rakefile CHANGED
@@ -1,23 +1,10 @@
1
- # rspec
2
- begin
3
- require 'rspec'
4
- require 'rspec/core/rake_task'
1
+ require 'rake/testtask'
5
2
 
6
- desc "Run all examples with RCov"
7
- RSpec::Core::RakeTask.new 'spec:rcov' do |t|
8
- t.rcov = true
9
- end
10
- RSpec::Core::RakeTask.new 'spec' do |t|
11
- t.verbose = true
12
- end
13
-
14
- task :default => :spec
15
- rescue LoadError
16
- puts "rspec, or one of its dependencies, is not available. Install it with: sudo gem install rspec"
3
+ Rake::TestTask.new do |t|
4
+ t.pattern = "test/**/*_test.rb"
17
5
  end
18
6
 
19
- # rake-compiler
20
- require 'rake' unless defined? Rake
7
+ task :default => :test
21
8
 
22
9
  gem 'rake-compiler', '>= 0.7.5'
23
10
  require "rake/extensiontask"
@@ -26,4 +13,4 @@ Rake::ExtensionTask.new 'charlock_holmes' do |ext|
26
13
  ext.lib_dir = File.join 'lib', 'charlock_holmes'
27
14
  end
28
15
 
29
- Rake::Task[:spec].prerequisites << :compile
16
+ Rake::Task[:test].prerequisites << :compile
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
19
19
 
20
20
  # tests
21
21
  s.add_development_dependency 'rake-compiler', ">= 0.7.5"
22
- s.add_development_dependency 'rspec', ">= 2.0.0"
22
+ s.add_development_dependency 'minitest'
23
23
  # benchmarks
24
24
  s.add_development_dependency 'chardet'
25
25
  end
@@ -274,15 +274,11 @@ static VALUE rb_encdec__alloc(VALUE klass)
274
274
  rb_raise(rb_eStandardError, "%s", u_errorName(status));
275
275
  }
276
276
 
277
- detector->magic = magic_open(0);
277
+ detector->magic = magic_open(MAGIC_NO_CHECK_SOFT);
278
278
  if (detector->magic == NULL) {
279
279
  rb_raise(rb_eStandardError, "%s", magic_error(detector->magic));
280
280
  }
281
281
 
282
- // load the libmagic database
283
- // NULL means use the default or whatever is specified by the MAGIC env var
284
- magic_load(detector->magic, NULL);
285
-
286
282
  return obj;
287
283
  }
288
284
 
@@ -2,6 +2,7 @@
2
2
 
3
3
  extern void _init_charlock_encoding_detector();
4
4
  extern void _init_charlock_converter();
5
+ extern void _init_charlock_transliterator();
5
6
 
6
7
  VALUE rb_mCharlockHolmes;
7
8
 
@@ -10,4 +11,5 @@ void Init_charlock_holmes() {
10
11
 
11
12
  _init_charlock_encoding_detector();
12
13
  _init_charlock_converter();
14
+ _init_charlock_transliterator();
13
15
  }
@@ -58,6 +58,7 @@ Dir.chdir("#{CWD}/src") do
58
58
  sys("tar zxvf #{src}")
59
59
  Dir.chdir(dir) do
60
60
  sys("./configure --prefix=#{CWD}/dst/ --disable-shared --enable-static --with-pic")
61
+ sys("patch -p0 < ../file-soft-check.patch")
61
62
  sys("make -C src install")
62
63
  sys("make -C magic install")
63
64
  end
@@ -0,0 +1,86 @@
1
+ #include "common.h"
2
+ #undef UChar
3
+
4
+ #include <unicode/translit.h>
5
+
6
+ extern "C" {
7
+
8
+ #ifdef HAVE_RUBY_ENCODING_H
9
+ #include <ruby/encoding.h>
10
+ static VALUE rb_eEncodingCompatibilityError;
11
+
12
+ static void check_utf8_encoding(VALUE str) {
13
+ static rb_encoding *_cached[3] = {NULL, NULL, NULL};
14
+ rb_encoding *enc;
15
+
16
+ if (_cached[0] == NULL) {
17
+ _cached[0] = rb_utf8_encoding();
18
+ _cached[1] = rb_usascii_encoding();
19
+ _cached[2] = rb_ascii8bit_encoding();
20
+ }
21
+
22
+ enc = rb_enc_get(str);
23
+ if (enc != _cached[0] && enc != _cached[1] && enc != _cached[2]) {
24
+ rb_raise(rb_eEncodingCompatibilityError,
25
+ "Input must be UTF-8 or US-ASCII, %s given", rb_enc_name(enc));
26
+ }
27
+ }
28
+
29
+ #else
30
+ static void check_utf8_encoding(VALUE str) {}
31
+ #endif
32
+
33
+ extern VALUE rb_mCharlockHolmes;
34
+ static VALUE rb_cTransliterator;
35
+
36
+ static VALUE rb_transliterator_transliterate(VALUE self, VALUE rb_txt, VALUE rb_id) {
37
+ UErrorCode status = U_ZERO_ERROR;
38
+ UParseError p_error;
39
+ Transliterator *trans;
40
+ const char *txt;
41
+ size_t txt_len;
42
+ const char *id;
43
+ size_t id_len;
44
+ UnicodeString *u_txt;
45
+ std::string result;
46
+ VALUE rb_out;
47
+
48
+ Check_Type(rb_txt, T_STRING);
49
+ Check_Type(rb_id, T_STRING);
50
+
51
+ check_utf8_encoding(rb_txt);
52
+ check_utf8_encoding(rb_id);
53
+
54
+ txt = RSTRING_PTR(rb_txt);
55
+ txt_len = RSTRING_LEN(rb_txt);
56
+ id = RSTRING_PTR(rb_id);
57
+ id_len = RSTRING_LEN(rb_id);
58
+
59
+ trans = Transliterator::createInstance(UnicodeString(id, id_len), UTRANS_FORWARD, p_error, status);
60
+ if(!U_SUCCESS(status)) {
61
+ rb_raise(rb_eArgError, "%s", u_errorName(status));
62
+ }
63
+
64
+ u_txt = new UnicodeString(txt, txt_len);
65
+ trans->transliterate(*u_txt);
66
+ result = u_txt->toUTF8String(result);
67
+
68
+ delete u_txt;
69
+ delete trans;
70
+
71
+ rb_out = charlock_new_str(result.data(), result.length());
72
+
73
+ return rb_out;
74
+ }
75
+
76
+ void _init_charlock_transliterator() {
77
+ #ifdef HAVE_RUBY_ENCODING_H
78
+ rb_eEncodingCompatibilityError = rb_const_get(rb_cEncoding, rb_intern("CompatibilityError"));
79
+ #endif
80
+
81
+ rb_cTransliterator = rb_define_class_under(rb_mCharlockHolmes, "Transliterator", rb_cObject);
82
+
83
+ rb_define_singleton_method(rb_cTransliterator, "transliterate", (VALUE(*)(...))rb_transliterator_transliterate, 2);
84
+ }
85
+
86
+ }
@@ -1,3 +1,3 @@
1
1
  module CharlockHolmes
2
- VERSION = "0.6.9.1"
2
+ VERSION = "0.6.9.2"
3
3
  end
@@ -1,9 +1,8 @@
1
1
  # encoding: utf-8
2
+ require File.expand_path("../helper", __FILE__)
2
3
 
3
- require 'spec_helper'
4
-
5
- describe CharlockHolmes::Converter do
6
- test 'is able to convert regular ascii content from ISO-8859-1 to UTF-16, and back again' do
4
+ class ConverterTest < MiniTest::Unit::TestCase
5
+ def test_convert_ascii_from_iso859_1_to_utf16_and_back
7
6
  input = 'test'
8
7
 
9
8
  output = CharlockHolmes::Converter.convert input, 'ISO-8859-1', 'UTF-16'
@@ -15,7 +14,7 @@ describe CharlockHolmes::Converter do
15
14
  assert input == output
16
15
  end
17
16
 
18
- test 'is able to convert UTF-8 content from UTF-8 to UTF-16, and back again' do
17
+ def test_convert_utf8_to_utf16_and_back
19
18
  input = 'λ, λ, λ'
20
19
 
21
20
  output = CharlockHolmes::Converter.convert input, 'UTF-8', 'UTF-16'
@@ -27,7 +26,7 @@ describe CharlockHolmes::Converter do
27
26
  assert input == output
28
27
  end
29
28
 
30
- test 'all params must be strings' do
29
+ def test_params_must_be_strings
31
30
  assert_raises TypeError do
32
31
  CharlockHolmes::Converter.convert nil, 'UTF-8', 'UTF-16'
33
32
  end
@@ -40,8 +39,10 @@ describe CharlockHolmes::Converter do
40
39
  CharlockHolmes::Converter.convert 'lol', 'UTF-8', nil
41
40
  end
42
41
 
43
- assert_nothing_raised do
42
+ begin
44
43
  CharlockHolmes::Converter.convert 'lol', 'UTF-8', 'UTF-16'
44
+ rescue Exception => e
45
+ assert_nil e, "#{e.class.name} raised, expected nothing"
45
46
  end
46
47
  end
47
48
  end
@@ -1,25 +1,24 @@
1
1
  # encoding: utf-8
2
+ require File.expand_path("../helper", __FILE__)
2
3
 
3
- require 'spec_helper'
4
-
5
- describe CharlockHolmes::EncodingDetector do
6
- before :all do
4
+ class EncodingDetectorTest < MiniTest::Unit::TestCase
5
+ def setup
7
6
  @detector = CharlockHolmes::EncodingDetector.new
8
7
  end
9
8
 
10
- test 'has a class-level detect method' do
9
+ def test_has_class_level_detect_method
11
10
  CharlockHolmes::EncodingDetector.respond_to? :detect
12
11
  detected = CharlockHolmes::EncodingDetector.detect 'test'
13
12
  assert_equal 'ISO-8859-1', detected[:encoding]
14
13
  end
15
14
 
16
- test 'has a class-level detect method that accepts an encoding hint' do
15
+ def test_class_level_detect_accepts_encoding_hint
17
16
  CharlockHolmes::EncodingDetector.respond_to? :detect
18
17
  detected = CharlockHolmes::EncodingDetector.detect 'test', 'UTF-8'
19
18
  assert_equal 'ISO-8859-1', detected[:encoding]
20
19
  end
21
20
 
22
- test 'has a class-level detect_all method' do
21
+ def test_has_class_level_detect_all_method
23
22
  CharlockHolmes::EncodingDetector.respond_to? :detect_all
24
23
  detected_list = CharlockHolmes::EncodingDetector.detect_all 'test'
25
24
  assert detected_list.is_a? Array
@@ -28,7 +27,7 @@ describe CharlockHolmes::EncodingDetector do
28
27
  assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
29
28
  end
30
29
 
31
- test 'has a class-level detect_all method that accepts an encoding hint' do
30
+ def test_class_level_detect_all_method_accepts_encoding_hint
32
31
  CharlockHolmes::EncodingDetector.respond_to? :detect_all
33
32
  detected_list = CharlockHolmes::EncodingDetector.detect_all 'test', 'UTF-8'
34
33
  assert detected_list.is_a? Array
@@ -37,19 +36,19 @@ describe CharlockHolmes::EncodingDetector do
37
36
  assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
38
37
  end
39
38
 
40
- test 'has a detect method' do
39
+ def test_has_detect_method
41
40
  @detector.respond_to? :detect
42
41
  detected = @detector.detect 'test'
43
42
  assert_equal 'ISO-8859-1', detected[:encoding]
44
43
  end
45
44
 
46
- test 'has a detect method that accepts an encoding hint' do
45
+ def test_detect_accepts_encoding_hint
47
46
  @detector.respond_to? :detect
48
47
  detected = @detector.detect 'test', 'UTF-8'
49
48
  assert_equal 'ISO-8859-1', detected[:encoding]
50
49
  end
51
50
 
52
- test 'has a detect_all method' do
51
+ def test_has_detect_all_method
53
52
  @detector.respond_to? :detect_all
54
53
  detected_list = @detector.detect_all 'test'
55
54
  assert detected_list.is_a? Array
@@ -58,7 +57,7 @@ describe CharlockHolmes::EncodingDetector do
58
57
  assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
59
58
  end
60
59
 
61
- test 'has a detect_all method that accepts an encoding hint' do
60
+ def test_detect_all_accepts_encoding_hint
62
61
  @detector.respond_to? :detect_all
63
62
  detected_list = @detector.detect_all 'test', 'UTF-8'
64
63
  assert detected_list.is_a? Array
@@ -67,7 +66,7 @@ describe CharlockHolmes::EncodingDetector do
67
66
  assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
68
67
  end
69
68
 
70
- test 'has a strip_tags flag' do
69
+ def test_strip_tags_flag
71
70
  detector = CharlockHolmes::EncodingDetector.new
72
71
  detector.strip_tags = true
73
72
  assert detector.strip_tags
@@ -82,7 +81,7 @@ describe CharlockHolmes::EncodingDetector do
82
81
  assert_equal 'UTF-8', detection[:encoding]
83
82
  end
84
83
 
85
- test 'has a list of supported encodings' do
84
+ def test_has_list_of_supported_encodings
86
85
  CharlockHolmes::EncodingDetector.respond_to? :supported_encodings
87
86
  supported_encodings = CharlockHolmes::EncodingDetector.supported_encodings
88
87
 
@@ -90,32 +89,30 @@ describe CharlockHolmes::EncodingDetector do
90
89
  assert supported_encodings.include? 'UTF-8'
91
90
  end
92
91
 
93
- context 'encoding detection' do
94
- MAPPING = [
95
- ['repl2.cljs', 'ISO-8859-1', :text],
96
- ['core.rkt', 'UTF-8', :text],
97
- ['cl-messagepack.lisp', 'ISO-8859-1', :text],
98
- ['TwigExtensionsDate.es.yml', 'UTF-8', :text],
99
- ['AnsiGraph.psm1', 'UTF-16LE', :text],
100
- ['laholator.py', 'UTF-8', :text],
101
- ['hello_world', nil, :binary]
102
- ]
103
-
92
+ MAPPING = [
93
+ ['repl2.cljs', 'ISO-8859-1', :text],
94
+ ['core.rkt', 'UTF-8', :text],
95
+ ['cl-messagepack.lisp', 'ISO-8859-1', :text],
96
+ ['TwigExtensionsDate.es.yml', 'UTF-8', :text],
97
+ ['AnsiGraph.psm1', 'UTF-16LE', :text],
98
+ ['laholator.py', 'UTF-8', :text],
99
+ ['hello_world', nil, :binary]
100
+ ]
101
+
102
+ def test_detection_works_as_expected
104
103
  MAPPING.each do |mapping|
105
104
  file, encoding, type = mapping
106
105
 
107
- test "#{file} should be detected as #{encoding || 'binary'}" do
108
- path = File.expand_path "../fixtures/#{file}", __FILE__
109
- content = File.read path
110
- guessed = @detector.detect content
106
+ path = File.expand_path "../fixtures/#{file}", __FILE__
107
+ content = File.read path
108
+ guessed = @detector.detect content
111
109
 
112
- assert_equal encoding, guessed[:encoding]
113
- assert_equal type, guessed[:type]
110
+ assert_equal encoding, guessed[:encoding]
111
+ assert_equal type, guessed[:type]
114
112
 
115
- if content.respond_to?(:force_encoding) && guessed[:type] == :text
116
- content.force_encoding guessed[:encoding]
117
- assert content.valid_encoding?
118
- end
113
+ if content.respond_to?(:force_encoding) && guessed[:type] == :text
114
+ content.force_encoding guessed[:encoding]
115
+ assert content.valid_encoding?
119
116
  end
120
117
  end
121
118
  end
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -0,0 +1,14 @@
1
+ # Basic test environment.
2
+
3
+ # blah fuck this
4
+ require 'rubygems' if !defined?(Gem)
5
+ require 'bundler/setup'
6
+
7
+ require 'charlock_holmes'
8
+
9
+ # bring in minitest
10
+ require 'minitest/autorun'
11
+
12
+ # put lib and test dirs directly on load path
13
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
14
+ $LOAD_PATH.unshift File.expand_path('..', __FILE__)
@@ -1,8 +1,8 @@
1
- require 'spec_helper'
1
+ require File.expand_path("../helper", __FILE__)
2
2
  require 'charlock_holmes/string'
3
3
 
4
- describe String do
5
- test 'has a detect_encoding method' do
4
+ class StringMethodsTest < MiniTest::Unit::TestCase
5
+ def test_adds_detect_encoding_method
6
6
  str = 'test'
7
7
  str.respond_to? :detect_encoding
8
8
 
@@ -10,7 +10,7 @@ describe String do
10
10
  assert_equal 'ISO-8859-1', detected[:encoding]
11
11
  end
12
12
 
13
- test 'has a detect_encoding method that accepts an encoding hint' do
13
+ def test_detect_encoding_accepts_encoding_hint_param
14
14
  str = 'test'
15
15
  str.respond_to? :detect_encoding
16
16
 
@@ -18,7 +18,7 @@ describe String do
18
18
  assert_equal 'ISO-8859-1', detected[:encoding]
19
19
  end
20
20
 
21
- test 'has a detect_encodings method' do
21
+ def test_adds_detect_encodings_method
22
22
  str = 'test'
23
23
  str.respond_to? :detect_encodings
24
24
 
@@ -29,7 +29,7 @@ describe String do
29
29
  assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
30
30
  end
31
31
 
32
- test 'has a detect_encodings method that accepts an encoding hint' do
32
+ def test_detect_encodings_accepts_encoding_hint_param
33
33
  str = 'test'
34
34
  str.respond_to? :detect_encodings
35
35
 
@@ -41,7 +41,7 @@ describe String do
41
41
  end
42
42
 
43
43
  if RUBY_VERSION =~ /1.9/
44
- test 'has a detect_encoding! method' do
44
+ def test_adds_detect_encoding_bang_method
45
45
  str = 'test'
46
46
  str.respond_to? :detect_encoding!
47
47
 
@@ -0,0 +1,119 @@
1
+ # encoding: utf-8
2
+ require File.expand_path("../helper", __FILE__)
3
+
4
+ class TransliteratorTest < MiniTest::Unit::TestCase
5
+ DONT_CONVERT = [
6
+ "Vitrum edere possum; mihi non nocet.", # Latin
7
+ "Je puis mangier del voirre. Ne me nuit.", # Old French
8
+ "Kristala jan dezaket, ez dit minik ematen.", # Basque
9
+ "Kaya kong kumain nang bubog at hindi ako masaktan.", # Tagalog
10
+ "Ich kann Glas essen, ohne mir weh zu tun.", # German
11
+ "I can eat glass and it doesn't hurt me.", # English
12
+ ]
13
+
14
+ CONVERT_PAIRS = {
15
+ "Je peux manger du verre, ça ne me fait pas de mal." => # French
16
+ "Je peux manger du verre, ca ne me fait pas de mal.",
17
+ "Pot să mănânc sticlă și ea nu mă rănește." => # Romanian
18
+ "Pot sa mananc sticla si ea nu ma raneste.",
19
+ "Ég get etið gler án þess að meiða mig." => # Icelandic
20
+ "Eg get etid gler an thess ad meida mig.",
21
+ "Unë mund të ha qelq dhe nuk më gjen gjë." => # Albanian
22
+ "Une mund te ha qelq dhe nuk me gjen gje.",
23
+ "Mogę jeść szkło i mi nie szkodzi." => # Polish
24
+ "Moge jesc szklo i mi nie szkodzi.",
25
+ # "Я могу есть стекло, оно мне не вредит." => # Russian
26
+ # "Ia moghu iest' stieklo, ono mnie nie vriedit.",
27
+ # "Мога да ям стъкло, то не ми вреди." => # Bulgarian
28
+ # "Mogha da iam stklo, to nie mi vriedi.",
29
+ # "ᛁᚳ᛫ᛗᚨᚷ᛫ᚷᛚᚨᛋ᛫ᛖᚩᛏᚪᚾ᛫ᚩᚾᛞ᛫ᚻᛁᛏ᛫ᚾᛖ᛫ᚻᛖᚪᚱᛗᛁᚪᚧ᛫ᛗᛖ᛬" => # Anglo-Saxon
30
+ # "ic.mag.glas.eotacn.ond.hit.ne.heacrmiacth.me:",
31
+ # "ὕαλον ϕαγεῖν δύναμαι· τοῦτο οὔ με βλάπτει" => # Classical Greek
32
+ # "ualon phagein dunamai; touto ou me blaptei",
33
+ # "मैं काँच खा सकता हूँ और मुझे उससे कोई चोट नहीं पहुंचती" => # Hindi
34
+ # "maiN kaaNc khaa sktaa huuN aur mujhe usse koii cott nhiiN phuNctii",
35
+ # "من می توانم بدونِ احساس درد شيشه بخورم" => # Persian
36
+ # "mn my twnm bdwni Hss drd shyshh bkhwrm",
37
+ # "أنا قادر على أكل الزجاج و هذا لا يؤلمن" => # Arabic
38
+ # "'n qdr 'l~ 'kl lzjj w hdh l yw'lmn",
39
+ # "אני יכול לאכול זכוכית וזה לא מזיק לי" => # Hebrew
40
+ # "ny ykvl lkvl zkvkyt vzh l mzyq ly",
41
+ # "ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ" => # Thai
42
+ # "chankinkracchkaid aetmanaimthamaihchanecchb",
43
+ # "我能吞下玻璃而不伤身体。" => # Chinese
44
+ # "Wo Neng Tun Xia Bo Li Er Bu Shang Shen Ti . ",
45
+ # "私はガラスを食べられます。それは私を傷つけません。" => # Japanese
46
+ # "Si hagarasuwoShi beraremasu. sorehaSi woShang tukemasen. ",
47
+ # "⠋⠗⠁⠝⠉⠑" => # Braille
48
+ # "france",
49
+ "Schloß - Assunção - Łódź" =>
50
+ "Schloss - Assuncao - Lodz",
51
+ "TÜM GOLLER Fb 4-1 Bursa Maç Özeti Íƶle" =>
52
+ "TUM GOLLER Fb 4-1 Bursa Mac Ozeti Izle",
53
+ "ßßßßß" => "ssssssssss"
54
+ }
55
+
56
+ def test_transliterate
57
+ trans_id = "Any-NFD; Any-Latin; Latin-ASCII; Any-NFC"
58
+
59
+ DONT_CONVERT.each do |subject|
60
+ assert_equal subject, trans(subject, trans_id)
61
+ end
62
+
63
+ CONVERT_PAIRS.each do |before, after|
64
+ assert_equal after, trans(before, trans_id)
65
+ end
66
+ end
67
+
68
+ if "".respond_to? :force_encoding
69
+ def test_transliterate_id_must_be_utf8_or_ascii
70
+ trans_id = "Any-NFD; Any-Latin; Latin-ASCII; Any-NFC".force_encoding('big5')
71
+ txt = "blah blah blah"
72
+
73
+ assert_raises Encoding::CompatibilityError do
74
+ trans(txt, trans_id)
75
+ end
76
+
77
+ trans_id.force_encoding('UTF-8')
78
+ begin
79
+ trans(txt, trans_id)
80
+ rescue Encoding::CompatibilityError => e
81
+ assert_nil e, "#{e.class.name} raised, expected not to"
82
+ end
83
+
84
+ trans_id.force_encoding('US-ASCII')
85
+ begin
86
+ trans(txt, trans_id)
87
+ rescue Encoding::CompatibilityError => e
88
+ assert_nil e, "#{e.class.name} raised, expected not to"
89
+ end
90
+ end
91
+
92
+ def test_transliterate_text_must_be_utf8_or_ascii
93
+ trans_id = "Any-NFD; Any-Latin; Latin-ASCII; Any-NFC"
94
+ txt = "blah blah blah".force_encoding('big5')
95
+
96
+ assert_raises Encoding::CompatibilityError do
97
+ trans(txt, trans_id)
98
+ end
99
+
100
+ txt.force_encoding('UTF-8')
101
+ begin
102
+ trans(txt, trans_id)
103
+ rescue Encoding::CompatibilityError => e
104
+ assert_nil e, "#{e.class.name} raised, expected not to"
105
+ end
106
+
107
+ txt.force_encoding('US-ASCII')
108
+ begin
109
+ trans(txt, trans_id)
110
+ rescue Encoding::CompatibilityError => e
111
+ assert_nil e, "#{e.class.name} raised, expected not to"
112
+ end
113
+ end
114
+ end
115
+
116
+ def trans(text, id)
117
+ CharlockHolmes::Transliterator.transliterate(text, id)
118
+ end
119
+ end
metadata CHANGED
@@ -1,82 +1,73 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: charlock_holmes
3
- version: !ruby/object:Gem::Version
4
- hash: 89
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.6.9.2
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 6
9
- - 9
10
- - 1
11
- version: 0.6.9.1
12
6
  platform: ruby
13
- authors:
7
+ authors:
14
8
  - Brian Lopez
15
- - "Vicent Mart\xC3\xAD"
9
+ - Vicent Martí
16
10
  autorequire:
17
11
  bindir: bin
18
12
  cert_chain: []
19
-
20
- date: 2013-01-31 00:00:00 -08:00
21
- default_executable:
22
- dependencies:
23
- - !ruby/object:Gem::Dependency
13
+ date: 2013-03-20 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
24
16
  name: rake-compiler
25
- prerelease: false
26
- requirement: &id001 !ruby/object:Gem::Requirement
17
+ requirement: !ruby/object:Gem::Requirement
27
18
  none: false
28
- requirements:
29
- - - ">="
30
- - !ruby/object:Gem::Version
31
- hash: 9
32
- segments:
33
- - 0
34
- - 7
35
- - 5
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
36
22
  version: 0.7.5
37
23
  type: :development
38
- version_requirements: *id001
39
- - !ruby/object:Gem::Dependency
40
- name: rspec
41
24
  prerelease: false
42
- requirement: &id002 !ruby/object:Gem::Requirement
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ! '>='
29
+ - !ruby/object:Gem::Version
30
+ version: 0.7.5
31
+ - !ruby/object:Gem::Dependency
32
+ name: minitest
33
+ requirement: !ruby/object:Gem::Requirement
43
34
  none: false
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- hash: 15
48
- segments:
49
- - 2
50
- - 0
51
- - 0
52
- version: 2.0.0
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
53
39
  type: :development
54
- version_requirements: *id002
55
- - !ruby/object:Gem::Dependency
56
- name: chardet
57
40
  prerelease: false
58
- requirement: &id003 !ruby/object:Gem::Requirement
41
+ version_requirements: !ruby/object:Gem::Requirement
59
42
  none: false
60
- requirements:
61
- - - ">="
62
- - !ruby/object:Gem::Version
63
- hash: 3
64
- segments:
65
- - 0
66
- version: "0"
43
+ requirements:
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ - !ruby/object:Gem::Dependency
48
+ name: chardet
49
+ requirement: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
67
55
  type: :development
68
- version_requirements: *id003
56
+ prerelease: false
57
+ version_requirements: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
69
63
  description:
70
64
  email: seniorlopez@gmail.com
71
65
  executables: []
72
-
73
- extensions:
66
+ extensions:
74
67
  - ext/charlock_holmes/extconf.rb
75
68
  extra_rdoc_files: []
76
-
77
- files:
69
+ files:
78
70
  - .gitignore
79
- - .rspec
80
71
  - Gemfile
81
72
  - Gemfile.lock
82
73
  - MIT-LICENSE
@@ -91,64 +82,47 @@ files:
91
82
  - ext/charlock_holmes/ext.c
92
83
  - ext/charlock_holmes/extconf.rb
93
84
  - ext/charlock_holmes/src/file-5.08.tar.gz
85
+ - ext/charlock_holmes/src/file-soft-check.patch
86
+ - ext/charlock_holmes/transliterator.cpp
94
87
  - lib/charlock_holmes.rb
95
88
  - lib/charlock_holmes/encoding_detector.rb
96
89
  - lib/charlock_holmes/string.rb
97
90
  - lib/charlock_holmes/version.rb
98
- - spec/converter_spec.rb
99
- - spec/encoding_detector_spec.rb
100
- - spec/fixtures/AnsiGraph.psm1
101
- - spec/fixtures/TwigExtensionsDate.es.yml
102
- - spec/fixtures/cl-messagepack.lisp
103
- - spec/fixtures/core.rkt
104
- - spec/fixtures/hello_world
105
- - spec/fixtures/laholator.py
106
- - spec/fixtures/repl2.cljs
107
- - spec/spec_helper.rb
108
- - spec/string_method_spec.rb
109
- has_rdoc: true
91
+ - test/converter_test.rb
92
+ - test/encoding_detector_test.rb
93
+ - test/fixtures/AnsiGraph.psm1
94
+ - test/fixtures/TwigExtensionsDate.es.yml
95
+ - test/fixtures/cl-messagepack.lisp
96
+ - test/fixtures/core.rkt
97
+ - test/fixtures/hello_world
98
+ - test/fixtures/laholator.py
99
+ - test/fixtures/repl2.cljs
100
+ - test/helper.rb
101
+ - test/string_methods_test.rb
102
+ - test/transliterator_test.rb
110
103
  homepage: http://github.com/brianmario/charlock_holmes
111
104
  licenses: []
112
-
113
105
  post_install_message:
114
- rdoc_options:
106
+ rdoc_options:
115
107
  - --charset=UTF-8
116
- require_paths:
108
+ require_paths:
117
109
  - lib
118
- required_ruby_version: !ruby/object:Gem::Requirement
110
+ required_ruby_version: !ruby/object:Gem::Requirement
119
111
  none: false
120
- requirements:
121
- - - ">="
122
- - !ruby/object:Gem::Version
123
- hash: 3
124
- segments:
125
- - 0
126
- version: "0"
127
- required_rubygems_version: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - ! '>='
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
128
117
  none: false
129
- requirements:
130
- - - ">="
131
- - !ruby/object:Gem::Version
132
- hash: 3
133
- segments:
134
- - 0
135
- version: "0"
118
+ requirements:
119
+ - - ! '>='
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
136
122
  requirements: []
137
-
138
123
  rubyforge_project:
139
- rubygems_version: 1.6.2
124
+ rubygems_version: 1.8.23
140
125
  signing_key:
141
126
  specification_version: 3
142
127
  summary: Character encoding detection, brought to you by ICU
143
- test_files:
144
- - spec/converter_spec.rb
145
- - spec/encoding_detector_spec.rb
146
- - spec/fixtures/AnsiGraph.psm1
147
- - spec/fixtures/TwigExtensionsDate.es.yml
148
- - spec/fixtures/cl-messagepack.lisp
149
- - spec/fixtures/core.rkt
150
- - spec/fixtures/hello_world
151
- - spec/fixtures/laholator.py
152
- - spec/fixtures/repl2.cljs
153
- - spec/spec_helper.rb
154
- - spec/string_method_spec.rb
128
+ test_files: []
data/.rspec DELETED
@@ -1,3 +0,0 @@
1
- --color
2
- --format=documentation
3
- --fail-fast
@@ -1,9 +0,0 @@
1
- $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
-
3
- require 'charlock_holmes'
4
- require 'rspec'
5
-
6
- RSpec.configure do |config|
7
- config.expect_with :stdlib
8
- config.alias_example_to :test
9
- end