charlock_holmes 0.7.1 → 0.7.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 14eb002e3883172c0808434587c7c7198e862fbe
4
- data.tar.gz: ec27785739f9e4c3ca5c3c2987cf7d214415db36
3
+ metadata.gz: 53abf00f6c72c2ac1339b3f856011bed111b9ad4
4
+ data.tar.gz: c17048fa5ddf8c5c37f1378653e0bdcea6849e7a
5
5
  SHA512:
6
- metadata.gz: 33d8ce4b9bbd9408459c3ab6dd4e834352b183919a45b910d74472fb2d38b898d4898cb5d2f718768192cd4b92bb102795c39784aa1238e327419051c5b47b85
7
- data.tar.gz: 7f7fdcfb7c1996bf8e451cf536410d89b9c88d5bd956c292a7716f5fe119264406cacbd7872af38b2a66613233c88cc98166977da766f63398326d3176b8f945
6
+ metadata.gz: 4f97cf5d2bca0e320e1eb54efd77bd4efed5add1621a4c9dcd62e6ec0c1b0b0834a30a5247341308974683e93a064044c2fb524b562d88dd26ead19694bf2121
7
+ data.tar.gz: 57ac5e9d12ae54f65387ef81f45afde88a8d4988f38f2647e2fbc3c88fe00fe094361f8eb26120f7bacea21024f73a74118bfc06a762d7fcc47caecfd5edc372
@@ -15,6 +15,9 @@ static VALUE rb_encdec_buildmatch(const UCharsetMatch *match)
15
15
  const char *mlang;
16
16
  int mconfidence;
17
17
  VALUE rb_match;
18
+ VALUE enc_tbl;
19
+ VALUE enc_name;
20
+ VALUE compat_enc;
18
21
 
19
22
  if (!match)
20
23
  return Qnil;
@@ -26,7 +29,16 @@ static VALUE rb_encdec_buildmatch(const UCharsetMatch *match)
26
29
  rb_match = rb_hash_new();
27
30
 
28
31
  rb_hash_aset(rb_match, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("text")));
29
- rb_hash_aset(rb_match, ID2SYM(rb_intern("encoding")), charlock_new_str2(mname));
32
+
33
+ enc_name = charlock_new_str2(mname);
34
+ rb_hash_aset(rb_match, ID2SYM(rb_intern("encoding")), enc_name);
35
+
36
+ enc_tbl = rb_iv_get(rb_cEncodingDetector, "@encoding_table");
37
+ compat_enc = rb_hash_aref(enc_tbl, enc_name);
38
+ if (!NIL_P(compat_enc)) {
39
+ rb_hash_aset(rb_match, ID2SYM(rb_intern("ruby_encoding")), compat_enc);
40
+ }
41
+
30
42
  rb_hash_aset(rb_match, ID2SYM(rb_intern("confidence")), INT2NUM(mconfidence));
31
43
 
32
44
  if (mlang && mlang[0])
@@ -41,5 +41,36 @@ module CharlockHolmes
41
41
  def self.detect_all(str, hint_enc=nil)
42
42
  new.detect_all(str, hint_enc)
43
43
  end
44
+
45
+ # A mapping table of supported encoding names from EncodingDetector
46
+ # which point to the corresponding supported encoding name in Ruby.
47
+ # Like: {"UTF-8" => "UTF-8", "IBM420_rtl" => "binary"}
48
+ #
49
+ # Note that encodings that can't be mapped between Charlock and Ruby will resolve
50
+ # to "binary" (Ruby's alias for the "ASCII-8BIT" encoding).
51
+ @encoding_table = {}
52
+
53
+ def self.encoding_table
54
+ @encoding_table
55
+ end
56
+
57
+ BINARY = 'binary'
58
+
59
+ # Builds the @encoding_table hash by running through the list of supported encodings
60
+ # in the ICU detection API and trying to map them to supported encodings in Ruby.
61
+ # This is built dynamically so as to take advantage of ICU upgrades which may have
62
+ # support for more encodings in the future.
63
+ #
64
+ # Returns nothing.
65
+ def self.build_encoding_table
66
+ supported_encodings.each do |name|
67
+ @encoding_table[name] = begin
68
+ ::Encoding.find(name).name
69
+ rescue ArgumentError
70
+ BINARY
71
+ end
72
+ end
73
+ end
74
+ build_encoding_table
44
75
  end
45
76
  end
@@ -26,7 +26,7 @@ class String
26
26
  # Returns: self
27
27
  def detect_encoding!(hint_enc=nil)
28
28
  if detected = self.detect_encoding(hint_enc)
29
- self.force_encoding(detected[:encoding]) if detected[:encoding]
29
+ self.force_encoding(detected[:ruby_encoding]) if detected[:ruby_encoding]
30
30
  end
31
31
  self
32
32
  end
@@ -1,3 +1,3 @@
1
1
  module CharlockHolmes
2
- VERSION = "0.7.1"
2
+ VERSION = "0.7.2"
3
3
  end
@@ -89,6 +89,17 @@ class EncodingDetectorTest < MiniTest::Test
89
89
  assert supported_encodings.include? 'UTF-8'
90
90
  end
91
91
 
92
+ def test_returns_a_ruby_compatible_encoding_name
93
+ detected = @detector.detect 'test'
94
+ assert_equal 'ISO-8859-1', detected[:encoding]
95
+ assert_equal 'ISO-8859-1', detected[:ruby_encoding]
96
+
97
+ not_compat_txt = fixture("ISO-2022-KR.txt").read
98
+ detected = @detector.detect not_compat_txt
99
+ assert_equal 'ISO-2022-KR', detected[:encoding]
100
+ assert_equal 'binary', detected[:ruby_encoding]
101
+ end
102
+
92
103
  MAPPING = [
93
104
  ['repl2.cljs', 'ISO-8859-1', :text],
94
105
  ['cl-messagepack.lisp', 'ISO-8859-1', :text],
@@ -114,8 +125,7 @@ class EncodingDetectorTest < MiniTest::Test
114
125
  MAPPING.each do |mapping|
115
126
  file, encoding, type = mapping
116
127
 
117
- path = File.expand_path "../fixtures/#{file}", __FILE__
118
- content = File.read path
128
+ content = fixture(file).read
119
129
  guessed = @detector.detect content
120
130
 
121
131
  assert_equal encoding, guessed[:encoding]
@@ -0,0 +1,43 @@
1
+ $)C#
2
+ # Out-AnsiGraph.psm1
3
+ # Author: xcud
4
+ # History:
5
+ # v0.1 September 21, 2009 initial version
6
+ #
7
+ # PS Example> ps | select -first 5 | sort -property VM |
8
+ # Out-AnsiGraph ProcessName, VM
9
+ # AEADISRV  14508032
10
+ # audiodg  50757632
11
+ # conhost  73740288
12
+ # AppleMobileDeviceService  92061696
13
+ # btdna  126443520
14
+ #
15
+ function Out-AnsiGraph($Parameter1=$null) {
16
+ BEGIN {
17
+ $q = new-object Collections.queue
18
+ $max = 0; $namewidth = 0;
19
+ }
20
+
21
+ PROCESS {
22
+ if($_) {
23
+ $name = $_.($Parameter1[0]);
24
+ $val = $_.($Parameter1[1])
25
+ if($max -lt $val) { $max = $val}
26
+ if($namewidth -lt $name.length) {
27
+ $namewidth = $name.length }
28
+ $q.enqueue(@($name, $val))
29
+ }
30
+ }
31
+
32
+ END {
33
+ $q | %{
34
+ $graph = ""; 0..($_[1]/$max*20) |
35
+ %{ $graph += "" }
36
+ $name = "{0,$namewidth}" -f $_[0]
37
+ "$name $graph " + $_[1]
38
+ }
39
+
40
+ }
41
+ }
42
+
43
+ Export-ModuleMember Out-AnsiGraph
@@ -16,6 +16,11 @@ else
16
16
  Minitest::Test = MiniTest::Unit::TestCase
17
17
  end
18
18
 
19
+ def fixture(name)
20
+ path = File.expand_path "../fixtures/#{name}", __FILE__
21
+ File.new path
22
+ end
23
+
19
24
  # put lib and test dirs directly on load path
20
25
  $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
21
- $LOAD_PATH.unshift File.expand_path('..', __FILE__)
26
+ $LOAD_PATH.unshift File.expand_path('..', __FILE__)
@@ -40,7 +40,18 @@ class StringMethodsTest < MiniTest::Test
40
40
  assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
41
41
  end
42
42
 
43
- if RUBY_VERSION =~ /1.9/
43
+ def test_returns_a_ruby_compatible_encoding_name
44
+ detected = 'test'.detect_encoding
45
+ assert_equal 'ISO-8859-1', detected[:encoding]
46
+ assert_equal 'ISO-8859-1', detected[:ruby_encoding]
47
+
48
+ not_compat_txt = fixture("ISO-2022-KR.txt").read
49
+ detected = not_compat_txt.detect_encoding
50
+ assert_equal 'ISO-2022-KR', detected[:encoding]
51
+ assert_equal 'binary', detected[:ruby_encoding]
52
+ end
53
+
54
+ if "".respond_to? :force_encoding
44
55
  def test_adds_detect_encoding_bang_method
45
56
  str = 'test'
46
57
  str.respond_to? :detect_encoding!
@@ -48,5 +59,15 @@ class StringMethodsTest < MiniTest::Test
48
59
  str.detect_encoding!
49
60
  assert_equal Encoding.find('ISO-8859-1'), str.encoding
50
61
  end
62
+
63
+ def test_sets_a_ruby_compatible_encoding_name
64
+ str1 = 'test'
65
+ str1.detect_encoding!
66
+ assert_equal 'ISO-8859-1', str1.encoding.name
67
+
68
+ not_compat_txt = fixture("ISO-2022-KR.txt").read
69
+ not_compat_txt.detect_encoding!
70
+ assert_equal 'ASCII-8BIT', not_compat_txt.encoding.name
71
+ end
51
72
  end
52
- end
73
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: charlock_holmes
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Lopez
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-05-12 00:00:00.000000000 Z
12
+ date: 2014-06-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake-compiler
@@ -82,6 +82,7 @@ files:
82
82
  - test/converter_test.rb
83
83
  - test/encoding_detector_test.rb
84
84
  - test/fixtures/AnsiGraph.psm1
85
+ - test/fixtures/ISO-2022-KR.txt
85
86
  - test/fixtures/TwigExtensionsDate.es.yml
86
87
  - test/fixtures/cl-messagepack.lisp
87
88
  - test/fixtures/core.rkt