charlock_holmes 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 14eb002e3883172c0808434587c7c7198e862fbe
4
- data.tar.gz: ec27785739f9e4c3ca5c3c2987cf7d214415db36
3
+ metadata.gz: 53abf00f6c72c2ac1339b3f856011bed111b9ad4
4
+ data.tar.gz: c17048fa5ddf8c5c37f1378653e0bdcea6849e7a
5
5
  SHA512:
6
- metadata.gz: 33d8ce4b9bbd9408459c3ab6dd4e834352b183919a45b910d74472fb2d38b898d4898cb5d2f718768192cd4b92bb102795c39784aa1238e327419051c5b47b85
7
- data.tar.gz: 7f7fdcfb7c1996bf8e451cf536410d89b9c88d5bd956c292a7716f5fe119264406cacbd7872af38b2a66613233c88cc98166977da766f63398326d3176b8f945
6
+ metadata.gz: 4f97cf5d2bca0e320e1eb54efd77bd4efed5add1621a4c9dcd62e6ec0c1b0b0834a30a5247341308974683e93a064044c2fb524b562d88dd26ead19694bf2121
7
+ data.tar.gz: 57ac5e9d12ae54f65387ef81f45afde88a8d4988f38f2647e2fbc3c88fe00fe094361f8eb26120f7bacea21024f73a74118bfc06a762d7fcc47caecfd5edc372
@@ -15,6 +15,9 @@ static VALUE rb_encdec_buildmatch(const UCharsetMatch *match)
15
15
  const char *mlang;
16
16
  int mconfidence;
17
17
  VALUE rb_match;
18
+ VALUE enc_tbl;
19
+ VALUE enc_name;
20
+ VALUE compat_enc;
18
21
 
19
22
  if (!match)
20
23
  return Qnil;
@@ -26,7 +29,16 @@ static VALUE rb_encdec_buildmatch(const UCharsetMatch *match)
26
29
  rb_match = rb_hash_new();
27
30
 
28
31
  rb_hash_aset(rb_match, ID2SYM(rb_intern("type")), ID2SYM(rb_intern("text")));
29
- rb_hash_aset(rb_match, ID2SYM(rb_intern("encoding")), charlock_new_str2(mname));
32
+
33
+ enc_name = charlock_new_str2(mname);
34
+ rb_hash_aset(rb_match, ID2SYM(rb_intern("encoding")), enc_name);
35
+
36
+ enc_tbl = rb_iv_get(rb_cEncodingDetector, "@encoding_table");
37
+ compat_enc = rb_hash_aref(enc_tbl, enc_name);
38
+ if (!NIL_P(compat_enc)) {
39
+ rb_hash_aset(rb_match, ID2SYM(rb_intern("ruby_encoding")), compat_enc);
40
+ }
41
+
30
42
  rb_hash_aset(rb_match, ID2SYM(rb_intern("confidence")), INT2NUM(mconfidence));
31
43
 
32
44
  if (mlang && mlang[0])
@@ -41,5 +41,36 @@ module CharlockHolmes
41
41
  def self.detect_all(str, hint_enc=nil)
42
42
  new.detect_all(str, hint_enc)
43
43
  end
44
+
45
+ # A mapping table of supported encoding names from EncodingDetector
46
+ # which point to the corresponding supported encoding name in Ruby.
47
+ # Like: {"UTF-8" => "UTF-8", "IBM420_rtl" => "binary"}
48
+ #
49
+ # Note that encodings that can't be mapped between Charlock and Ruby will resolve
50
+ # to "binary", Ruby's alias for "ASCII-8BIT".
51
+ @encoding_table = {}
52
+
53
+ def self.encoding_table
54
+ @encoding_table
55
+ end
56
+
57
+ BINARY = 'binary'
58
+
59
+ # Builds the @encoding_table hash by running through the list of supported encodings
60
+ # in the ICU detection API and trying to map them to supported encodings in Ruby.
61
+ # This is built dynamically so as to take advantage of ICU upgrades which may have
62
+ # support for more encodings in the future.
63
+ #
64
+ # Returns nothing.
65
+ def self.build_encoding_table
66
+ supported_encodings.each do |name|
67
+ @encoding_table[name] = begin
68
+ ::Encoding.find(name).name
69
+ rescue ArgumentError
70
+ BINARY
71
+ end
72
+ end
73
+ end
74
+ build_encoding_table
44
75
  end
45
76
  end
@@ -26,7 +26,7 @@ class String
26
26
  # Returns: self
27
27
  def detect_encoding!(hint_enc=nil)
28
28
  if detected = self.detect_encoding(hint_enc)
29
- self.force_encoding(detected[:encoding]) if detected[:encoding]
29
+ self.force_encoding(detected[:ruby_encoding]) if detected[:ruby_encoding]
30
30
  end
31
31
  self
32
32
  end
@@ -1,3 +1,3 @@
1
1
  module CharlockHolmes
2
- VERSION = "0.7.1"
2
+ VERSION = "0.7.2"
3
3
  end
@@ -89,6 +89,17 @@ class EncodingDetectorTest < MiniTest::Test
89
89
  assert supported_encodings.include? 'UTF-8'
90
90
  end
91
91
 
92
+ def test_returns_a_ruby_compatible_encoding_name
93
+ detected = @detector.detect 'test'
94
+ assert_equal 'ISO-8859-1', detected[:encoding]
95
+ assert_equal 'ISO-8859-1', detected[:ruby_encoding]
96
+
97
+ not_compat_txt = fixture("ISO-2022-KR.txt").read
98
+ detected = @detector.detect not_compat_txt
99
+ assert_equal 'ISO-2022-KR', detected[:encoding]
100
+ assert_equal 'binary', detected[:ruby_encoding]
101
+ end
102
+
92
103
  MAPPING = [
93
104
  ['repl2.cljs', 'ISO-8859-1', :text],
94
105
  ['cl-messagepack.lisp', 'ISO-8859-1', :text],
@@ -114,8 +125,7 @@ class EncodingDetectorTest < MiniTest::Test
114
125
  MAPPING.each do |mapping|
115
126
  file, encoding, type = mapping
116
127
 
117
- path = File.expand_path "../fixtures/#{file}", __FILE__
118
- content = File.read path
128
+ content = fixture(file).read
119
129
  guessed = @detector.detect content
120
130
 
121
131
  assert_equal encoding, guessed[:encoding]
@@ -0,0 +1,43 @@
1
+ $)C#
2
+ # Out-AnsiGraph.psm1
3
+ # Author: xcud
4
+ # History:
5
+ # v0.1 September 21, 2009 initial version
6
+ #
7
+ # PS Example> ps | select -first 5 | sort -property VM |
8
+ # Out-AnsiGraph ProcessName, VM
9
+ # AEADISRV  14508032
10
+ # audiodg  50757632
11
+ # conhost  73740288
12
+ # AppleMobileDeviceService  92061696
13
+ # btdna  126443520
14
+ #
15
+ function Out-AnsiGraph($Parameter1=$null) {
16
+ BEGIN {
17
+ $q = new-object Collections.queue
18
+ $max = 0; $namewidth = 0;
19
+ }
20
+
21
+ PROCESS {
22
+ if($_) {
23
+ $name = $_.($Parameter1[0]);
24
+ $val = $_.($Parameter1[1])
25
+ if($max -lt $val) { $max = $val}
26
+ if($namewidth -lt $name.length) {
27
+ $namewidth = $name.length }
28
+ $q.enqueue(@($name, $val))
29
+ }
30
+ }
31
+
32
+ END {
33
+ $q | %{
34
+ $graph = ""; 0..($_[1]/$max*20) |
35
+ %{ $graph += "" }
36
+ $name = "{0,$namewidth}" -f $_[0]
37
+ "$name $graph " + $_[1]
38
+ }
39
+
40
+ }
41
+ }
42
+
43
+ Export-ModuleMember Out-AnsiGraph
@@ -16,6 +16,11 @@ else
16
16
  Minitest::Test = MiniTest::Unit::TestCase
17
17
  end
18
18
 
19
+ def fixture(name)
20
+ path = File.expand_path "../fixtures/#{name}", __FILE__
21
+ File.new path
22
+ end
23
+
19
24
  # put lib and test dirs directly on load path
20
25
  $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
21
- $LOAD_PATH.unshift File.expand_path('..', __FILE__)
26
+ $LOAD_PATH.unshift File.expand_path('..', __FILE__)
@@ -40,7 +40,18 @@ class StringMethodsTest < MiniTest::Test
40
40
  assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
41
41
  end
42
42
 
43
- if RUBY_VERSION =~ /1.9/
43
+ def test_returns_a_ruby_compatible_encoding_name
44
+ detected = 'test'.detect_encoding
45
+ assert_equal 'ISO-8859-1', detected[:encoding]
46
+ assert_equal 'ISO-8859-1', detected[:ruby_encoding]
47
+
48
+ not_compat_txt = fixture("ISO-2022-KR.txt").read
49
+ detected = not_compat_txt.detect_encoding
50
+ assert_equal 'ISO-2022-KR', detected[:encoding]
51
+ assert_equal 'binary', detected[:ruby_encoding]
52
+ end
53
+
54
+ if "".respond_to? :force_encoding
44
55
  def test_adds_detect_encoding_bang_method
45
56
  str = 'test'
46
57
  str.respond_to? :detect_encoding!
@@ -48,5 +59,15 @@ class StringMethodsTest < MiniTest::Test
48
59
  str.detect_encoding!
49
60
  assert_equal Encoding.find('ISO-8859-1'), str.encoding
50
61
  end
62
+
63
+ def test_sets_a_ruby_compatible_encoding_name
64
+ str1 = 'test'
65
+ str1.detect_encoding!
66
+ assert_equal 'ISO-8859-1', str1.encoding.name
67
+
68
+ not_compat_txt = fixture("ISO-2022-KR.txt").read
69
+ not_compat_txt.detect_encoding!
70
+ assert_equal 'ASCII-8BIT', not_compat_txt.encoding.name
71
+ end
51
72
  end
52
- end
73
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: charlock_holmes
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Lopez
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-05-12 00:00:00.000000000 Z
12
+ date: 2014-06-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake-compiler
@@ -82,6 +82,7 @@ files:
82
82
  - test/converter_test.rb
83
83
  - test/encoding_detector_test.rb
84
84
  - test/fixtures/AnsiGraph.psm1
85
+ - test/fixtures/ISO-2022-KR.txt
85
86
  - test/fixtures/TwigExtensionsDate.es.yml
86
87
  - test/fixtures/cl-messagepack.lisp
87
88
  - test/fixtures/core.rkt