coder 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,14 +1,25 @@
1
1
  require 'coder/version'
2
2
  require 'coder/cleaner'
3
+ require 'coder/error'
3
4
 
4
5
  module Coder
5
6
  extend self
7
+ DEFAULT_ENCODING = 'UTF-8'
6
8
 
7
- def clean(str, encoding = nil)
8
- Cleaner.new(encoding || 'UTF-8').clean(str)
9
+ def clean(str, encoding = DEFAULT_ENCODING)
10
+ Cleaner.new(encoding).clean(str)
9
11
  end
10
12
 
11
- def clean!(str, encoding = nil)
13
+ def clean!(str, encoding = DEFAULT_ENCODING)
12
14
  str.replace clean(str, encoding)
13
15
  end
16
+
17
+ def force_encoding!(str, encoding = DEFAULT_ENCODING)
18
+ return str unless str.respond_to? :force_encoding
19
+ str.force_encoding(encoding.to_s)
20
+ end
21
+
22
+ def force_encoding(str, encoding = DEFAULT_ENCODING)
23
+ force_encoding! str.dup, encoding
24
+ end
14
25
  end
@@ -2,6 +2,7 @@ require 'coder/cleaner/builtin'
2
2
  require 'coder/cleaner/iconv'
3
3
  require 'coder/cleaner/java'
4
4
  require 'coder/cleaner/simple'
5
+ require 'coder'
5
6
 
6
7
  module Coder
7
8
  module Cleaner
@@ -1,4 +1,4 @@
1
- require 'coder/error'
1
+ require 'coder'
2
2
 
3
3
  module Coder
4
4
  module Cleaner
@@ -1,3 +1,4 @@
1
+ require 'coder'
1
2
  require 'coder/error'
2
3
  require 'stringio'
3
4
 
@@ -31,15 +32,17 @@ module Coder
31
32
  end
32
33
 
33
34
  def initialize(encoding)
35
+ @encoding = encoding.to_s
34
36
  @nullbyte = "\0"
35
- @iconv = ::Iconv.new("#{encoding}//ignore", encoding.to_s)
36
- @nullbyte.encode! encoding if @nullbyte.respond_to? :encode!
37
+ @iconv = ::Iconv.new("#{encoding}//ignore", @encoding)
38
+ @nullbyte.encode! @encoding if @nullbyte.respond_to? :encode!
37
39
  rescue ::Iconv::InvalidEncoding => e
38
40
  raise Coder::InvalidEncoding, e.message
39
41
  end
40
42
 
41
43
  def clean(str)
42
- @iconv.iconv(str).gsub(@nullbyte, "")
44
+ string = @iconv.iconv(str).gsub(@nullbyte, "")
45
+ Coder.force_encoding! string, @encoding
43
46
  rescue ::Iconv::Failure => e
44
47
  raise Coder::Error, e.message
45
48
  end
@@ -1,3 +1,4 @@
1
+ require 'coder'
1
2
  require 'coder/error'
2
3
 
3
4
  module Coder
@@ -15,7 +16,7 @@ module Coder
15
16
  end
16
17
 
17
18
  def initialize(encoding)
18
- encoding = encoding.to_s.upcase
19
+ @encoding = encoding.to_s.upcase
19
20
  @nullbyte = "\0"
20
21
  @charset = ::Java::JavaNioCharset::Charset.for_name(encoding)
21
22
  @decoder = @charset.new_decoder
@@ -30,7 +31,8 @@ module Coder
30
31
 
31
32
  def clean(str)
32
33
  buffer = ::Java::JavaNio::ByteBuffer.wrap(str.to_java_bytes)
33
- @decoder.decode(buffer).to_s.gsub(@nullbyte, '')
34
+ string = @decoder.decode(buffer).to_s
35
+ Coder.force_encoding!(string, @encoding).gsub(@nullbyte, '')
34
36
  rescue Java::JavaLang::RuntimeException => e
35
37
  raise Coder::Error, e.message, e.backtrace
36
38
  end
@@ -1,4 +1,4 @@
1
- require 'coder/error'
1
+ require 'coder'
2
2
  require 'coder/cleaner/simple/byte_buffer'
3
3
  require 'coder/cleaner/simple/encodings'
4
4
 
@@ -25,14 +25,7 @@ module Coder
25
25
  def clean(str)
26
26
  bytes = ByteBuffer.new(@encoding)
27
27
  str.each_byte { |b| bytes << b }
28
- force_encoding bytes.to_s
29
- end
30
-
31
- private
32
-
33
- def force_encoding(str)
34
- return str unless str.respond_to? :force_encoding
35
- str.force_encoding(@name)
28
+ Coder.force_encoding!(bytes.to_s, @name)
36
29
  end
37
30
  end
38
31
  end
@@ -1,3 +1,3 @@
1
1
  module Coder
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
@@ -1,7 +1,5 @@
1
1
  # encoding: UTF-8
2
- require 'coder/cleaner'
3
- require 'coder/error'
4
- require 'support/clean_helpers'
2
+ require 'spec_helper'
5
3
 
6
4
  shared_examples Coder::Cleaner do
7
5
  encoding "UTF-8" do
@@ -11,12 +9,14 @@ shared_examples Coder::Cleaner do
11
9
  cleans "\0", ""
12
10
  cleans "{foo \xC3 'bar'}", "{foo 'bar'}"
13
11
  cleans "yummy\xE2 \xF0\x9F\x8D\x94 \x9F\x8D\x94", "yummy 🍔 "
12
+ sets_encoding
14
13
  end
15
14
 
16
15
  encoding "UCS-2BE" do
17
16
  cleans "\x00f\x00o\x00o"
18
17
  cleans "\x00f\x00ox", "\x00f\x00o"
19
18
  cleans "\x00f\x00o\x00\x00", "\x00f\x00o"
19
+ sets_encoding
20
20
  end
21
21
 
22
22
  encoding "UCS-4BE" do
@@ -24,6 +24,7 @@ shared_examples Coder::Cleaner do
24
24
  cleans "\x00\x00\x00f\x00\x00\x00o\x00\x00x", "\x00\x00\x00f\x00\x00\x00o"
25
25
  cleans "\x00\x00\x00f\x00\x00\x00o\x00\x00\x00\x00", "\x00\x00\x00f\x00\x00\x00o"
26
26
  cleans "\xFF\xFF\x10\x10", ""
27
+ sets_encoding
27
28
  end
28
29
 
29
30
  context "unknown encoding" do
@@ -0,0 +1,64 @@
1
+ # encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
+ describe Coder do
5
+ describe :clean do
6
+ it 'cleans up UTF-8 strings without having to specify the encoding' do
7
+ Coder.clean("yummy\xE2 \xF0\x9F\x8D\x94 \x9F\x8D\x94").should binary_equal("yummy 🍔 ")
8
+ end
9
+
10
+ it 'allows specifying the encoding' do
11
+ Coder.clean("\x00f\x00ox", "UCS-2BE").should binary_equal("\x00f\x00o")
12
+ end
13
+ end
14
+
15
+ describe :force_encoding do
16
+ it 'returns a different string' do
17
+ str = ''
18
+ Coder.force_encoding(str).should_not be_equal(str)
19
+ end
20
+
21
+ it 'leaves the content untouched' do
22
+ Coder.force_encoding('foo').should be == 'foo'
23
+ end
24
+
25
+ if ''.respond_to? :force_encoding
26
+ it 'sets the encoding to UTF-8' do
27
+ Coder.force_encoding(''.encode('binary')).encoding.name.should be == 'UTF-8'
28
+ end
29
+
30
+ it 'allows specifying a different encoding' do
31
+ Coder.force_encoding(''.encode('binary'), 'UTF-16BE').encoding.name.should be == 'UTF-16BE'
32
+ end
33
+
34
+ it 'does not modify the encoding of the passed in string' do
35
+ str = ''.encode('binary')
36
+ Coder.force_encoding str
37
+ str.encoding.name.should be == 'ASCII-8BIT'
38
+ end
39
+ end
40
+ end
41
+
42
+ describe :force_encoding! do
43
+ it 'returns the string' do
44
+ str = ''
45
+ Coder.force_encoding!(str).should be_equal(str)
46
+ end
47
+
48
+ if ''.respond_to? :force_encoding
49
+ it 'sets the encoding to UTF-8' do
50
+ Coder.force_encoding!(''.encode('binary')).encoding.name.should be == 'UTF-8'
51
+ end
52
+
53
+ it 'allows specifying a different encoding' do
54
+ Coder.force_encoding!(''.encode('binary'), 'UTF-16BE').encoding.name.should be == 'UTF-16BE'
55
+ end
56
+
57
+ it 'modifies the encoding of the passed in string' do
58
+ str = ''.encode('binary')
59
+ Coder.force_encoding! str
60
+ str.encoding.name.should be == 'UTF-8'
61
+ end
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,3 @@
1
+ require 'coder'
2
+ require 'support/clean_helpers'
3
+ require 'support/binary_matcher'
@@ -0,0 +1,4 @@
1
+ RSpec::Matchers.define :binary_equal do |expected|
2
+ match { |actual| actual.bytes.to_a == expected.bytes.to_a }
3
+ diffable
4
+ end
@@ -3,22 +3,24 @@ module CleanHelpers
3
3
  def encoding(encoding, &block)
4
4
  return unless described_class.supports? encoding
5
5
  context(encoding) do
6
- let(:encoding) { encoding}
6
+ let(:encoding) { encoding }
7
+ subject { described_class.new(encoding) }
7
8
  instance_eval(&block)
8
9
  end
9
10
  end
10
11
 
11
12
  def cleans(from, to = from)
12
13
  it "cleans #{from.inspect} to #{to.inspect}" do
13
- result = described_class.new(encoding).clean(binary(from))
14
- binary(result).should be == binary(to)
14
+ subject.clean(from).should binary_equal(to)
15
15
  end
16
16
  end
17
- end
18
17
 
19
- def binary(str)
20
- return str unless str.respond_to? :force_encoding
21
- str.force_encoding('binary')
18
+ def sets_encoding
19
+ return unless ''.respond_to? :encoding
20
+ it 'sets encoding properly' do
21
+ subject.clean(''.encode('US-ASCII')).encoding.names.should include(encoding)
22
+ end
23
+ end
22
24
  end
23
25
 
24
26
  def support(encoding)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -69,6 +69,9 @@ files:
69
69
  - lib/coder/error.rb
70
70
  - lib/coder/version.rb
71
71
  - spec/coder/cleaner_spec.rb
72
+ - spec/coder_spec.rb
73
+ - spec/spec_helper.rb
74
+ - spec/support/binary_matcher.rb
72
75
  - spec/support/clean_helpers.rb
73
76
  homepage: http://github.com/rkh/coder
74
77
  licenses: []
@@ -96,5 +99,8 @@ specification_version: 3
96
99
  summary: library to handle encodings
97
100
  test_files:
98
101
  - spec/coder/cleaner_spec.rb
102
+ - spec/coder_spec.rb
103
+ - spec/spec_helper.rb
104
+ - spec/support/binary_matcher.rb
99
105
  - spec/support/clean_helpers.rb
100
106
  has_rdoc: