coder 0.2.0 → 0.2.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,25 @@
1
1
  require 'coder/version'
2
2
  require 'coder/cleaner'
3
+ require 'coder/error'
3
4
 
4
5
  module Coder
5
6
  extend self
7
+ DEFAULT_ENCODING = 'UTF-8'
6
8
 
7
- def clean(str, encoding = nil)
8
- Cleaner.new(encoding || 'UTF-8').clean(str)
9
+ def clean(str, encoding = DEFAULT_ENCODING)
10
+ Cleaner.new(encoding).clean(str)
9
11
  end
10
12
 
11
- def clean!(str, encoding = nil)
13
+ def clean!(str, encoding = DEFAULT_ENCODING)
12
14
  str.replace clean(str, encoding)
13
15
  end
16
+
17
+ def force_encoding!(str, encoding = DEFAULT_ENCODING)
18
+ return str unless str.respond_to? :force_encoding
19
+ str.force_encoding(encoding.to_s)
20
+ end
21
+
22
+ def force_encoding(str, encoding = DEFAULT_ENCODING)
23
+ force_encoding! str.dup, encoding
24
+ end
14
25
  end
@@ -2,6 +2,7 @@ require 'coder/cleaner/builtin'
2
2
  require 'coder/cleaner/iconv'
3
3
  require 'coder/cleaner/java'
4
4
  require 'coder/cleaner/simple'
5
+ require 'coder'
5
6
 
6
7
  module Coder
7
8
  module Cleaner
@@ -1,4 +1,4 @@
1
- require 'coder/error'
1
+ require 'coder'
2
2
 
3
3
  module Coder
4
4
  module Cleaner
@@ -1,3 +1,4 @@
1
+ require 'coder'
1
2
  require 'coder/error'
2
3
  require 'stringio'
3
4
 
@@ -31,15 +32,17 @@ module Coder
31
32
  end
32
33
 
33
34
  def initialize(encoding)
35
+ @encoding = encoding.to_s
34
36
  @nullbyte = "\0"
35
- @iconv = ::Iconv.new("#{encoding}//ignore", encoding.to_s)
36
- @nullbyte.encode! encoding if @nullbyte.respond_to? :encode!
37
+ @iconv = ::Iconv.new("#{encoding}//ignore", @encoding)
38
+ @nullbyte.encode! @encoding if @nullbyte.respond_to? :encode!
37
39
  rescue ::Iconv::InvalidEncoding => e
38
40
  raise Coder::InvalidEncoding, e.message
39
41
  end
40
42
 
41
43
  def clean(str)
42
- @iconv.iconv(str).gsub(@nullbyte, "")
44
+ string = @iconv.iconv(str).gsub(@nullbyte, "")
45
+ Coder.force_encoding! string, @encoding
43
46
  rescue ::Iconv::Failure => e
44
47
  raise Coder::Error, e.message
45
48
  end
@@ -1,3 +1,4 @@
1
+ require 'coder'
1
2
  require 'coder/error'
2
3
 
3
4
  module Coder
@@ -15,7 +16,7 @@ module Coder
15
16
  end
16
17
 
17
18
  def initialize(encoding)
18
- encoding = encoding.to_s.upcase
19
+ @encoding = encoding.to_s.upcase
19
20
  @nullbyte = "\0"
20
21
  @charset = ::Java::JavaNioCharset::Charset.for_name(encoding)
21
22
  @decoder = @charset.new_decoder
@@ -30,7 +31,8 @@ module Coder
30
31
 
31
32
  def clean(str)
32
33
  buffer = ::Java::JavaNio::ByteBuffer.wrap(str.to_java_bytes)
33
- @decoder.decode(buffer).to_s.gsub(@nullbyte, '')
34
+ string = @decoder.decode(buffer).to_s
35
+ Coder.force_encoding!(string, @encoding).gsub(@nullbyte, '')
34
36
  rescue Java::JavaLang::RuntimeException => e
35
37
  raise Coder::Error, e.message, e.backtrace
36
38
  end
@@ -1,4 +1,4 @@
1
- require 'coder/error'
1
+ require 'coder'
2
2
  require 'coder/cleaner/simple/byte_buffer'
3
3
  require 'coder/cleaner/simple/encodings'
4
4
 
@@ -25,14 +25,7 @@ module Coder
25
25
  def clean(str)
26
26
  bytes = ByteBuffer.new(@encoding)
27
27
  str.each_byte { |b| bytes << b }
28
- force_encoding bytes.to_s
29
- end
30
-
31
- private
32
-
33
- def force_encoding(str)
34
- return str unless str.respond_to? :force_encoding
35
- str.force_encoding(@name)
28
+ Coder.force_encoding!(bytes.to_s, @name)
36
29
  end
37
30
  end
38
31
  end
@@ -1,3 +1,3 @@
1
1
  module Coder
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
@@ -1,7 +1,5 @@
1
1
  # encoding: UTF-8
2
- require 'coder/cleaner'
3
- require 'coder/error'
4
- require 'support/clean_helpers'
2
+ require 'spec_helper'
5
3
 
6
4
  shared_examples Coder::Cleaner do
7
5
  encoding "UTF-8" do
@@ -11,12 +9,14 @@ shared_examples Coder::Cleaner do
11
9
  cleans "\0", ""
12
10
  cleans "{foo \xC3 'bar'}", "{foo 'bar'}"
13
11
  cleans "yummy\xE2 \xF0\x9F\x8D\x94 \x9F\x8D\x94", "yummy 🍔 "
12
+ sets_encoding
14
13
  end
15
14
 
16
15
  encoding "UCS-2BE" do
17
16
  cleans "\x00f\x00o\x00o"
18
17
  cleans "\x00f\x00ox", "\x00f\x00o"
19
18
  cleans "\x00f\x00o\x00\x00", "\x00f\x00o"
19
+ sets_encoding
20
20
  end
21
21
 
22
22
  encoding "UCS-4BE" do
@@ -24,6 +24,7 @@ shared_examples Coder::Cleaner do
24
24
  cleans "\x00\x00\x00f\x00\x00\x00o\x00\x00x", "\x00\x00\x00f\x00\x00\x00o"
25
25
  cleans "\x00\x00\x00f\x00\x00\x00o\x00\x00\x00\x00", "\x00\x00\x00f\x00\x00\x00o"
26
26
  cleans "\xFF\xFF\x10\x10", ""
27
+ sets_encoding
27
28
  end
28
29
 
29
30
  context "unknown encoding" do
@@ -0,0 +1,64 @@
1
+ # encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
+ describe Coder do
5
+ describe :clean do
6
+ it 'cleans up UTF-8 strings without having to specify the encoding' do
7
+ Coder.clean("yummy\xE2 \xF0\x9F\x8D\x94 \x9F\x8D\x94").should binary_equal("yummy 🍔 ")
8
+ end
9
+
10
+ it 'allows specifying the encoding' do
11
+ Coder.clean("\x00f\x00ox", "UCS-2BE").should binary_equal("\x00f\x00o")
12
+ end
13
+ end
14
+
15
+ describe :force_encoding do
16
+ it 'returns a different string' do
17
+ str = ''
18
+ Coder.force_encoding(str).should_not be_equal(str)
19
+ end
20
+
21
+ it 'leaves the content untouched' do
22
+ Coder.force_encoding('foo').should be == 'foo'
23
+ end
24
+
25
+ if ''.respond_to? :force_encoding
26
+ it 'sets the encoding to UTF-8' do
27
+ Coder.force_encoding(''.encode('binary')).encoding.name.should be == 'UTF-8'
28
+ end
29
+
30
+ it 'allows specifying a different encoding' do
31
+ Coder.force_encoding(''.encode('binary'), 'UTF-16BE').encoding.name.should be == 'UTF-16BE'
32
+ end
33
+
34
+ it 'does not modify the encoding of the passed in string' do
35
+ str = ''.encode('binary')
36
+ Coder.force_encoding str
37
+ str.encoding.name.should be == 'ASCII-8BIT'
38
+ end
39
+ end
40
+ end
41
+
42
+ describe :force_encoding! do
43
+ it 'returns the string' do
44
+ str = ''
45
+ Coder.force_encoding!(str).should be_equal(str)
46
+ end
47
+
48
+ if ''.respond_to? :force_encoding
49
+ it 'sets the encoding to UTF-8' do
50
+ Coder.force_encoding!(''.encode('binary')).encoding.name.should be == 'UTF-8'
51
+ end
52
+
53
+ it 'allows specifying a different encoding' do
54
+ Coder.force_encoding!(''.encode('binary'), 'UTF-16BE').encoding.name.should be == 'UTF-16BE'
55
+ end
56
+
57
+ it 'modifies the encoding of the passed in string' do
58
+ str = ''.encode('binary')
59
+ Coder.force_encoding! str
60
+ str.encoding.name.should be == 'UTF-8'
61
+ end
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,3 @@
1
+ require 'coder'
2
+ require 'support/clean_helpers'
3
+ require 'support/binary_matcher'
@@ -0,0 +1,4 @@
1
+ RSpec::Matchers.define :binary_equal do |expected|
2
+ match { |actual| actual.bytes.to_a == expected.bytes.to_a }
3
+ diffable
4
+ end
@@ -3,22 +3,24 @@ module CleanHelpers
3
3
  def encoding(encoding, &block)
4
4
  return unless described_class.supports? encoding
5
5
  context(encoding) do
6
- let(:encoding) { encoding}
6
+ let(:encoding) { encoding }
7
+ subject { described_class.new(encoding) }
7
8
  instance_eval(&block)
8
9
  end
9
10
  end
10
11
 
11
12
  def cleans(from, to = from)
12
13
  it "cleans #{from.inspect} to #{to.inspect}" do
13
- result = described_class.new(encoding).clean(binary(from))
14
- binary(result).should be == binary(to)
14
+ subject.clean(from).should binary_equal(to)
15
15
  end
16
16
  end
17
- end
18
17
 
19
- def binary(str)
20
- return str unless str.respond_to? :force_encoding
21
- str.force_encoding('binary')
18
+ def sets_encoding
19
+ return unless ''.respond_to? :encoding
20
+ it 'sets encoding properly' do
21
+ subject.clean(''.encode('US-ASCII')).encoding.names.should include(encoding)
22
+ end
23
+ end
22
24
  end
23
25
 
24
26
  def support(encoding)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -69,6 +69,9 @@ files:
69
69
  - lib/coder/error.rb
70
70
  - lib/coder/version.rb
71
71
  - spec/coder/cleaner_spec.rb
72
+ - spec/coder_spec.rb
73
+ - spec/spec_helper.rb
74
+ - spec/support/binary_matcher.rb
72
75
  - spec/support/clean_helpers.rb
73
76
  homepage: http://github.com/rkh/coder
74
77
  licenses: []
@@ -96,5 +99,8 @@ specification_version: 3
96
99
  summary: library to handle encodings
97
100
  test_files:
98
101
  - spec/coder/cleaner_spec.rb
102
+ - spec/coder_spec.rb
103
+ - spec/spec_helper.rb
104
+ - spec/support/binary_matcher.rb
99
105
  - spec/support/clean_helpers.rb
100
106
  has_rdoc: