coder 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/coder.rb +14 -3
- data/lib/coder/cleaner.rb +1 -0
- data/lib/coder/cleaner/builtin.rb +1 -1
- data/lib/coder/cleaner/iconv.rb +6 -3
- data/lib/coder/cleaner/java.rb +4 -2
- data/lib/coder/cleaner/simple.rb +2 -9
- data/lib/coder/version.rb +1 -1
- data/spec/coder/cleaner_spec.rb +4 -3
- data/spec/coder_spec.rb +64 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/support/binary_matcher.rb +4 -0
- data/spec/support/clean_helpers.rb +9 -7
- metadata +7 -1
data/lib/coder.rb
CHANGED
@@ -1,14 +1,25 @@
|
|
1
1
|
require 'coder/version'
|
2
2
|
require 'coder/cleaner'
|
3
|
+
require 'coder/error'
|
3
4
|
|
4
5
|
module Coder
|
5
6
|
extend self
|
7
|
+
DEFAULT_ENCODING = 'UTF-8'
|
6
8
|
|
7
|
-
def clean(str, encoding =
|
8
|
-
Cleaner.new(encoding
|
9
|
+
def clean(str, encoding = DEFAULT_ENCODING)
|
10
|
+
Cleaner.new(encoding).clean(str)
|
9
11
|
end
|
10
12
|
|
11
|
-
def clean!(str, encoding =
|
13
|
+
def clean!(str, encoding = DEFAULT_ENCODING)
|
12
14
|
str.replace clean(str, encoding)
|
13
15
|
end
|
16
|
+
|
17
|
+
def force_encoding!(str, encoding = DEFAULT_ENCODING)
|
18
|
+
return str unless str.respond_to? :force_encoding
|
19
|
+
str.force_encoding(encoding.to_s)
|
20
|
+
end
|
21
|
+
|
22
|
+
def force_encoding(str, encoding = DEFAULT_ENCODING)
|
23
|
+
force_encoding! str.dup, encoding
|
24
|
+
end
|
14
25
|
end
|
data/lib/coder/cleaner.rb
CHANGED
data/lib/coder/cleaner/iconv.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'coder'
|
1
2
|
require 'coder/error'
|
2
3
|
require 'stringio'
|
3
4
|
|
@@ -31,15 +32,17 @@ module Coder
|
|
31
32
|
end
|
32
33
|
|
33
34
|
def initialize(encoding)
|
35
|
+
@encoding = encoding.to_s
|
34
36
|
@nullbyte = "\0"
|
35
|
-
@iconv = ::Iconv.new("#{encoding}//ignore", encoding
|
36
|
-
@nullbyte.encode! encoding if @nullbyte.respond_to? :encode!
|
37
|
+
@iconv = ::Iconv.new("#{encoding}//ignore", @encoding)
|
38
|
+
@nullbyte.encode! @encoding if @nullbyte.respond_to? :encode!
|
37
39
|
rescue ::Iconv::InvalidEncoding => e
|
38
40
|
raise Coder::InvalidEncoding, e.message
|
39
41
|
end
|
40
42
|
|
41
43
|
def clean(str)
|
42
|
-
@iconv.iconv(str).gsub(@nullbyte, "")
|
44
|
+
string = @iconv.iconv(str).gsub(@nullbyte, "")
|
45
|
+
Coder.force_encoding! string, @encoding
|
43
46
|
rescue ::Iconv::Failure => e
|
44
47
|
raise Coder::Error, e.message
|
45
48
|
end
|
data/lib/coder/cleaner/java.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'coder'
|
1
2
|
require 'coder/error'
|
2
3
|
|
3
4
|
module Coder
|
@@ -15,7 +16,7 @@ module Coder
|
|
15
16
|
end
|
16
17
|
|
17
18
|
def initialize(encoding)
|
18
|
-
encoding
|
19
|
+
@encoding = encoding.to_s.upcase
|
19
20
|
@nullbyte = "\0"
|
20
21
|
@charset = ::Java::JavaNioCharset::Charset.for_name(encoding)
|
21
22
|
@decoder = @charset.new_decoder
|
@@ -30,7 +31,8 @@ module Coder
|
|
30
31
|
|
31
32
|
def clean(str)
|
32
33
|
buffer = ::Java::JavaNio::ByteBuffer.wrap(str.to_java_bytes)
|
33
|
-
@decoder.decode(buffer).to_s
|
34
|
+
string = @decoder.decode(buffer).to_s
|
35
|
+
Coder.force_encoding!(string, @encoding).gsub(@nullbyte, '')
|
34
36
|
rescue Java::JavaLang::RuntimeException => e
|
35
37
|
raise Coder::Error, e.message, e.backtrace
|
36
38
|
end
|
data/lib/coder/cleaner/simple.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require 'coder
|
1
|
+
require 'coder'
|
2
2
|
require 'coder/cleaner/simple/byte_buffer'
|
3
3
|
require 'coder/cleaner/simple/encodings'
|
4
4
|
|
@@ -25,14 +25,7 @@ module Coder
|
|
25
25
|
def clean(str)
|
26
26
|
bytes = ByteBuffer.new(@encoding)
|
27
27
|
str.each_byte { |b| bytes << b }
|
28
|
-
force_encoding
|
29
|
-
end
|
30
|
-
|
31
|
-
private
|
32
|
-
|
33
|
-
def force_encoding(str)
|
34
|
-
return str unless str.respond_to? :force_encoding
|
35
|
-
str.force_encoding(@name)
|
28
|
+
Coder.force_encoding!(bytes.to_s, @name)
|
36
29
|
end
|
37
30
|
end
|
38
31
|
end
|
data/lib/coder/version.rb
CHANGED
data/spec/coder/cleaner_spec.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
require '
|
3
|
-
require 'coder/error'
|
4
|
-
require 'support/clean_helpers'
|
2
|
+
require 'spec_helper'
|
5
3
|
|
6
4
|
shared_examples Coder::Cleaner do
|
7
5
|
encoding "UTF-8" do
|
@@ -11,12 +9,14 @@ shared_examples Coder::Cleaner do
|
|
11
9
|
cleans "\0", ""
|
12
10
|
cleans "{foo \xC3 'bar'}", "{foo 'bar'}"
|
13
11
|
cleans "yummy\xE2 \xF0\x9F\x8D\x94 \x9F\x8D\x94", "yummy 🍔 "
|
12
|
+
sets_encoding
|
14
13
|
end
|
15
14
|
|
16
15
|
encoding "UCS-2BE" do
|
17
16
|
cleans "\x00f\x00o\x00o"
|
18
17
|
cleans "\x00f\x00ox", "\x00f\x00o"
|
19
18
|
cleans "\x00f\x00o\x00\x00", "\x00f\x00o"
|
19
|
+
sets_encoding
|
20
20
|
end
|
21
21
|
|
22
22
|
encoding "UCS-4BE" do
|
@@ -24,6 +24,7 @@ shared_examples Coder::Cleaner do
|
|
24
24
|
cleans "\x00\x00\x00f\x00\x00\x00o\x00\x00x", "\x00\x00\x00f\x00\x00\x00o"
|
25
25
|
cleans "\x00\x00\x00f\x00\x00\x00o\x00\x00\x00\x00", "\x00\x00\x00f\x00\x00\x00o"
|
26
26
|
cleans "\xFF\xFF\x10\x10", ""
|
27
|
+
sets_encoding
|
27
28
|
end
|
28
29
|
|
29
30
|
context "unknown encoding" do
|
data/spec/coder_spec.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe Coder do
|
5
|
+
describe :clean do
|
6
|
+
it 'cleans up UTF-8 strings without having to specify the encoding' do
|
7
|
+
Coder.clean("yummy\xE2 \xF0\x9F\x8D\x94 \x9F\x8D\x94").should binary_equal("yummy 🍔 ")
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'allows specifying the encoding' do
|
11
|
+
Coder.clean("\x00f\x00ox", "UCS-2BE").should binary_equal("\x00f\x00o")
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
describe :force_encoding do
|
16
|
+
it 'returns a different string' do
|
17
|
+
str = ''
|
18
|
+
Coder.force_encoding(str).should_not be_equal(str)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'leaves the content untouched' do
|
22
|
+
Coder.force_encoding('foo').should be == 'foo'
|
23
|
+
end
|
24
|
+
|
25
|
+
if ''.respond_to? :force_encoding
|
26
|
+
it 'sets the encoding to UTF-8' do
|
27
|
+
Coder.force_encoding(''.encode('binary')).encoding.name.should be == 'UTF-8'
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'allows specifying a different encoding' do
|
31
|
+
Coder.force_encoding(''.encode('binary'), 'UTF-16BE').encoding.name.should be == 'UTF-16BE'
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'does not modify the encoding of the passed in string' do
|
35
|
+
str = ''.encode('binary')
|
36
|
+
Coder.force_encoding str
|
37
|
+
str.encoding.name.should be == 'ASCII-8BIT'
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
describe :force_encoding! do
|
43
|
+
it 'returns the string' do
|
44
|
+
str = ''
|
45
|
+
Coder.force_encoding!(str).should be_equal(str)
|
46
|
+
end
|
47
|
+
|
48
|
+
if ''.respond_to? :force_encoding
|
49
|
+
it 'sets the encoding to UTF-8' do
|
50
|
+
Coder.force_encoding!(''.encode('binary')).encoding.name.should be == 'UTF-8'
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'allows specifying a different encoding' do
|
54
|
+
Coder.force_encoding!(''.encode('binary'), 'UTF-16BE').encoding.name.should be == 'UTF-16BE'
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'modifies the encoding of the passed in string' do
|
58
|
+
str = ''.encode('binary')
|
59
|
+
Coder.force_encoding! str
|
60
|
+
str.encoding.name.should be == 'UTF-8'
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
data/spec/spec_helper.rb
ADDED
data/spec/support/binary_matcher.rb
ADDED
data/spec/support/clean_helpers.rb
CHANGED
@@ -3,22 +3,24 @@ module CleanHelpers
|
|
3
3
|
def encoding(encoding, &block)
|
4
4
|
return unless described_class.supports? encoding
|
5
5
|
context(encoding) do
|
6
|
-
let(:encoding) { encoding}
|
6
|
+
let(:encoding) { encoding }
|
7
|
+
subject { described_class.new(encoding) }
|
7
8
|
instance_eval(&block)
|
8
9
|
end
|
9
10
|
end
|
10
11
|
|
11
12
|
def cleans(from, to = from)
|
12
13
|
it "cleans #{from.inspect} to #{to.inspect}" do
|
13
|
-
|
14
|
-
binary(result).should be == binary(to)
|
14
|
+
subject.clean(from).should binary_equal(to)
|
15
15
|
end
|
16
16
|
end
|
17
|
-
end
|
18
17
|
|
19
|
-
|
20
|
-
|
21
|
-
|
18
|
+
def sets_encoding
|
19
|
+
return unless ''.respond_to? :encoding
|
20
|
+
it 'sets encoding properly' do
|
21
|
+
subject.clean(''.encode('US-ASCII')).encoding.names.should include(encoding)
|
22
|
+
end
|
23
|
+
end
|
22
24
|
end
|
23
25
|
|
24
26
|
def support(encoding)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: coder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -69,6 +69,9 @@ files:
|
|
69
69
|
- lib/coder/error.rb
|
70
70
|
- lib/coder/version.rb
|
71
71
|
- spec/coder/cleaner_spec.rb
|
72
|
+
- spec/coder_spec.rb
|
73
|
+
- spec/spec_helper.rb
|
74
|
+
- spec/support/binary_matcher.rb
|
72
75
|
- spec/support/clean_helpers.rb
|
73
76
|
homepage: http://github.com/rkh/coder
|
74
77
|
licenses: []
|
@@ -96,5 +99,8 @@ specification_version: 3
|
|
96
99
|
summary: library to handle encodings
|
97
100
|
test_files:
|
98
101
|
- spec/coder/cleaner_spec.rb
|
102
|
+
- spec/coder_spec.rb
|
103
|
+
- spec/spec_helper.rb
|
104
|
+
- spec/support/binary_matcher.rb
|
99
105
|
- spec/support/clean_helpers.rb
|
100
106
|
has_rdoc:
|