coder 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/coder.rb +14 -3
- data/lib/coder/cleaner.rb +1 -0
- data/lib/coder/cleaner/builtin.rb +1 -1
- data/lib/coder/cleaner/iconv.rb +6 -3
- data/lib/coder/cleaner/java.rb +4 -2
- data/lib/coder/cleaner/simple.rb +2 -9
- data/lib/coder/version.rb +1 -1
- data/spec/coder/cleaner_spec.rb +4 -3
- data/spec/coder_spec.rb +64 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/support/binary_matcher.rb +4 -0
- data/spec/support/clean_helpers.rb +9 -7
- metadata +7 -1
data/lib/coder.rb
CHANGED
@@ -1,14 +1,25 @@
|
|
1
1
|
require 'coder/version'
|
2
2
|
require 'coder/cleaner'
|
3
|
+
require 'coder/error'
|
3
4
|
|
4
5
|
module Coder
|
5
6
|
extend self
|
7
|
+
DEFAULT_ENCODING = 'UTF-8'
|
6
8
|
|
7
|
-
def clean(str, encoding =
|
8
|
-
Cleaner.new(encoding
|
9
|
+
def clean(str, encoding = DEFAULT_ENCODING)
|
10
|
+
Cleaner.new(encoding).clean(str)
|
9
11
|
end
|
10
12
|
|
11
|
-
def clean!(str, encoding =
|
13
|
+
def clean!(str, encoding = DEFAULT_ENCODING)
|
12
14
|
str.replace clean(str, encoding)
|
13
15
|
end
|
16
|
+
|
17
|
+
def force_encoding!(str, encoding = DEFAULT_ENCODING)
|
18
|
+
return str unless str.respond_to? :force_encoding
|
19
|
+
str.force_encoding(encoding.to_s)
|
20
|
+
end
|
21
|
+
|
22
|
+
def force_encoding(str, encoding = DEFAULT_ENCODING)
|
23
|
+
force_encoding! str.dup, encoding
|
24
|
+
end
|
14
25
|
end
|
data/lib/coder/cleaner.rb
CHANGED
data/lib/coder/cleaner/iconv.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'coder'
|
1
2
|
require 'coder/error'
|
2
3
|
require 'stringio'
|
3
4
|
|
@@ -31,15 +32,17 @@ module Coder
|
|
31
32
|
end
|
32
33
|
|
33
34
|
def initialize(encoding)
|
35
|
+
@encoding = encoding.to_s
|
34
36
|
@nullbyte = "\0"
|
35
|
-
@iconv = ::Iconv.new("#{encoding}//ignore", encoding
|
36
|
-
@nullbyte.encode! encoding if @nullbyte.respond_to? :encode!
|
37
|
+
@iconv = ::Iconv.new("#{encoding}//ignore", @encoding)
|
38
|
+
@nullbyte.encode! @encoding if @nullbyte.respond_to? :encode!
|
37
39
|
rescue ::Iconv::InvalidEncoding => e
|
38
40
|
raise Coder::InvalidEncoding, e.message
|
39
41
|
end
|
40
42
|
|
41
43
|
def clean(str)
|
42
|
-
@iconv.iconv(str).gsub(@nullbyte, "")
|
44
|
+
string = @iconv.iconv(str).gsub(@nullbyte, "")
|
45
|
+
Coder.force_encoding! string, @encoding
|
43
46
|
rescue ::Iconv::Failure => e
|
44
47
|
raise Coder::Error, e.message
|
45
48
|
end
|
data/lib/coder/cleaner/java.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'coder'
|
1
2
|
require 'coder/error'
|
2
3
|
|
3
4
|
module Coder
|
@@ -15,7 +16,7 @@ module Coder
|
|
15
16
|
end
|
16
17
|
|
17
18
|
def initialize(encoding)
|
18
|
-
encoding
|
19
|
+
@encoding = encoding.to_s.upcase
|
19
20
|
@nullbyte = "\0"
|
20
21
|
@charset = ::Java::JavaNioCharset::Charset.for_name(encoding)
|
21
22
|
@decoder = @charset.new_decoder
|
@@ -30,7 +31,8 @@ module Coder
|
|
30
31
|
|
31
32
|
def clean(str)
|
32
33
|
buffer = ::Java::JavaNio::ByteBuffer.wrap(str.to_java_bytes)
|
33
|
-
@decoder.decode(buffer).to_s
|
34
|
+
string = @decoder.decode(buffer).to_s
|
35
|
+
Coder.force_encoding!(string, @encoding).gsub(@nullbyte, '')
|
34
36
|
rescue Java::JavaLang::RuntimeException => e
|
35
37
|
raise Coder::Error, e.message, e.backtrace
|
36
38
|
end
|
data/lib/coder/cleaner/simple.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require 'coder
|
1
|
+
require 'coder'
|
2
2
|
require 'coder/cleaner/simple/byte_buffer'
|
3
3
|
require 'coder/cleaner/simple/encodings'
|
4
4
|
|
@@ -25,14 +25,7 @@ module Coder
|
|
25
25
|
def clean(str)
|
26
26
|
bytes = ByteBuffer.new(@encoding)
|
27
27
|
str.each_byte { |b| bytes << b }
|
28
|
-
force_encoding
|
29
|
-
end
|
30
|
-
|
31
|
-
private
|
32
|
-
|
33
|
-
def force_encoding(str)
|
34
|
-
return str unless str.respond_to? :force_encoding
|
35
|
-
str.force_encoding(@name)
|
28
|
+
Coder.force_encoding!(bytes.to_s, @name)
|
36
29
|
end
|
37
30
|
end
|
38
31
|
end
|
data/lib/coder/version.rb
CHANGED
data/spec/coder/cleaner_spec.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
require '
|
3
|
-
require 'coder/error'
|
4
|
-
require 'support/clean_helpers'
|
2
|
+
require 'spec_helper'
|
5
3
|
|
6
4
|
shared_examples Coder::Cleaner do
|
7
5
|
encoding "UTF-8" do
|
@@ -11,12 +9,14 @@ shared_examples Coder::Cleaner do
|
|
11
9
|
cleans "\0", ""
|
12
10
|
cleans "{foo \xC3 'bar'}", "{foo 'bar'}"
|
13
11
|
cleans "yummy\xE2 \xF0\x9F\x8D\x94 \x9F\x8D\x94", "yummy 🍔 "
|
12
|
+
sets_encoding
|
14
13
|
end
|
15
14
|
|
16
15
|
encoding "UCS-2BE" do
|
17
16
|
cleans "\x00f\x00o\x00o"
|
18
17
|
cleans "\x00f\x00ox", "\x00f\x00o"
|
19
18
|
cleans "\x00f\x00o\x00\x00", "\x00f\x00o"
|
19
|
+
sets_encoding
|
20
20
|
end
|
21
21
|
|
22
22
|
encoding "UCS-4BE" do
|
@@ -24,6 +24,7 @@ shared_examples Coder::Cleaner do
|
|
24
24
|
cleans "\x00\x00\x00f\x00\x00\x00o\x00\x00x", "\x00\x00\x00f\x00\x00\x00o"
|
25
25
|
cleans "\x00\x00\x00f\x00\x00\x00o\x00\x00\x00\x00", "\x00\x00\x00f\x00\x00\x00o"
|
26
26
|
cleans "\xFF\xFF\x10\x10", ""
|
27
|
+
sets_encoding
|
27
28
|
end
|
28
29
|
|
29
30
|
context "unknown encoding" do
|
data/spec/coder_spec.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe Coder do
|
5
|
+
describe :clean do
|
6
|
+
it 'cleans up UTF-8 strings without having to specify the encoding' do
|
7
|
+
Coder.clean("yummy\xE2 \xF0\x9F\x8D\x94 \x9F\x8D\x94").should binary_equal("yummy 🍔 ")
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'allows specifying the encoding' do
|
11
|
+
Coder.clean("\x00f\x00ox", "UCS-2BE").should binary_equal("\x00f\x00o")
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
describe :force_encoding do
|
16
|
+
it 'returns a different string' do
|
17
|
+
str = ''
|
18
|
+
Coder.force_encoding(str).should_not be_equal(str)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'leaves the content untouched' do
|
22
|
+
Coder.force_encoding('foo').should be == 'foo'
|
23
|
+
end
|
24
|
+
|
25
|
+
if ''.respond_to? :force_encoding
|
26
|
+
it 'sets the encoding to UTF-8' do
|
27
|
+
Coder.force_encoding(''.encode('binary')).encoding.name.should be == 'UTF-8'
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'allows specifying a different encoding' do
|
31
|
+
Coder.force_encoding(''.encode('binary'), 'UTF-16BE').encoding.name.should be == 'UTF-16BE'
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'does not modify the encoding of the passed in string' do
|
35
|
+
str = ''.encode('binary')
|
36
|
+
Coder.force_encoding str
|
37
|
+
str.encoding.name.should be == 'ASCII-8BIT'
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
describe :force_encoding! do
|
43
|
+
it 'returns the string' do
|
44
|
+
str = ''
|
45
|
+
Coder.force_encoding!(str).should be_equal(str)
|
46
|
+
end
|
47
|
+
|
48
|
+
if ''.respond_to? :force_encoding
|
49
|
+
it 'sets the encoding to UTF-8' do
|
50
|
+
Coder.force_encoding!(''.encode('binary')).encoding.name.should be == 'UTF-8'
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'allows specifying a different encoding' do
|
54
|
+
Coder.force_encoding!(''.encode('binary'), 'UTF-16BE').encoding.name.should be == 'UTF-16BE'
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'modifies the encoding of the passed in string' do
|
58
|
+
str = ''.encode('binary')
|
59
|
+
Coder.force_encoding! str
|
60
|
+
str.encoding.name.should be == 'UTF-8'
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
data/spec/spec_helper.rb
ADDED
data/spec/support/binary_matcher.rb
ADDED
data/spec/support/clean_helpers.rb
CHANGED
@@ -3,22 +3,24 @@ module CleanHelpers
|
|
3
3
|
def encoding(encoding, &block)
|
4
4
|
return unless described_class.supports? encoding
|
5
5
|
context(encoding) do
|
6
|
-
let(:encoding) { encoding}
|
6
|
+
let(:encoding) { encoding }
|
7
|
+
subject { described_class.new(encoding) }
|
7
8
|
instance_eval(&block)
|
8
9
|
end
|
9
10
|
end
|
10
11
|
|
11
12
|
def cleans(from, to = from)
|
12
13
|
it "cleans #{from.inspect} to #{to.inspect}" do
|
13
|
-
|
14
|
-
binary(result).should be == binary(to)
|
14
|
+
subject.clean(from).should binary_equal(to)
|
15
15
|
end
|
16
16
|
end
|
17
|
-
end
|
18
17
|
|
19
|
-
|
20
|
-
|
21
|
-
|
18
|
+
def sets_encoding
|
19
|
+
return unless ''.respond_to? :encoding
|
20
|
+
it 'sets encoding properly' do
|
21
|
+
subject.clean(''.encode('US-ASCII')).encoding.names.should include(encoding)
|
22
|
+
end
|
23
|
+
end
|
22
24
|
end
|
23
25
|
|
24
26
|
def support(encoding)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: coder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -69,6 +69,9 @@ files:
|
|
69
69
|
- lib/coder/error.rb
|
70
70
|
- lib/coder/version.rb
|
71
71
|
- spec/coder/cleaner_spec.rb
|
72
|
+
- spec/coder_spec.rb
|
73
|
+
- spec/spec_helper.rb
|
74
|
+
- spec/support/binary_matcher.rb
|
72
75
|
- spec/support/clean_helpers.rb
|
73
76
|
homepage: http://github.com/rkh/coder
|
74
77
|
licenses: []
|
@@ -96,5 +99,8 @@ specification_version: 3
|
|
96
99
|
summary: library to handle encodings
|
97
100
|
test_files:
|
98
101
|
- spec/coder/cleaner_spec.rb
|
102
|
+
- spec/coder_spec.rb
|
103
|
+
- spec/spec_helper.rb
|
104
|
+
- spec/support/binary_matcher.rb
|
99
105
|
- spec/support/clean_helpers.rb
|
100
106
|
has_rdoc:
|