coder 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
@@ -0,0 +1,16 @@
1
+ rvm:
2
+ - 1.8.7
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - rbx-18mode
6
+ - rbx-19mode
7
+ - jruby-18mode
8
+ - jruby-19mode
9
+ - jruby-head
10
+ - ruby-head
11
+ before_install:
12
+ - export JRUBY_OPTS="--server -Xcext.enabled=false -Xcompile.invokedynamic=false"
13
+ matrix:
14
+ allow_failures:
15
+ - rvm: ruby-head
16
+ - rvm: jruby-head
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in coder.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Konstantin Haase
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,37 @@
1
+ # Coder
2
+
3
+ A Ruby library to deal with encodings, no matter if you're on 1.8 or 1.9, JRuby
4
+ or Rubinius, with or without a working iconv. It chooses the best way for you
5
+ to handle String encodings.
6
+
7
+ ## Usage
8
+
9
+ At the moment, Coder only cleans strings for you. I plan to add string
10
+ conversion and encoding detection later.
11
+
12
+ ### Cleaning Strings
13
+
14
+ ``` ruby
15
+ clean_string = Coder.clean(dirty_string)
16
+ ```
17
+
18
+ You can also specify the encoding:
19
+
20
+
21
+ ``` ruby
22
+ clean_string = Coder.clean(dirty_string, 'UTF-8')
23
+ ```
24
+
25
+ You can also modify a string in-place:
26
+
27
+ ``` ruby
28
+ Coder.clean! some_string
29
+ ```
30
+
31
+ ## Contributing
32
+
33
+ 1. Fork it
34
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
35
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
36
+ 4. Push to the branch (`git push origin my-new-feature`)
37
+ 5. Create new Pull Request
@@ -0,0 +1,2 @@
1
# Default rake task: runs `rspec spec` through the current Ruby interpreter.
desc 'run specs'
task :default do
  ruby '-S rspec spec'
end
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'coder/version'
5
+
6
# Gem specification for coder. The version comes from Coder::VERSION
# (lib/coder/version.rb) so it is declared in exactly one place.
Gem::Specification.new do |gem|
  gem.name          = "coder"
  gem.version       = Coder::VERSION
  gem.authors       = ["Konstantin Haase"]
  gem.email         = ["konstantin.mailinglists@googlemail.com"]
  gem.description   = %q{handle encodings, no matter what}
  gem.summary       = %q{library to handle encodings}
  gem.homepage      = "http://github.com/rkh/coder"
  # Declared so the published gem metadata matches the MIT terms shipped in
  # LICENSE.txt instead of advertising no license at all.
  gem.license       = "MIT"

  # Package exactly what git tracks; executables and test files are derived
  # from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_development_dependency("rspec", "~> 2.11")
  gem.add_development_dependency("rake")
end
@@ -0,0 +1,14 @@
1
+ require 'coder/version'
2
+ require 'coder/cleaner'
3
+
4
# Public entry point of the library. The module extends itself, so the
# methods below are called as Coder.clean / Coder.clean!.
module Coder
  extend self

  # Returns a cleaned copy of +str+.
  #
  # str      - the String to clean.
  # encoding - name of the encoding to clean for; 'UTF-8' is used when nil
  #            (or false) is given.
  #
  # The work is delegated to the cleaner built by Cleaner.new.
  def clean(str, encoding = nil)
    cleaner = Cleaner.new(encoding || 'UTF-8')
    cleaner.clean(str)
  end

  # In-place variant of #clean: overwrites the content of +str+ with its
  # cleaned form (String#replace) and returns +str+.
  def clean!(str, encoding = nil)
    cleaned = clean(str, encoding)
    str.replace(cleaned)
  end
end
@@ -0,0 +1,13 @@
1
+ require 'coder/cleaner/builtin'
2
+ require 'coder/cleaner/iconv'
3
+ require 'coder/cleaner/simple'
4
+
5
module Coder
  # Selects one of the cleaner implementations and exposes it behind
  # Cleaner.new.
  module Cleaner
    # The first implementation, in the listed order, whose .available?
    # returns a truthy value. Evaluated once, when this file is loaded.
    Default = [Builtin, Iconv, Simple].find(&:available?)

    class << self
      # Builds a cleaner for +encoding+ using the Default implementation.
      def new(encoding)
        Default.new(encoding)
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require 'coder/error'
2
+
3
module Coder
  module Cleaner
    # Cleaner built on Ruby's own String#encode / String#force_encoding.
    class Builtin
      # Transcoding options: undefined and invalid sequences are replaced
      # with the empty string, i.e. removed.
      OPTIONS = { :undef => :replace, :invalid => :replace, :replace => "" }

      # Truthy when this Ruby provides the encoding API used below.
      def self.available?
        defined?(Encoding.find) &&
          defined?(EncodingError) &&
          String.method_defined?(:encode) &&
          String.method_defined?(:force_encoding)
      end

      # encoding - name of the encoding to clean for (anything responding to
      #            to_s; it is upcased).
      #
      # Raises Coder::InvalidEncoding if Encoding.find does not know the name.
      def initialize(encoding)
        @encoding = encoding.to_s.upcase
        # Rubies before 2.0 take a round trip through a second ("dummy")
        # encoding; newer ones transcode to the target encoding directly.
        @dummy = if needs_dummy?
          @encoding == 'UTF-8' ? 'UTF-16BE' : 'UTF-8'
        else
          @encoding
        end

        check_encoding
      end

      # Returns a copy of +str+ tagged as the target encoding, with undefined
      # and invalid sequences and all NUL characters removed.
      #
      # Raises Coder::Error when Ruby reports an EncodingError.
      def clean(str)
        tagged = str.dup.force_encoding(@encoding)
        # The options are spelled out as trailing hash arguments rather than
        # passed as a Hash object: Ruby 3.0+ would treat a positional Hash as
        # the source encoding and raise TypeError, while this form parses as
        # an options hash on old Rubies and as keywords on new ones.
        scrubbed = tagged.encode(@dummy,
          :undef   => OPTIONS[:undef],
          :invalid => OPTIONS[:invalid],
          :replace => OPTIONS[:replace])
        scrubbed.encode(@encoding).gsub("\0", "")
      rescue EncodingError => e
        raise Coder::Error, e.message
      end

      private

      # Translates Ruby's ArgumentError for unknown encoding names into
      # Coder::InvalidEncoding.
      def check_encoding
        Encoding.find(@encoding)
      rescue ArgumentError => e
        raise Coder::InvalidEncoding, e.message
      end

      # True on Rubies older than 2.0. Compared numerically, segment by
      # segment, because a plain String comparison would order "10.0"
      # before "2.0".
      def needs_dummy?
        (RUBY_VERSION.split('.').map { |part| part.to_i } <=> [2, 0]) < 0
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'coder/error'
2
+ require 'stringio'
3
+
4
module Coder
  module Cleaner
    # Cleaner built on the iconv library (::Iconv).
    class Iconv
      # Requires 'iconv' unless ::Iconv is already defined. While requiring,
      # $stderr is pointed at a throw-away StringIO so anything written to it
      # is discarded; the previous $stderr is restored afterwards, even when
      # the require fails.
      def self.load_iconv
        return if defined? ::Iconv
        stderr_was, $stderr = $stderr, StringIO.new
        require 'iconv'
      ensure
        $stderr = stderr_was if stderr_was
      end

      # Makes sure iconv is loaded before an instance is built.
      def self.new(*)
        load_iconv
        super
      end

      # Probes iconv with a real conversion: the UTF-8 bytes "\305\253"
      # followed by 8160 "a" characters, converted to iso-8859-1 with
      # //ignore. Returns true if that succeeds and false if loading or
      # converting fails.
      #
      # Only ScriptError (which covers the LoadError of a missing iconv) and
      # StandardError are treated as "unavailable"; Interrupt, SystemExit and
      # similar exceptions propagate instead of being swallowed.
      def self.available?
        load_iconv
        !!::Iconv.conv("iso-8859-1//ignore", "utf-8", "\305\253" + "a"*8160)
      rescue ScriptError, StandardError
        false
      end

      # encoding - name of the encoding; used as both source and target, with
      #            //ignore appended to the target.
      #
      # Raises Coder::InvalidEncoding if iconv rejects the name.
      def initialize(encoding)
        @iconv = ::Iconv.new("#{encoding}//ignore", encoding.to_s)
      rescue ::Iconv::InvalidEncoding => e
        raise Coder::InvalidEncoding, e.message
      end

      # Runs +str+ through iconv and removes all NUL characters.
      #
      # Raises Coder::Error on any ::Iconv::Failure.
      def clean(str)
        @iconv.iconv(str).gsub("\0", "")
      rescue ::Iconv::Failure => e
        raise Coder::Error, e.message
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'coder/error'
2
+ require 'coder/cleaner/simple/byte_buffer'
3
+ require 'coder/cleaner/simple/encodings'
4
+
5
module Coder
  module Cleaner
    # Pure-Ruby cleaner: feeds the input byte by byte through a ByteBuffer
    # configured with one of the rule modules defined under Encodings.
    class Simple
      # Always usable; it has no external requirements.
      def self.available?
        true
      end

      # encoding - encoding name such as 'UTF-8'. It is upcased and dashes
      #            become underscores to form the name of a rule module
      #            under Encodings.
      #
      # Raises Coder::InvalidEncoding when Encodings has no such module.
      def initialize(encoding)
        const_name = encoding.to_s.upcase.gsub('-', '_')
        # Compare against the constants Encodings itself defines instead of
        # const_defined?: that raises NameError for strings that are not
        # valid constant names and, on Ruby 1.9+, also finds top-level
        # constants such as ENV. to_s keeps this working whether #constants
        # returns Strings or Symbols.
        known = Encodings.constants.map { |c| c.to_s }
        raise Coder::InvalidEncoding, "unknown encoding name - #{encoding}" unless known.include? const_name
        @encoding, @name = Encodings.const_get(const_name), encoding
      end

      # Returns a new String built from the bytes of +str+ that the
      # ByteBuffer kept, tagged with the requested encoding where the
      # String class supports that.
      def clean(str)
        bytes = ByteBuffer.new(@encoding)
        str.each_byte { |b| bytes << b }
        force_encoding bytes.to_s
      end

      private

      # Tags +str+ with the encoding name given to #initialize; a no-op when
      # String has no force_encoding.
      def force_encoding(str)
        return str unless str.respond_to? :force_encoding
        str.force_encoding(@name)
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
module Coder
  module Cleaner
    class Simple
      # Collects the bytes that survive cleaning.
      #
      # +bytes+ holds everything accepted so far, +buffer+ the bytes of the
      # multibyte sequence currently being assembled, and +outstanding+ the
      # total length that sequence must reach. Every decision about a byte is
      # delegated to +encoding+, which must respond to garbage?,
      # single_byte?, multibyte?, multibyte_start? and multibyte_size.
      class ByteBuffer
        attr_accessor :encoding, :bytes, :buffer, :outstanding

        def initialize(encoding)
          @encoding = encoding
          @bytes    = []
          clear_buffer
        end

        # Feeds one byte. The encoding's predicates are consulted in a fixed
        # order (garbage, single byte, continuation, sequence start); a byte
        # matching none of them discards the pending sequence.
        def <<(byte)
          return clear_buffer if encoding.garbage?(byte, buffer)
          return add(byte) if encoding.single_byte?(byte, buffer)
          return fill_buffer(byte) if encoding.multibyte?(byte, buffer)

          if encoding.multibyte_start?(byte, buffer)
            return start_buffer(byte, encoding.multibyte_size(byte, buffer))
          end

          clear_buffer
        end

        # The accepted bytes packed into a String.
        def to_s
          bytes.pack('C*')
        end

        private

        # Drops any pending sequence.
        def clear_buffer
          start_buffer(nil, 0)
        end

        # Starts a pending sequence with +byte+ (none when nil) that is
        # expected to grow to +size+ bytes.
        def start_buffer(byte, size)
          @buffer, @outstanding = Array(byte), size
        end

        # Appends a continuation byte. A sequence that reaches its expected
        # length is accepted; one that grows past it is discarded.
        def fill_buffer(byte)
          buffer.push(byte)
          add(buffer) if outstanding == buffer.size
          clear_buffer if buffer.size > outstanding
        end

        # Accepts a byte or an array of bytes. The pending sequence is reset
        # first; +input+ still references the old array when it is the
        # buffer.
        def add(input)
          clear_buffer
          bytes.concat(Array(input))
        end
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
module Coder
  module Cleaner
    class Simple
      # Per-encoding byte classification rules. Each module answers, for an
      # incoming byte (+input+) and the bytes of the sequence currently being
      # assembled (+buffered+), the questions ByteBuffer#<< asks.
      module Encodings
        # Note: This currently does not remove most overlong forms
        module UTF_8
          extend self

          # True for bytes that can never be part of the output: 0xF5 and
          # above, the lead bytes 0xC0/0xC1, and a continuation byte that
          # would push a 0xF4 sequence beyond U+10FFFF.
          #
          # Only the offending byte itself is rejected. 0xF4 may be followed
          # by 0x80..0x8F, so F4 8F BF BF (U+10FFFF) stays intact and bytes
          # arriving after a truncated F4 sequence are judged on their own.
          def garbage?(input, buffered)
            return true if input > 244 || input == 192 || input == 193
            buffered == [244] && input.between?(144, 191)
          end

          # ASCII bytes other than NUL.
          def single_byte?(input, buffered)
            input.between? 1, 127
          end

          # Lead byte of a multibyte sequence.
          def multibyte_start?(input, buffered)
            input.between? 192, 244
          end

          # Continuation byte.
          def multibyte?(input, buffered)
            input.between? 128, 191
          end

          # Total length of the sequence introduced by +input+; 0 for bytes
          # that do not start a character. Lead bytes above 244 are rejected
          # by garbage? and therefore map to 0 as well.
          def multibyte_size(input, buffered)
            case input
            when 192..223 then 2
            when 224..239 then 3
            when 240..244 then 4
            when 1..127   then 1
            else 0
            end
          end
        end

        # Fixed-width rules: every character is multibyte_size bytes long and
        # a unit made only of zero bytes is dropped.
        module UCS_2
          extend self

          # True only for the final byte of a unit whose bytes are all zero.
          def garbage?(input, buffered)
            return false unless buffered.size + 1 == multibyte_size
            input == 0 and buffered.all? { |b| b == 0 }
          end

          def single_byte?(input, buffered)
            false
          end

          # A unit starts whenever the number of buffered bytes is a multiple
          # of the unit size (in particular when nothing is buffered).
          def multibyte_start?(input, buffered)
            buffered.size % multibyte_size == 0
          end

          def multibyte?(input, buffered)
            not multibyte_start? input, buffered
          end

          def multibyte_size(*)
            2
          end
        end

        # Same rules as UCS_2 with four-byte units.
        module UCS_4
          include UCS_2
          extend self

          # NOTE(review): ByteBuffer#<< passes single bytes, for which
          # `input > 0x10FFFF` can never be true -- confirm whether a
          # code-point check over the buffered bytes was intended here.
          def garbage?(input, buffered)
            super or input > 0x10FFFF
          end

          def multibyte_size(*)
            4
          end
        end
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
# Error classes of the library. They live directly under Coder because the
# cleaners raise them as Coder::Error and Coder::InvalidEncoding.
module Coder
  # Base class for every error raised by the library.
  class Error < StandardError
  end

  # Raised when a cleaner is asked for an encoding it does not know.
  class InvalidEncoding < Error
  end
end
@@ -0,0 +1,3 @@
1
module Coder
  # Version of the library; coder.gemspec reads it for the gem version.
  VERSION = '0.0.1'
end
@@ -0,0 +1,33 @@
1
+ require 'coder/cleaner'
2
+
3
# Examples shared by every cleaner implementation. The encoding handed to
# the cleaner is the description of the enclosing context ("UTF-8").
shared_examples Coder::Cleaner do
  let(:encoding) { example.example_group.description }
  subject { described_class.new(encoding) }

  # Declares one example: cleaning +from+ must yield +to+ (by default the
  # input must come back unchanged).
  def self.cleans(from, to = from)
    it "cleans #{from.inspect} to #{to.inspect}" do
      subject.clean(from).should be == to
    end
  end

  context "UTF-8" do
    cleans "foo"
    cleans ""
    cleans "yummy 🍔 "

    # Only the invalid byte is removed, so the space on either side of it
    # remains: two spaces in the expected string.
    cleans "{foo \xC3 'bar'}", "{foo  'bar'}"
    cleans "yummy\xE2 \xF0\x9F\x8D\x94 \x9F\x8D\x94", "yummy 🍔 "
  end
end
22
+
23
# Run the shared examples against each implementation, skipping those that
# report themselves unavailable on the current Ruby.
[
  Coder::Cleaner::Builtin,
  Coder::Cleaner::Iconv,
  Coder::Cleaner::Simple
].each do |implementation|
  describe implementation do
    it_behaves_like Coder::Cleaner if described_class.available?
  end
end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: coder
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Konstantin Haase
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-09-21 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '2.11'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '2.11'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: handle encodings, no matter what
47
+ email:
48
+ - konstantin.mailinglists@googlemail.com
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - .travis.yml
55
+ - Gemfile
56
+ - LICENSE.txt
57
+ - README.md
58
+ - Rakefile
59
+ - coder.gemspec
60
+ - lib/coder.rb
61
+ - lib/coder/cleaner.rb
62
+ - lib/coder/cleaner/builtin.rb
63
+ - lib/coder/cleaner/iconv.rb
64
+ - lib/coder/cleaner/simple.rb
65
+ - lib/coder/cleaner/simple/byte_buffer.rb
66
+ - lib/coder/cleaner/simple/encodings.rb
67
+ - lib/coder/error.rb
68
+ - lib/coder/version.rb
69
+ - spec/coder/cleaner_spec.rb
70
+ homepage: http://github.com/rkh/coder
71
+ licenses: []
72
+ post_install_message:
73
+ rdoc_options: []
74
+ require_paths:
75
+ - lib
76
+ required_ruby_version: !ruby/object:Gem::Requirement
77
+ none: false
78
+ requirements:
79
+ - - ! '>='
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 1.8.23
91
+ signing_key:
92
+ specification_version: 3
93
+ summary: library to handle encodings
94
+ test_files:
95
+ - spec/coder/cleaner_spec.rb
96
+ has_rdoc: