coder 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
@@ -0,0 +1,16 @@
1
+ rvm:
2
+ - 1.8.7
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - rbx-18mode
6
+ - rbx-19mode
7
+ - jruby-18mode
8
+ - jruby-19mode
9
+ - jruby-head
10
+ - ruby-head
11
+ before_install:
12
+ - export JRUBY_OPTS="--server -Xcext.enabled=false -Xcompile.invokedynamic=false"
13
+ matrix:
14
+ allow_failures:
15
+ - rvm: ruby-head
16
+ - rvm: jruby-head
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in coder.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Konstantin Haase
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,37 @@
1
+ # Coder
2
+
3
+ A Ruby library to deal with encodings. No matter if you're on 1.8 or 1.9, JRuby
4
+ or Rubinius, or whether you have a working iconv or not, it chooses the best way
5
+ for you to handle String encodings.
6
+
7
+ ## Usage
8
+
9
+ At the moment, Coder only cleans strings for you. I plan to add string
10
+ conversion and encoding detection later.
11
+
12
+ ### Cleaning Strings
13
+
14
+ ``` ruby
15
+ clean_string = Coder.clean(dirty_string)
16
+ ```
17
+
18
+ You can also specify the encoding:
19
+
20
+
21
+ ``` ruby
22
+ clean_string = Coder.clean(dirty_string, 'UTF-8')
23
+ ```
24
+
25
+ You can also modify a string in-place:
26
+
27
+ ``` ruby
28
+ Coder.clean! some_string
29
+ ```
30
+
31
+ ## Contributing
32
+
33
+ 1. Fork it
34
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
35
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
36
+ 4. Push to the branch (`git push origin my-new-feature`)
37
+ 5. Create new Pull Request
@@ -0,0 +1,2 @@
1
# Default rake task: runs the spec suite through rspec.
desc "run specs"
task :default do
  ruby '-S rspec spec'
end
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
# Make lib/ loadable so the version constant can be read without installing the gem.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'coder/version'

Gem::Specification.new do |gem|
  gem.name          = "coder"
  gem.version       = Coder::VERSION
  gem.authors       = ["Konstantin Haase"]
  gem.email         = ["konstantin.mailinglists@googlemail.com"]
  gem.description   = %q{handle encodings, no matter what}
  gem.summary       = %q{library to handle encodings}
  gem.homepage      = "http://github.com/rkh/coder"
  # The project ships an MIT LICENSE.txt; declare it so the built gem metadata carries it.
  gem.license       = "MIT"

  # Package exactly what git tracks; executables and tests are derived from that list.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_development_dependency("rspec", "~> 2.11")
  gem.add_development_dependency("rake")
end
@@ -0,0 +1,14 @@
1
+ require 'coder/version'
2
+ require 'coder/cleaner'
3
+
4
# Public entry point of the library. The methods are callable directly on the
# module (Coder.clean) because the module extends itself.
module Coder
  extend self

  # Returns a cleaned copy of +str+. +encoding+ falls back to 'UTF-8' when
  # nil (or false) is given.
  def clean(str, encoding = nil)
    cleaner = Cleaner.new(encoding || 'UTF-8')
    cleaner.clean(str)
  end

  # Cleans +str+ in place by replacing its content with the cleaned version,
  # and returns +str+.
  def clean!(str, encoding = nil)
    cleaned = clean(str, encoding)
    str.replace(cleaned)
  end
end
@@ -0,0 +1,13 @@
1
+ require 'coder/cleaner/builtin'
2
+ require 'coder/cleaner/iconv'
3
+ require 'coder/cleaner/simple'
4
+
5
+ module Coder
6
+ module Cleaner
7
+ Default = [Builtin, Iconv, Simple].detect { |c| c.available? }
8
+
9
+ def self.new(encoding)
10
+ Default.new(encoding)
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,43 @@
1
+ require 'coder/error'
2
+
3
module Coder
  module Cleaner
    # Cleaner built on String#encode / String#force_encoding.
    #
    # Byte sequences that are invalid or undefined in the target encoding are
    # removed, and NUL characters are stripped from the result.
    class Builtin
      # Transcoding options: replace invalid/undefined sequences with nothing.
      # Kept as a public constant; frozen so it cannot be altered by accident.
      OPTIONS = { :undef => :replace, :invalid => :replace, :replace => "" }.freeze

      # Truthy when the running Ruby offers the encoding APIs used below.
      #
      # `and` is used on purpose: `defined?(x) && y` would be parsed as
      # `defined?((x) && y)`.
      def self.available?
        defined?(Encoding.find) and
          defined?(EncodingError) and
          String.method_defined?(:encode) and
          String.method_defined?(:force_encoding)
      end

      # encoding - name of the target encoding (anything responding to to_s).
      #
      # Raises Coder::InvalidEncoding if Ruby does not know the encoding.
      def initialize(encoding)
        @encoding = encoding.to_s.upcase
        # Intermediate encoding for the round trip in #clean. On Rubies older
        # than 2.0 a different encoding is used; presumably because a
        # same-encoding #encode does not drop invalid bytes there.
        @dummy = if needs_dummy?
          @encoding == 'UTF-8' ? 'UTF-16BE' : 'UTF-8'
        else
          @encoding
        end

        check_encoding
      end

      # Returns a cleaned copy of +str+; the argument itself is not modified.
      #
      # Raises Coder::Error if transcoding fails with an EncodingError.
      def clean(str)
        str = str.dup.force_encoding(@encoding)
        # The options are spelled out as literal trailing arguments (same values
        # as OPTIONS) instead of passing the Hash object: Ruby 3 no longer
        # treats a positional Hash as keyword options, while this form still
        # parses on every Ruby version.
        str.encode(@dummy, :undef => :replace, :invalid => :replace, :replace => "").
          encode(@encoding).
          gsub("\0", "")
      rescue EncodingError => e
        raise Coder::Error, e.message
      end

      private

      # Translates Ruby's ArgumentError for unknown encodings into Coder::InvalidEncoding.
      def check_encoding
        Encoding.find(@encoding)
      rescue ArgumentError => e
        raise Coder::InvalidEncoding, e.message
      end

      # True on Ruby 1.x. Compares the major version numerically, so that
      # versions such as "10.0" are not mistaken for being older than "2.0".
      def needs_dummy?
        RUBY_VERSION.to_i < 2
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'coder/error'
2
+ require 'stringio'
3
+
4
module Coder
  module Cleaner
    # Cleaner backed by the iconv library.
    class Iconv
      # Requires iconv unless ::Iconv is already defined. $stderr is swapped
      # for a StringIO while the library loads, so anything written to it
      # during loading is discarded; the original stream is always restored.
      def self.load_iconv
        return if defined? ::Iconv
        stderr_was, $stderr = $stderr, StringIO.new
        require 'iconv'
      ensure
        $stderr = stderr_was if stderr_was
      end

      # Makes sure iconv is loaded before an instance is built.
      def self.new(*)
        load_iconv
        super
      end

      # True when iconv can be loaded and handles the probe conversion.
      #
      # Only StandardError and ScriptError (which covers the LoadError of a
      # missing iconv) count as "not available"; Interrupt, SystemExit and
      # similar are no longer swallowed.
      def self.available?
        load_iconv
        # NOTE(review): the long probe input presumably targets iconv builds
        # whose //ignore misbehaves on larger inputs - confirm.
        !!::Iconv.conv("iso-8859-1//ignore", "utf-8", "\305\253" + "a"*8160)
      rescue StandardError, ScriptError
        false
      end

      # encoding - name of the encoding to clean for.
      #
      # Raises Coder::InvalidEncoding if iconv does not know the encoding.
      def initialize(encoding)
        @iconv = ::Iconv.new("#{encoding}//ignore", encoding.to_s)
      rescue ::Iconv::InvalidEncoding => e
        raise Coder::InvalidEncoding, e.message
      end

      # Runs +str+ through the same-encoding //ignore converter and removes NUL
      # characters from the result.
      #
      # Raises Coder::Error on any iconv failure.
      def clean(str)
        @iconv.iconv(str).gsub("\0", "")
      rescue ::Iconv::Failure => e
        raise Coder::Error, e.message
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'coder/error'
2
+ require 'coder/cleaner/simple/byte_buffer'
3
+ require 'coder/cleaner/simple/encodings'
4
+
5
module Coder
  module Cleaner
    # Pure-Ruby cleaner: feeds a string byte by byte through a ByteBuffer that
    # is driven by one of the rule modules defined in Encodings.
    class Simple
      # This implementation has no external requirements.
      def self.available?
        true
      end

      # encoding - encoding name (anything responding to to_s); it is
      #            upcased and dashes become underscores to find the matching
      #            module in Encodings (e.g. "utf-8" -> UTF_8).
      #
      # Raises Coder::InvalidEncoding if Encodings has no such module.
      def initialize(encoding)
        # Stored as a String so force_encoding also accepts it when a Symbol was given.
        @name = encoding.to_s
        const_name = @name.upcase.gsub('-', '_')
        # Look the name up in Encodings' own constant list: const_defined?
        # would also find top-level constants such as ENV and raises NameError
        # for strings that are not valid constant names.
        known = Encodings.constants.map { |constant| constant.to_s }
        unless known.include?(const_name)
          raise Coder::InvalidEncoding, "unknown encoding name - #{encoding}"
        end
        @encoding = Encodings.const_get(const_name)
      end

      # Returns a new string holding only the bytes of +str+ that the
      # ByteBuffer kept.
      def clean(str)
        bytes = ByteBuffer.new(@encoding)
        str.each_byte { |b| bytes << b }
        force_encoding bytes.to_s
      end

      private

      # Tags the result with the requested encoding where String supports it.
      def force_encoding(str)
        return str unless str.respond_to? :force_encoding
        str.force_encoding(@name)
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
module Coder
  module Cleaner
    class Simple
      # Collects bytes one at a time and keeps only those that the given
      # encoding rule object accepts.
      #
      # bytes       - bytes accepted so far
      # buffer      - bytes of the multibyte sequence currently being assembled
      # outstanding - total size the sequence in +buffer+ has to reach
      class ByteBuffer
        attr_accessor :encoding, :bytes, :buffer, :outstanding

        # encoding - object answering garbage?, single_byte?, multibyte?,
        #            multibyte_start? and multibyte_size.
        def initialize(encoding)
          @encoding, @bytes = encoding, []
          clear_buffer
        end

        # Feeds one byte (an Integer) into the buffer.
        #
        # Returns self, so appends can be chained (buffer << a << b).
        def <<(byte)
          if encoding.garbage? byte, buffer
            clear_buffer
          elsif encoding.single_byte? byte, buffer
            add(byte)
          elsif encoding.multibyte? byte, buffer
            fill_buffer(byte)
          elsif encoding.multibyte_start? byte, buffer
            start_buffer(byte, encoding.multibyte_size(byte, buffer))
          else
            clear_buffer
          end
          self
        end

        # Packs the accepted bytes into a string.
        def to_s
          bytes.pack('C*')
        end

        private

        # Drops any partially assembled sequence.
        def clear_buffer
          start_buffer(nil, 0)
        end

        # Begins a new sequence of +size+ bytes starting with +byte+
        # (Array(nil) yields an empty buffer).
        def start_buffer(byte, size)
          @buffer, @outstanding = Array(byte), size
        end

        # Appends a byte to the pending sequence; a completed sequence is
        # accepted, one that grew past its announced size is discarded.
        def fill_buffer(byte)
          buffer << byte
          add(buffer) if buffer.size == outstanding
          clear_buffer if buffer.size > outstanding
        end

        # Accepts a byte or an array of bytes, abandoning whatever was pending.
        # +input+ still points at the old array after clear_buffer swapped in a new one.
        def add(input)
          clear_buffer
          bytes.concat Array(input)
        end
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
module Coder
  module Cleaner
    class Simple
      # Per-encoding rule sets. Every method receives the incoming byte
      # (+input+, an Integer) and the bytes of the sequence currently being
      # assembled (+buffered+, an Array).
      module Encodings
        # Note: This currently does not remove most overlong forms
        module UTF_8
          extend self

          # Leading bytes (F4 8F BF) of the highest encodable code points;
          # any buffered prefix that sorts after it lies beyond U+10FFFF.
          MAX_PREFIX = [244, 143, 191].freeze

          # True for bytes that can never appear in UTF-8 (192, 193, > 244) and
          # for continuation bytes that would extend an out-of-range sequence.
          #
          # A buffer equal to MAX_PREFIX is still in range, and a byte that is
          # not a continuation is judged on its own, so a valid character
          # following a broken sequence is not thrown away with it.
          def garbage?(input, buffered)
            return true if input > 244 || input == 192 || input == 193
            multibyte?(input, buffered) && (buffered <=> MAX_PREFIX) == 1
          end

          # ASCII range without NUL.
          def single_byte?(input, buffered)
            input.between? 1, 127
          end

          # Lead byte of a multibyte sequence.
          def multibyte_start?(input, buffered)
            input.between? 192, 244
          end

          # Continuation byte.
          def multibyte?(input, buffered)
            input.between? 128, 191
          end

          # Length of the sequence announced by +input+, 0 if it announces none.
          # The upper bound 244 matches multibyte_start? and garbage?.
          def multibyte_size(input, buffered)
            case input
            when 192..223 then 2
            when 224..239 then 3
            when 240..244 then 4
            when 1..127   then 1
            else 0
            end
          end
        end

        module UCS_2
          extend self

          # True when +input+ would complete a unit consisting only of zero bytes.
          def garbage?(input, buffered)
            return false unless buffered.size + 1 == multibyte_size
            input == 0 && buffered.all? { |b| b == 0 }
          end

          # Every character takes multibyte_size bytes.
          def single_byte?(input, buffered)
            false
          end

          # A new unit starts whenever the buffer length is a multiple of the unit size.
          def multibyte_start?(input, buffered)
            buffered.size % multibyte_size == 0
          end

          def multibyte?(input, buffered)
            not multibyte_start? input, buffered
          end

          def multibyte_size(*)
            2
          end
        end

        # Same rules as UCS_2 with four-byte units.
        module UCS_4
          include UCS_2
          extend self

          def garbage?(input, buffered)
            # NOTE(review): ByteBuffer passes single bytes as +input+, so the
            # 0x10FFFF comparison looks like it can never be true - confirm
            # what range check was intended here.
            super || input > 0x10FFFF
          end

          def multibyte_size(*)
            4
          end
        end
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
# The error classes live directly under Coder: the cleaners raise
# Coder::Error and Coder::InvalidEncoding, so they must be defined in that
# namespace rather than in a top-level Cleaner module.
module Coder
  # Base class for errors raised by the library.
  class Error < StandardError
  end

  # Raised when an encoding name is not recognized.
  class InvalidEncoding < Error
  end
end
@@ -0,0 +1,3 @@
1
module Coder
  # Gem version; frozen so the constant's value cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,33 @@
1
+ require 'coder/cleaner'
2
+
3
# Shared behaviour every cleaner implementation has to satisfy. The encoding
# under test is taken from the description of the enclosing context.
shared_examples Coder::Cleaner do
  let(:encoding) { example.example_group.description }
  subject { described_class.new(encoding) }

  # Declares an example asserting that cleaning +dirty+ yields +expected+
  # (by default the input is expected to come back unchanged).
  def self.cleans(dirty, expected = dirty)
    it "cleans #{dirty.inspect} to #{expected.inspect}" do
      subject.clean(dirty).should be == expected
    end
  end

  context "UTF-8" do
    # valid input stays as it is
    cleans "foo"
    cleans ""
    cleans "yummy 🍔 "

    # broken byte sequences are removed
    cleans "{foo \xC3 'bar'}", "{foo 'bar'}"
    cleans "yummy\xE2 \xF0\x9F\x8D\x94 \x9F\x8D\x94", "yummy 🍔 "
  end
end

# Run the shared examples against every implementation usable on this Ruby.
[Coder::Cleaner::Builtin, Coder::Cleaner::Iconv, Coder::Cleaner::Simple].each do |cleaner|
  describe cleaner do
    it_behaves_like Coder::Cleaner if cleaner.available?
  end
end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: coder
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Konstantin Haase
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-09-21 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '2.11'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '2.11'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: handle encodings, no matter what
47
+ email:
48
+ - konstantin.mailinglists@googlemail.com
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - .travis.yml
55
+ - Gemfile
56
+ - LICENSE.txt
57
+ - README.md
58
+ - Rakefile
59
+ - coder.gemspec
60
+ - lib/coder.rb
61
+ - lib/coder/cleaner.rb
62
+ - lib/coder/cleaner/builtin.rb
63
+ - lib/coder/cleaner/iconv.rb
64
+ - lib/coder/cleaner/simple.rb
65
+ - lib/coder/cleaner/simple/byte_buffer.rb
66
+ - lib/coder/cleaner/simple/encodings.rb
67
+ - lib/coder/error.rb
68
+ - lib/coder/version.rb
69
+ - spec/coder/cleaner_spec.rb
70
+ homepage: http://github.com/rkh/coder
71
+ licenses: []
72
+ post_install_message:
73
+ rdoc_options: []
74
+ require_paths:
75
+ - lib
76
+ required_ruby_version: !ruby/object:Gem::Requirement
77
+ none: false
78
+ requirements:
79
+ - - ! '>='
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 1.8.23
91
+ signing_key:
92
+ specification_version: 3
93
+ summary: library to handle encodings
94
+ test_files:
95
+ - spec/coder/cleaner_spec.rb
96
+ has_rdoc: