encoding_checker 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +18 -16
- data/encoding_checker.gemspec +1 -0
- data/lib/encoding_checker/version.rb +1 -1
- data/lib/encoding_checker.rb +1 -1
- data/spec/encoding_checker_spec.rb +14 -1
- data/spec/fixture.dat +0 -0
- metadata +17 -4
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# EncodingChecker
|
2
2
|
|
3
|
+
[![Build Status](https://secure.travis-ci.org/fxposter/encoding_checker.png?branch=master)](http://travis-ci.org/fxposter/encoding_checker)
|
4
|
+
|
3
5
|
When you need to parse text files, you need to be sure that they are in a particular encoding
|
4
6
|
before actually parsing them. For example, some symbols are invalid for UTF-8 encoding, but nevertheless
|
5
7
|
files which are mainly in UTF-8 can contain some invalid characters, and many editors will not show you that.
|
@@ -12,34 +14,34 @@ Maybe sometimes I'll add 1.8.x support through iconv library, but for now 1.8.x
|
|
12
14
|
|
13
15
|
Add this line to your application's Gemfile:
|
14
16
|
|
15
|
-
|
17
|
+
gem 'encoding_checker'
|
16
18
|
|
17
19
|
And then execute:
|
18
20
|
|
19
|
-
|
21
|
+
$ bundle
|
20
22
|
|
21
23
|
Or install it yourself as:
|
22
24
|
|
23
|
-
|
25
|
+
$ gem install encoding_checker
|
24
26
|
|
25
27
|
## Usage
|
26
28
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
29
|
+
# instantiate checker with encoding name
|
30
|
+
checker = EncodingChecker.new("utf-8")
|
31
|
+
# check any particular text
|
32
|
+
result = checker.check("some string with wrong\xA0symbol")
|
33
|
+
|
34
|
+
unless result.empty?
|
35
|
+
result.invalid_lines.each do |line|
|
36
|
+
# use line.content, line.index and line.invalid_characters
|
37
|
+
line.invalid_characters.each do |character|
|
38
|
+
# use character.content and character.index
|
39
|
+
end
|
37
40
|
end
|
38
41
|
end
|
39
|
-
end
|
40
42
|
|
41
|
-
|
42
|
-
|
43
|
+
# raises EncodingChecker::Error
|
44
|
+
checker.check!("some string with wrong\xA0symbol")
|
43
45
|
|
44
46
|
Read the specs for more information.
|
45
47
|
|
data/encoding_checker.gemspec
CHANGED
data/lib/encoding_checker.rb
CHANGED
@@ -2,12 +2,25 @@
|
|
2
2
|
require 'spec_helper'
|
3
3
|
|
4
4
|
describe EncodingChecker do
|
5
|
-
let(:
|
5
|
+
let(:encoding) { 'utf-8' }
|
6
|
+
let(:checker) { EncodingChecker.new(encoding) }
|
6
7
|
let(:valid_string) { "some string with only_right symbols" }
|
7
8
|
let(:invalid_symbol) { "\xA0" }
|
8
9
|
let(:invalid_string) { "some string with wrong#{invalid_symbol}symbol" }
|
9
10
|
let(:invalid_text) { [valid_string, invalid_string].join("\n") }
|
10
11
|
|
12
|
+
describe 'utf-16' do
|
13
|
+
let(:encoding) { 'utf-16le' }
|
14
|
+
|
15
|
+
it "doesn't raise error when making error message" do
|
16
|
+
begin
|
17
|
+
checker.check!(File.read('spec/fixture.dat'))
|
18
|
+
rescue => e
|
19
|
+
expect { e.to_s }.not_to raise_error
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
11
24
|
describe '#check(string)' do
|
12
25
|
it 'returns result which contains invalid lines and characters in them' do
|
13
26
|
result = checker.check(invalid_text)
|
data/spec/fixture.dat
ADDED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: encoding_checker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03-
|
12
|
+
date: 2012-03-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &70365888012520 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,7 +21,18 @@ dependencies:
|
|
21
21
|
version: 2.8.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70365888012520
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rake
|
27
|
+
requirement: &70365888012080 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70365888012080
|
25
36
|
description: This gem helps you identify lines and characters of the text which
|
26
37
|
are invalid for particular encoding
|
27
38
|
email:
|
@@ -42,6 +53,7 @@ files:
|
|
42
53
|
- lib/encoding_checker.rb
|
43
54
|
- lib/encoding_checker/version.rb
|
44
55
|
- spec/encoding_checker_spec.rb
|
56
|
+
- spec/fixture.dat
|
45
57
|
- spec/spec_helper.rb
|
46
58
|
homepage: https://github.com/fxposter/encoding_checker
|
47
59
|
licenses: []
|
@@ -74,4 +86,5 @@ summary: When you need to parse some text files - you need to be sure, that they
|
|
74
86
|
for particular encoding.
|
75
87
|
test_files:
|
76
88
|
- spec/encoding_checker_spec.rb
|
89
|
+
- spec/fixture.dat
|
77
90
|
- spec/spec_helper.rb
|