charazard 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.markdown +4 -15
- data/charazard.gemspec +2 -0
- data/lib/charazard/io.rb +20 -0
- data/lib/charazard/version.rb +1 -1
- data/test/charazard_io_test.rb +13 -0
- metadata +33 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 78cecee8b3d4cbc404f29ca445fe056e50560d32
|
4
|
+
data.tar.gz: 28d6d6e0982f2250ace5d28bc3451b1d2b646d38
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b2f8ff14f35fad23fe61330df3f892a0a429799bbf24e936d20b9b94697834009154754c687d33b622ec46316e59613b991dac1b6b3ec62064434f5f9cd6018e
|
7
|
+
data.tar.gz: 59d6e5d5f2cf07daacbf9acb9e2ad36a1266f4d551090c3a9cd92ba6bd1609b4c1d74fa33066dd4f8ee4b7ee1caaa2be1cca02a7966268f2804ef0d72b5bee92
|
data/README.markdown
CHANGED
@@ -16,26 +16,15 @@ Charazard.fix_invalid_unicode_literals("\x93Smart quotes\x94 \xC3\x9Cber Unicode
|
|
16
16
|
|
17
17
|
`Charazard.fix_invalid_unicode_literals` can be used in combination with
|
18
18
|
[`filter_io`](https://github.com/jasoncodes/filter_io) to filter CSV streams.
|
19
|
-
|
19
|
+
Since this is such a common use case, Charazard includes a handy
|
20
|
+
[helper class](https://github.com/jasoncodes/charazard/blob/master/lib/charazard/io.rb):
|
20
21
|
|
21
22
|
``` ruby
|
22
|
-
require 'filter_io'
|
23
|
-
require 'charazard'
|
23
|
+
require 'charazard/io'
|
24
24
|
require 'csv'
|
25
25
|
|
26
26
|
File.open(filename, external_encoding: 'UTF-8') do |io|
|
27
|
-
io = FilterIO.new(io) do |data, state|
|
28
|
-
# fix invalid UTF-8 literals
|
29
|
-
data = Charazard.fix_invalid_unicode_literals(data)
|
30
|
-
|
31
|
-
# grab another chunk if the last character is a delimiter
|
32
|
-
raise FilterIO::NeedMoreData if data =~ /[\r\n]\z/ && !state.eof?
|
33
|
-
# normalise line endings to LF
|
34
|
-
data = data.gsub /\r\n|\r|\n/, "\n"
|
35
|
-
|
36
|
-
data
|
37
|
-
end
|
38
|
-
|
27
|
+
io = Charazard::IO.new(io)
|
39
28
|
CSV.parse(io, row_sep: "\n") do |row|
|
40
29
|
p row
|
41
30
|
end
|
data/charazard.gemspec
CHANGED
@@ -21,5 +21,7 @@ Gem::Specification.new do |spec|
|
|
21
21
|
|
22
22
|
spec.add_development_dependency 'bundler', '~> 1.6'
|
23
23
|
spec.add_development_dependency 'rake'
|
24
|
+
spec.add_development_dependency 'minitest'
|
25
|
+
spec.add_development_dependency 'filter_io'
|
24
26
|
spec.add_development_dependency 'pry'
|
25
27
|
end
|
data/lib/charazard/io.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'charazard'
|
2
|
+
require 'filter_io'
|
3
|
+
|
4
|
+
module Charazard
|
5
|
+
class IO < FilterIO
|
6
|
+
def initialize(io)
|
7
|
+
super do |data, state|
|
8
|
+
# fix invalid UTF-8 literals
|
9
|
+
data = Charazard.fix_invalid_unicode_literals(data)
|
10
|
+
|
11
|
+
# grab another chunk if the last character is a delimiter
|
12
|
+
raise FilterIO::NeedMoreData if data =~ /[\r\n]\z/ && !state.eof?
|
13
|
+
# normalise line endings to LF
|
14
|
+
data = data.gsub /\r\n|\r|\n/, "\n"
|
15
|
+
|
16
|
+
data
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/charazard/version.rb
CHANGED
data/test/charazard_io_test.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
require 'charazard/io'
|
5
|
+
require 'csv'
|
6
|
+
|
7
|
+
describe Charazard::IO do
|
8
|
+
it 'converts mixed character encodings into valid UTF-8' do
|
9
|
+
src = StringIO.new "Name,Character\nEm dash,\x97\r\nSmart quotes,\x93Quoted String\x94\r"
|
10
|
+
dst = Charazard::IO.new(src)
|
11
|
+
assert_equal "Name,Character\nEm dash,—\nSmart quotes,“Quoted String”\n", dst.read
|
12
|
+
end
|
13
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: charazard
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jason Weathered
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -52,6 +52,34 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: filter_io
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
55
83
|
- !ruby/object:Gem::Dependency
|
56
84
|
name: pry
|
57
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,7 +108,9 @@ files:
|
|
80
108
|
- Rakefile
|
81
109
|
- charazard.gemspec
|
82
110
|
- lib/charazard.rb
|
111
|
+
- lib/charazard/io.rb
|
83
112
|
- lib/charazard/version.rb
|
113
|
+
- test/charazard_io_test.rb
|
84
114
|
- test/charazard_test.rb
|
85
115
|
- test/test_helper.rb
|
86
116
|
homepage: https://github.com/jasoncodes/charazard
|
@@ -108,5 +138,6 @@ signing_key:
|
|
108
138
|
specification_version: 4
|
109
139
|
summary: Cleans up bad character encodings with liberal application of fire.
|
110
140
|
test_files:
|
141
|
+
- test/charazard_io_test.rb
|
111
142
|
- test/charazard_test.rb
|
112
143
|
- test/test_helper.rb
|