charazard 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.markdown +4 -15
- data/charazard.gemspec +2 -0
- data/lib/charazard/io.rb +20 -0
- data/lib/charazard/version.rb +1 -1
- data/test/charazard_io_test.rb +13 -0
- metadata +33 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 78cecee8b3d4cbc404f29ca445fe056e50560d32
|
4
|
+
data.tar.gz: 28d6d6e0982f2250ace5d28bc3451b1d2b646d38
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b2f8ff14f35fad23fe61330df3f892a0a429799bbf24e936d20b9b94697834009154754c687d33b622ec46316e59613b991dac1b6b3ec62064434f5f9cd6018e
|
7
|
+
data.tar.gz: 59d6e5d5f2cf07daacbf9acb9e2ad36a1266f4d551090c3a9cd92ba6bd1609b4c1d74fa33066dd4f8ee4b7ee1caaa2be1cca02a7966268f2804ef0d72b5bee92
|
data/README.markdown
CHANGED
@@ -16,26 +16,15 @@ Charazard.fix_invalid_unicode_literals("\x93Smart quotes\x94 \xC3\x9Cber Unicode
|
|
16
16
|
|
17
17
|
`Charazard.fix_invalid_unicode_literals` can be used in combination with
|
18
18
|
[`filter_io`](https://github.com/jasoncodes/filter_io) to filter CSV streams.
|
19
|
-
|
19
|
+
Since this is such a common use case, Charazard includes a handy
|
20
|
+
[helper class](https://github.com/jasoncodes/charazard/blob/master/lib/charazard/io.rb):
|
20
21
|
|
21
22
|
``` ruby
|
22
|
-
require 'filter_io'
|
23
|
-
require 'charazard'
|
23
|
+
require 'charazard/io'
|
24
24
|
require 'csv'
|
25
25
|
|
26
26
|
File.open(filename, external_encoding: 'UTF-8') do |io|
|
27
|
-
io = FilterIO.new(io) do |data, state|
|
28
|
-
# fix invalid UTF-8 literals
|
29
|
-
data = Charazard.fix_invalid_unicode_literals(data)
|
30
|
-
|
31
|
-
# grab another chunk if the last character is a delimiter
|
32
|
-
raise FilterIO::NeedMoreData if data =~ /[\r\n]\z/ && !state.eof?
|
33
|
-
# normalise line endings to LF
|
34
|
-
data = data.gsub /\r\n|\r|\n/, "\n"
|
35
|
-
|
36
|
-
data
|
37
|
-
end
|
38
|
-
|
27
|
+
io = Charazard::IO.new(io)
|
39
28
|
CSV.parse(io, row_sep: "\n") do |row|
|
40
29
|
p row
|
41
30
|
end
|
data/charazard.gemspec
CHANGED
@@ -21,5 +21,7 @@ Gem::Specification.new do |spec|
|
|
21
21
|
|
22
22
|
spec.add_development_dependency 'bundler', '~> 1.6'
|
23
23
|
spec.add_development_dependency 'rake'
|
24
|
+
spec.add_development_dependency 'minitest'
|
25
|
+
spec.add_development_dependency 'filter_io'
|
24
26
|
spec.add_development_dependency 'pry'
|
25
27
|
end
|
data/lib/charazard/io.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'charazard'
|
2
|
+
require 'filter_io'
|
3
|
+
|
4
|
+
module Charazard
|
5
|
+
class IO < FilterIO
|
6
|
+
def initialize(io)
|
7
|
+
super do |data, state|
|
8
|
+
# fix invalid UTF-8 literals
|
9
|
+
data = Charazard.fix_invalid_unicode_literals(data)
|
10
|
+
|
11
|
+
# grab another chunk if the last character is a delimiter
|
12
|
+
raise FilterIO::NeedMoreData if data =~ /[\r\n]\z/ && !state.eof?
|
13
|
+
# normalise line endings to LF
|
14
|
+
data = data.gsub /\r\n|\r|\n/, "\n"
|
15
|
+
|
16
|
+
data
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/charazard/version.rb
CHANGED
data/test/charazard_io_test.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
require 'charazard/io'
|
5
|
+
require 'csv'
|
6
|
+
|
7
|
+
describe Charazard::IO do
|
8
|
+
it 'converts mixed character encodings into valid UTF-8' do
|
9
|
+
src = StringIO.new "Name,Character\nEm dash,\x97\r\nSmart quotes,\x93Quoted String\x94\r"
|
10
|
+
dst = Charazard::IO.new(src)
|
11
|
+
assert_equal "Name,Character\nEm dash,—\nSmart quotes,“Quoted String”\n", dst.read
|
12
|
+
end
|
13
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: charazard
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jason Weathered
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -52,6 +52,34 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: filter_io
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
55
83
|
- !ruby/object:Gem::Dependency
|
56
84
|
name: pry
|
57
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,7 +108,9 @@ files:
|
|
80
108
|
- Rakefile
|
81
109
|
- charazard.gemspec
|
82
110
|
- lib/charazard.rb
|
111
|
+
- lib/charazard/io.rb
|
83
112
|
- lib/charazard/version.rb
|
113
|
+
- test/charazard_io_test.rb
|
84
114
|
- test/charazard_test.rb
|
85
115
|
- test/test_helper.rb
|
86
116
|
homepage: https://github.com/jasoncodes/charazard
|
@@ -108,5 +138,6 @@ signing_key:
|
|
108
138
|
specification_version: 4
|
109
139
|
summary: Cleans up bad character encodings with liberal application of fire.
|
110
140
|
test_files:
|
141
|
+
- test/charazard_io_test.rb
|
111
142
|
- test/charazard_test.rb
|
112
143
|
- test/test_helper.rb
|