hippie_csv 0.0.1 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/hippie_csv.gemspec +2 -0
- data/lib/hippie_csv/errors.rb +2 -2
- data/lib/hippie_csv/support.rb +11 -4
- data/lib/hippie_csv/version.rb +1 -1
- data/lib/hippie_csv.rb +2 -2
- data/spec/fixtures/accents_semicolon_windows_1252.csv +3 -0
- data/spec/hippie_csv/support_spec.rb +3 -6
- data/spec/hippie_csv/version_spec.rb +1 -1
- data/spec/hippie_csv_spec.rb +10 -1
- metadata +18 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 662fc68658433183bde7f943fff313555281db4a
|
4
|
+
data.tar.gz: c0202320766061ee14a7fc7077e3d85d9f0548d7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 659b01340829dc9056ef86f5df2ad8eb6c70f5ed66977f39bc7720149c5274d0ea26861b4bc7db20c2bc72ff3ca9e9165e09dde31812deb486b0974cbeb2a4d8
|
7
|
+
data.tar.gz: 5c9118714c431e0385598759b22d6b7d3f90c5b490afb1b8aaf911620a9ae5031c9bce34d0a591b602e395bd4ab8fd35784ef9ecc364f6e3aabbab68fc41344a
|
data/README.md
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
[ ![Codeship Status for intercom/hippie_csv](https://codeship.com/projects/f3b188e0-f312-0132-75cb-5ed004d44c71/status?branch=master)](https://codeship.com/projects/85324)
|
4
4
|
|
5
5
|
Ruby's `CSV` is great. It complies with the [proposed CSV spec](https://www.ietf.org/rfc/rfc4180.txt)
|
6
|
-
pretty well. If you pass its methods bad or
|
6
|
+
pretty well. If you pass its methods bad or non-compliant CSVs, it’ll rightfully
|
7
7
|
and loudly complain. It’s great 👍
|
8
8
|
|
9
9
|
Except…if you want to be able to deal with files from the real world. At
|
data/hippie_csv.gemspec
CHANGED
data/lib/hippie_csv/errors.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module HippieCSV
|
2
2
|
class UnableToParseError < StandardError
|
3
|
-
def
|
4
|
-
|
3
|
+
def initialize(msg = "Something went wrong. Report this CSV: https://github.com/intercom/hippie_csv")
|
4
|
+
super(msg)
|
5
5
|
end
|
6
6
|
end
|
7
7
|
end
|
data/lib/hippie_csv/support.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require "hippie_csv/constants"
|
2
|
+
require "charlock_holmes"
|
2
3
|
|
3
4
|
module HippieCSV
|
4
5
|
module Support
|
@@ -7,13 +8,19 @@ module HippieCSV
|
|
7
8
|
File.read(file_path, encoding: ENCODING_WITH_BOM)
|
8
9
|
end
|
9
10
|
|
10
|
-
def encode
|
11
|
+
def encode(string)
|
11
12
|
unless string.valid_encoding?
|
12
|
-
|
13
|
-
|
13
|
+
current_encoding = CharlockHolmes::EncodingDetector.detect(string)[:encoding]
|
14
|
+
|
15
|
+
string = if !current_encoding.nil?
|
16
|
+
CharlockHolmes::Converter.convert(string, current_encoding, ENCODING)
|
17
|
+
else
|
18
|
+
string.encode(ALTERNATE_ENCODING, ENCODING, invalid: :replace, replace: "")
|
19
|
+
.encode(ENCODING, ALTERNATE_ENCODING)
|
20
|
+
end
|
14
21
|
end
|
15
22
|
|
16
|
-
string.encode
|
23
|
+
string.encode(string.encoding, universal_newline: true)
|
17
24
|
end
|
18
25
|
|
19
26
|
def maybe_parse(string)
|
data/lib/hippie_csv/version.rb
CHANGED
data/lib/hippie_csv.rb
CHANGED
data/spec/hippie_csv/support_spec.rb
CHANGED
@@ -23,7 +23,7 @@ describe HippieCSV::Support do
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
describe ".encode
|
26
|
+
describe ".encode" do
|
27
27
|
context "with invalid byte sequence" do
|
28
28
|
let(:string) { "\u0014\xFE\u000E\u0000" }
|
29
29
|
|
@@ -34,9 +34,7 @@ describe HippieCSV::Support do
|
|
34
34
|
it "works" do
|
35
35
|
expect(string).not_to be_valid_encoding
|
36
36
|
|
37
|
-
subject.encode
|
38
|
-
|
39
|
-
expect(string).to be_valid_encoding
|
37
|
+
expect(subject.encode(string)).to be_valid_encoding
|
40
38
|
end
|
41
39
|
end
|
42
40
|
|
@@ -48,9 +46,8 @@ describe HippieCSV::Support do
|
|
48
46
|
end
|
49
47
|
|
50
48
|
it "works" do
|
51
|
-
subject.encode
|
49
|
+
result = CSV.parse(subject.encode(string))
|
52
50
|
|
53
|
-
result = CSV.parse(string)
|
54
51
|
rows, columns = result.size, result.first.size
|
55
52
|
|
56
53
|
expect(rows).to eq(2)
|
data/spec/hippie_csv_spec.rb
CHANGED
@@ -17,7 +17,8 @@ describe HippieCSV do
|
|
17
17
|
|
18
18
|
describe ".parse" do
|
19
19
|
it "encodes the string" do
|
20
|
-
expect(subject::Support).to receive(:encode
|
20
|
+
expect(subject::Support).to receive(:encode).with(string)
|
21
|
+
allow(subject::Support).to receive(:maybe_parse).and_return(double)
|
21
22
|
|
22
23
|
subject.parse(string)
|
23
24
|
end
|
@@ -100,5 +101,13 @@ describe HippieCSV do
|
|
100
101
|
import = subject.read(path)
|
101
102
|
expect(import[0].count).to eq(9)
|
102
103
|
end
|
104
|
+
|
105
|
+
it "works for a hard case" do
|
106
|
+
path = fixture_path(:accents_semicolon_windows_1252)
|
107
|
+
|
108
|
+
import = subject.read(path)
|
109
|
+
expect(import[0][1]).to eq("Jérome")
|
110
|
+
expect(import[1][0]).to eq("Héloïse")
|
111
|
+
end
|
103
112
|
end
|
104
113
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hippie_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stephen O'Brien
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-06-
|
11
|
+
date: 2015-06-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - '>='
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: charlock_holmes
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
description:
|
70
84
|
email:
|
71
85
|
- stephen@intercom.io
|
@@ -85,6 +99,7 @@ files:
|
|
85
99
|
- lib/hippie_csv/errors.rb
|
86
100
|
- lib/hippie_csv/support.rb
|
87
101
|
- lib/hippie_csv/version.rb
|
102
|
+
- spec/fixtures/accents_semicolon_windows_1252.csv
|
88
103
|
- spec/fixtures/dos_line_ending.csv
|
89
104
|
- spec/fixtures/encoding.csv
|
90
105
|
- spec/fixtures/escaped_quotes.csv
|
@@ -123,6 +138,7 @@ signing_key:
|
|
123
138
|
specification_version: 4
|
124
139
|
summary: Tolerant, liberal CSV parsing
|
125
140
|
test_files:
|
141
|
+
- spec/fixtures/accents_semicolon_windows_1252.csv
|
126
142
|
- spec/fixtures/dos_line_ending.csv
|
127
143
|
- spec/fixtures/encoding.csv
|
128
144
|
- spec/fixtures/escaped_quotes.csv
|