hippie_csv 0.0.1 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 99c371eea25788cf2b155a089ef2378ce564a039
4
- data.tar.gz: 53c18182171b267f4c6a13f50b8672d39e177296
3
+ metadata.gz: 662fc68658433183bde7f943fff313555281db4a
4
+ data.tar.gz: c0202320766061ee14a7fc7077e3d85d9f0548d7
5
5
  SHA512:
6
- metadata.gz: 30ece254d7ba4e882d529c839fafa551979b3974d2af14ced46587d369a99ff94d9a3528bddac6ea174761a4430bd1d3b223717a5791cf8559218024da14f507
7
- data.tar.gz: d9ce69502180d757d890d69b30985a7200ab9611f1694bbb140c399c2e577764c07d2839a7e9cea856d8767de23dbe6a463e52f44645c3a3b089a8e49e3acf45
6
+ metadata.gz: 659b01340829dc9056ef86f5df2ad8eb6c70f5ed66977f39bc7720149c5274d0ea26861b4bc7db20c2bc72ff3ca9e9165e09dde31812deb486b0974cbeb2a4d8
7
+ data.tar.gz: 5c9118714c431e0385598759b22d6b7d3f90c5b490afb1b8aaf911620a9ae5031c9bce34d0a591b602e395bd4ab8fd35784ef9ecc364f6e3aabbab68fc41344a
data/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
  [ ![Codeship Status for intercom/hippie_csv](https://codeship.com/projects/f3b188e0-f312-0132-75cb-5ed004d44c71/status?branch=master)](https://codeship.com/projects/85324)
4
4
 
5
5
  Ruby's `CSV` is great. It complies with the [proposed CSV spec](https://www.ietf.org/rfc/rfc4180.txt)
6
- pretty well. If you pass its methods bad or incompliant CSVs, it’ll rightfully
6
+ pretty well. If you pass its methods bad or non-compliant CSVs, it’ll rightfully
7
7
  and loudly complain. It’s great 👍
8
8
 
9
9
  Except…if you want to be able to deal with files from the real world. At
data/hippie_csv.gemspec CHANGED
@@ -21,4 +21,6 @@ Gem::Specification.new do |spec|
21
21
  spec.add_development_dependency "rake"
22
22
  spec.add_development_dependency "rspec"
23
23
  spec.add_development_dependency "pry"
24
+
25
+ spec.add_dependency "charlock_holmes"
24
26
  end
@@ -1,7 +1,7 @@
1
1
  module HippieCSV
2
2
  class UnableToParseError < StandardError
3
- def self.explain
4
- raise new "Something went wrong. Report this CSV: https://github.com/intercom/hippie_csv"
3
+ def initialize(msg = "Something went wrong. Report this CSV: https://github.com/intercom/hippie_csv")
4
+ super(msg)
5
5
  end
6
6
  end
7
7
  end
@@ -1,4 +1,5 @@
1
1
  require "hippie_csv/constants"
2
+ require "charlock_holmes"
2
3
 
3
4
  module HippieCSV
4
5
  module Support
@@ -7,13 +8,19 @@ module HippieCSV
7
8
  File.read(file_path, encoding: ENCODING_WITH_BOM)
8
9
  end
9
10
 
10
- def encode!(string)
11
+ def encode(string)
11
12
  unless string.valid_encoding?
12
- string.encode!(ALTERNATE_ENCODING, ENCODING, invalid: :replace, replace: "")
13
- string.encode!(ENCODING, ALTERNATE_ENCODING)
13
+ current_encoding = CharlockHolmes::EncodingDetector.detect(string)[:encoding]
14
+
15
+ string = if !current_encoding.nil?
16
+ CharlockHolmes::Converter.convert(string, current_encoding, ENCODING)
17
+ else
18
+ string.encode(ALTERNATE_ENCODING, ENCODING, invalid: :replace, replace: "")
19
+ .encode(ENCODING, ALTERNATE_ENCODING)
20
+ end
14
21
  end
15
22
 
16
- string.encode!(string.encoding, universal_newline: true)
23
+ string.encode(string.encoding, universal_newline: true)
17
24
  end
18
25
 
19
26
  def maybe_parse(string)
@@ -1,3 +1,3 @@
1
1
  module HippieCSV
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/hippie_csv.rb CHANGED
@@ -9,7 +9,7 @@ module HippieCSV
9
9
  end
10
10
 
11
11
  def self.parse(string)
12
- Support.encode!(string)
13
- Support.maybe_parse(string) || UnableToParseError.explain
12
+ string = Support.encode(string)
13
+ Support.maybe_parse(string) || (raise UnableToParseError)
14
14
  end
15
15
  end
@@ -0,0 +1,3 @@
1
+ Vincent;Jérome;jvincent@example.com;06 02 21 32 91;Vincent Jérome;
2
+ Héloïse;Martin ;martinheloise@example.com;06 12 81 12 28;;
3
+ Guzole;Virgile ;v.guzole@example.com ;02 23 23 20 10;;
@@ -23,7 +23,7 @@ describe HippieCSV::Support do
23
23
  end
24
24
  end
25
25
 
26
- describe ".encode!" do
26
+ describe ".encode" do
27
27
  context "with invalid byte sequence" do
28
28
  let(:string) { "\u0014\xFE\u000E\u0000" }
29
29
 
@@ -34,9 +34,7 @@ describe HippieCSV::Support do
34
34
  it "works" do
35
35
  expect(string).not_to be_valid_encoding
36
36
 
37
- subject.encode!(string)
38
-
39
- expect(string).to be_valid_encoding
37
+ expect(subject.encode(string)).to be_valid_encoding
40
38
  end
41
39
  end
42
40
 
@@ -48,9 +46,8 @@ describe HippieCSV::Support do
48
46
  end
49
47
 
50
48
  it "works" do
51
- subject.encode!(string)
49
+ result = CSV.parse(subject.encode(string))
52
50
 
53
- result = CSV.parse(string)
54
51
  rows, columns = result.size, result.first.size
55
52
 
56
53
  expect(rows).to eq(2)
@@ -3,7 +3,7 @@ require "spec_helper"
3
3
  describe HippieCSV do
4
4
 
5
5
  it "defines a version" do
6
- expect(HippieCSV::VERSION).to eq('0.0.1')
6
+ expect(HippieCSV::VERSION).to eq("0.0.3")
7
7
  end
8
8
 
9
9
  end
@@ -17,7 +17,8 @@ describe HippieCSV do
17
17
 
18
18
  describe ".parse" do
19
19
  it "encodes the string" do
20
- expect(subject::Support).to receive(:encode!).with(string)
20
+ expect(subject::Support).to receive(:encode).with(string)
21
+ allow(subject::Support).to receive(:maybe_parse).and_return(double)
21
22
 
22
23
  subject.parse(string)
23
24
  end
@@ -100,5 +101,13 @@ describe HippieCSV do
100
101
  import = subject.read(path)
101
102
  expect(import[0].count).to eq(9)
102
103
  end
104
+
105
+ it "works for a hard case" do
106
+ path = fixture_path(:accents_semicolon_windows_1252)
107
+
108
+ import = subject.read(path)
109
+ expect(import[0][1]).to eq("Jérome")
110
+ expect(import[1][0]).to eq("Héloïse")
111
+ end
103
112
  end
104
113
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hippie_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stephen O'Brien
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-12 00:00:00.000000000 Z
11
+ date: 2015-06-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - '>='
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: charlock_holmes
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  description:
70
84
  email:
71
85
  - stephen@intercom.io
@@ -85,6 +99,7 @@ files:
85
99
  - lib/hippie_csv/errors.rb
86
100
  - lib/hippie_csv/support.rb
87
101
  - lib/hippie_csv/version.rb
102
+ - spec/fixtures/accents_semicolon_windows_1252.csv
88
103
  - spec/fixtures/dos_line_ending.csv
89
104
  - spec/fixtures/encoding.csv
90
105
  - spec/fixtures/escaped_quotes.csv
@@ -123,6 +138,7 @@ signing_key:
123
138
  specification_version: 4
124
139
  summary: Tolerant, liberal CSV parsing
125
140
  test_files:
141
+ - spec/fixtures/accents_semicolon_windows_1252.csv
126
142
  - spec/fixtures/dos_line_ending.csv
127
143
  - spec/fixtures/encoding.csv
128
144
  - spec/fixtures/escaped_quotes.csv