hippie_csv 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 99c371eea25788cf2b155a089ef2378ce564a039
4
- data.tar.gz: 53c18182171b267f4c6a13f50b8672d39e177296
3
+ metadata.gz: 662fc68658433183bde7f943fff313555281db4a
4
+ data.tar.gz: c0202320766061ee14a7fc7077e3d85d9f0548d7
5
5
  SHA512:
6
- metadata.gz: 30ece254d7ba4e882d529c839fafa551979b3974d2af14ced46587d369a99ff94d9a3528bddac6ea174761a4430bd1d3b223717a5791cf8559218024da14f507
7
- data.tar.gz: d9ce69502180d757d890d69b30985a7200ab9611f1694bbb140c399c2e577764c07d2839a7e9cea856d8767de23dbe6a463e52f44645c3a3b089a8e49e3acf45
6
+ metadata.gz: 659b01340829dc9056ef86f5df2ad8eb6c70f5ed66977f39bc7720149c5274d0ea26861b4bc7db20c2bc72ff3ca9e9165e09dde31812deb486b0974cbeb2a4d8
7
+ data.tar.gz: 5c9118714c431e0385598759b22d6b7d3f90c5b490afb1b8aaf911620a9ae5031c9bce34d0a591b602e395bd4ab8fd35784ef9ecc364f6e3aabbab68fc41344a
data/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
  [ ![Codeship Status for intercom/hippie_csv](https://codeship.com/projects/f3b188e0-f312-0132-75cb-5ed004d44c71/status?branch=master)](https://codeship.com/projects/85324)
4
4
 
5
5
  Ruby's `CSV` is great. It complies with the [proposed CSV spec](https://www.ietf.org/rfc/rfc4180.txt)
6
- pretty well. If you pass its methods bad or incompliant CSVs, it’ll rightfully
6
+ pretty well. If you pass its methods bad or non-compliant CSVs, it’ll rightfully
7
7
  and loudly complain. It’s great 👍
8
8
 
9
9
  Except…if you want to be able to deal with files from the real world. At
data/hippie_csv.gemspec CHANGED
@@ -21,4 +21,6 @@ Gem::Specification.new do |spec|
21
21
  spec.add_development_dependency "rake"
22
22
  spec.add_development_dependency "rspec"
23
23
  spec.add_development_dependency "pry"
24
+
25
+ spec.add_dependency "charlock_holmes"
24
26
  end
@@ -1,7 +1,7 @@
1
1
  module HippieCSV
2
2
  class UnableToParseError < StandardError
3
- def self.explain
4
- raise new "Something went wrong. Report this CSV: https://github.com/intercom/hippie_csv"
3
+ def initialize(msg = "Something went wrong. Report this CSV: https://github.com/intercom/hippie_csv")
4
+ super(msg)
5
5
  end
6
6
  end
7
7
  end
@@ -1,4 +1,5 @@
1
1
  require "hippie_csv/constants"
2
+ require "charlock_holmes"
2
3
 
3
4
  module HippieCSV
4
5
  module Support
@@ -7,13 +8,19 @@ module HippieCSV
7
8
  File.read(file_path, encoding: ENCODING_WITH_BOM)
8
9
  end
9
10
 
10
- def encode!(string)
11
+ def encode(string)
11
12
  unless string.valid_encoding?
12
- string.encode!(ALTERNATE_ENCODING, ENCODING, invalid: :replace, replace: "")
13
- string.encode!(ENCODING, ALTERNATE_ENCODING)
13
+ current_encoding = CharlockHolmes::EncodingDetector.detect(string)[:encoding]
14
+
15
+ string = if !current_encoding.nil?
16
+ CharlockHolmes::Converter.convert(string, current_encoding, ENCODING)
17
+ else
18
+ string.encode(ALTERNATE_ENCODING, ENCODING, invalid: :replace, replace: "")
19
+ .encode(ENCODING, ALTERNATE_ENCODING)
20
+ end
14
21
  end
15
22
 
16
- string.encode!(string.encoding, universal_newline: true)
23
+ string.encode(string.encoding, universal_newline: true)
17
24
  end
18
25
 
19
26
  def maybe_parse(string)
@@ -1,3 +1,3 @@
1
1
  module HippieCSV
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/hippie_csv.rb CHANGED
@@ -9,7 +9,7 @@ module HippieCSV
9
9
  end
10
10
 
11
11
  def self.parse(string)
12
- Support.encode!(string)
13
- Support.maybe_parse(string) || UnableToParseError.explain
12
+ string = Support.encode(string)
13
+ Support.maybe_parse(string) || (raise UnableToParseError)
14
14
  end
15
15
  end
@@ -0,0 +1,3 @@
1
+ Vincent;Jérome;jvincent@example.com;06 02 21 32 91;Vincent Jérome;
2
+ Héloïse;Martin ;martinheloise@example.com;06 12 81 12 28;;
3
+ Guzole;Virgile ;v.guzole@example.com ;02 23 23 20 10;;
@@ -23,7 +23,7 @@ describe HippieCSV::Support do
23
23
  end
24
24
  end
25
25
 
26
- describe ".encode!" do
26
+ describe ".encode" do
27
27
  context "with invalid byte sequence" do
28
28
  let(:string) { "\u0014\xFE\u000E\u0000" }
29
29
 
@@ -34,9 +34,7 @@ describe HippieCSV::Support do
34
34
  it "works" do
35
35
  expect(string).not_to be_valid_encoding
36
36
 
37
- subject.encode!(string)
38
-
39
- expect(string).to be_valid_encoding
37
+ expect(subject.encode(string)).to be_valid_encoding
40
38
  end
41
39
  end
42
40
 
@@ -48,9 +46,8 @@ describe HippieCSV::Support do
48
46
  end
49
47
 
50
48
  it "works" do
51
- subject.encode!(string)
49
+ result = CSV.parse(subject.encode(string))
52
50
 
53
- result = CSV.parse(string)
54
51
  rows, columns = result.size, result.first.size
55
52
 
56
53
  expect(rows).to eq(2)
@@ -3,7 +3,7 @@ require "spec_helper"
3
3
  describe HippieCSV do
4
4
 
5
5
  it "defines a version" do
6
- expect(HippieCSV::VERSION).to eq('0.0.1')
6
+ expect(HippieCSV::VERSION).to eq("0.0.3")
7
7
  end
8
8
 
9
9
  end
@@ -17,7 +17,8 @@ describe HippieCSV do
17
17
 
18
18
  describe ".parse" do
19
19
  it "encodes the string" do
20
- expect(subject::Support).to receive(:encode!).with(string)
20
+ expect(subject::Support).to receive(:encode).with(string)
21
+ allow(subject::Support).to receive(:maybe_parse).and_return(double)
21
22
 
22
23
  subject.parse(string)
23
24
  end
@@ -100,5 +101,13 @@ describe HippieCSV do
100
101
  import = subject.read(path)
101
102
  expect(import[0].count).to eq(9)
102
103
  end
104
+
105
+ it "works for a hard case" do
106
+ path = fixture_path(:accents_semicolon_windows_1252)
107
+
108
+ import = subject.read(path)
109
+ expect(import[0][1]).to eq("Jérome")
110
+ expect(import[1][0]).to eq("Héloïse")
111
+ end
103
112
  end
104
113
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hippie_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stephen O'Brien
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-12 00:00:00.000000000 Z
11
+ date: 2015-06-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - '>='
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: charlock_holmes
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  description:
70
84
  email:
71
85
  - stephen@intercom.io
@@ -85,6 +99,7 @@ files:
85
99
  - lib/hippie_csv/errors.rb
86
100
  - lib/hippie_csv/support.rb
87
101
  - lib/hippie_csv/version.rb
102
+ - spec/fixtures/accents_semicolon_windows_1252.csv
88
103
  - spec/fixtures/dos_line_ending.csv
89
104
  - spec/fixtures/encoding.csv
90
105
  - spec/fixtures/escaped_quotes.csv
@@ -123,6 +138,7 @@ signing_key:
123
138
  specification_version: 4
124
139
  summary: Tolerant, liberal CSV parsing
125
140
  test_files:
141
+ - spec/fixtures/accents_semicolon_windows_1252.csv
126
142
  - spec/fixtures/dos_line_ending.csv
127
143
  - spec/fixtures/encoding.csv
128
144
  - spec/fixtures/escaped_quotes.csv