hippie_csv 0.0.11 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 07464f9018551eea7e183ef2ffd0e855b8a46d6a
4
- data.tar.gz: 98b28c4757768af0fe785127914bdc964a067cff
3
+ metadata.gz: aca3f93c2f4a521f946abc23a9bf258fa32e1e5f
4
+ data.tar.gz: e7bb585167d4a9cc7b351593650991e7383359a5
5
5
  SHA512:
6
- metadata.gz: 346448b5193bc9beeddee5ec1dfe066a795d0a8820b2dbf28910925bf461feaec1cbbcd886394bf8323d47d931cbfda6e540febe5d5a51935b6fc127c0b61573
7
- data.tar.gz: ec3278642148bc8597af5d5128fe2c9ac2231fa7b87c19dc40d3a0933ba0a41c0407464bcbe938609e3ea55c72587d39c3c50d56b04c280e4300bd8f5f27e44e
6
+ metadata.gz: 37be4a8ccff26295f9af6109207b8337c9bb0b9bddc392beb4ad7a668ff1102525dfb36e7eacdc462d5da6b61ac50de2681b2e6c3e3b3d2c8dbd46231dcd5748
7
+ data.tar.gz: bd0a83a02adf59c29fb7ebc8644b500d728552e902d04b4d5cb8262e8b485fa0c92d42b569ee6be6cafeeec5e65af566b9b809543fc0a7f96bc0d899f4f374cc
data/README.md CHANGED
@@ -31,14 +31,23 @@ Or install it yourself as:
31
31
 
32
32
  ## Usage
33
33
 
34
- Exposes two public methods: `read` (for paths to files), and `parse` (for
35
- strings).
34
+ Exposes three public methods:
35
+ 1. `.read` reads a file path into an array. It reads the file all at once, building the whole CSV object in memory.
36
+ 2. `.parse` parses an in-memory string into an array.
37
+ 3. `.stream` reads from a file path and parses it line by line, calling the given block on each row.
38
+
39
+ **Note**: Processing large files with `read` or `parse` is a memory-intensive operation. Use `stream` to parse a CSV file line by line and save memory. This method uses less memory but takes longer, as each line is run through `parse` individually.
40
+
36
41
 
37
42
  ```ruby
38
43
  require 'hippie_csv'
39
44
 
40
45
  HippieCSV.read("path/to/data.csv")
41
46
 
47
+ HippieCSV.stream("path/to/data.csv") do |row|
48
+ # use row here...
49
+ end
50
+
42
51
  HippieCSV.parse(csv_string)
43
52
  ```
44
53
 
@@ -4,12 +4,15 @@ require "hippie_csv/errors"
4
4
 
5
5
  module HippieCSV
6
6
  def self.read(path)
7
- string = Support.file_path_to_string(path)
7
+ string = File.read(path, encoding: ENCODING_WITH_BOM)
8
8
  parse(string)
9
9
  end
10
10
 
11
11
  def self.parse(string)
12
- string = Support.encode(string)
13
12
  Support.maybe_parse(string) || (raise UnableToParseError)
14
13
  end
14
+
15
+ def self.stream(path, &block)
16
+ Support.maybe_stream(path, &block)
17
+ end
15
18
  end
@@ -4,10 +4,6 @@ require "rchardet"
4
4
  module HippieCSV
5
5
  module Support
6
6
  class << self
7
- def file_path_to_string(file_path)
8
- File.read(file_path, encoding: ENCODING_WITH_BOM)
9
- end
10
-
11
7
  def encode(string)
12
8
  string = ensure_valid_encoding(string)
13
9
 
@@ -19,8 +15,10 @@ module HippieCSV
19
15
  end
20
16
 
21
17
  def maybe_parse(string)
18
+ encoded_string = encode(string)
19
+
22
20
  QUOTE_CHARACTERS.find do |quote_character|
23
- [string, tolerate_escaping(string, quote_character), dump_quotes(string, quote_character)].find do |string_to_parse|
21
+ [encoded_string, tolerate_escaping(encoded_string, quote_character), dump_quotes(encoded_string, quote_character)].find do |string_to_parse|
24
22
  rescuing_malformed do
25
23
  return parse_csv(string_to_parse.squeeze("\n").strip, quote_character)
26
24
  end
@@ -36,6 +34,13 @@ module HippieCSV
36
34
  )
37
35
  end
38
36
 
37
+ def maybe_stream(path, &block)
38
+ File.foreach(path, encoding: ENCODING_WITH_BOM) do |line|
39
+ row = maybe_parse(line)
40
+ block.call(row.first) if row.first
41
+ end
42
+ end
43
+
39
44
  def dump_quotes(string, quote_character)
40
45
  string.gsub(quote_character, "")
41
46
  end
@@ -1,3 +1,3 @@
1
1
  module HippieCSV
2
- VERSION = "0.0.11"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -0,0 +1,2 @@
1
+ name,email
2
+ stephen,test@example.com
@@ -2,27 +2,6 @@ require "spec_helper"
2
2
 
3
3
  describe HippieCSV::Support do
4
4
 
5
- describe ".file_to_string" do
6
- let(:file_path) { fixture_path(:normal) }
7
- let(:result) { HippieCSV::Support.file_path_to_string(file_path) }
8
-
9
- it "provides a string" do
10
- expect(result.class).to eq String
11
- end
12
-
13
- it "reads the file" do
14
- expect(result.slice(0,8)).to eq 'id,email'
15
- end
16
-
17
- context "with a byte order mark" do
18
- let(:file_path) { fixture_path(:with_byte_order_mark) }
19
-
20
- it "works" do
21
- expect(result).to eq '"Name","Email Address","Date Added"'
22
- end
23
- end
24
- end
25
-
26
5
  describe ".encode" do
27
6
  context "with invalid byte sequence" do
28
7
  let(:string) { "\u0014\xFE\u000E\u0000" }
@@ -68,8 +47,23 @@ describe HippieCSV::Support do
68
47
  end
69
48
 
70
49
  describe ".maybe_parse" do
71
- it "needs to be written" do
72
- skip # TODO write this test
50
+ let(:file_path) { fixture_path(:small_file) }
51
+ it "works" do
52
+ expect(subject.maybe_parse(File.read(file_path))).to eq(
53
+ [["name", "email"], ["stephen", "test@example.com"]]
54
+ )
55
+ end
56
+ end
57
+
58
+ describe ".maybe_stream" do
59
+ let(:file_path) { fixture_path(:small_file) }
60
+ it "works" do
61
+ result = []
62
+ subject.maybe_stream(file_path) { |row| result << row }
63
+
64
+ expect(result).to eq(
65
+ [["name", "email"], ["stephen", "test@example.com"]]
66
+ )
73
67
  end
74
68
  end
75
69
 
@@ -3,7 +3,7 @@ require "spec_helper"
3
3
  describe HippieCSV do
4
4
 
5
5
  it "defines a version" do
6
- expect(HippieCSV::VERSION).to eq("0.0.11")
6
+ expect(HippieCSV::VERSION).to eq("0.1.0")
7
7
  end
8
8
 
9
9
  end
@@ -2,27 +2,28 @@ require "spec_helper"
2
2
  require "csv"
3
3
 
4
4
  describe HippieCSV do
5
+
5
6
  let(:string) { "test" }
6
7
 
7
8
  describe ".read" do
8
- let(:path) { double }
9
9
 
10
- it "converts to string and parses" do
11
- expect(subject::Support).to receive(:file_path_to_string).with(path).and_return(string)
12
- expect(subject).to receive(:parse).with(string)
10
+ it "reads and parses the file" do
11
+ path = fixture_path(:normal)
13
12
 
14
- subject.read(path)
13
+ result = subject.read(path)
14
+ expect(result.first[0..1]).to eq(["id", "email"])
15
15
  end
16
- end
17
16
 
18
- describe ".parse" do
19
- it "encodes the string" do
20
- expect(subject::Support).to receive(:encode).with(string)
21
- allow(subject::Support).to receive(:maybe_parse).and_return(double)
17
+ it "reads and parses the file with a byte order mark" do
18
+ path = fixture_path(:with_byte_order_mark)
22
19
 
23
- subject.parse(string)
20
+ result = subject.read(path)
21
+ expect(result).to eq([["Name", "Email Address", "Date Added"]])
24
22
  end
25
23
 
24
+ end
25
+
26
+ describe ".parse" do
26
27
  it "defers to support parse method" do
27
28
  result = double
28
29
  expect(subject::Support).to receive(:maybe_parse).with(string).and_return(result)
@@ -46,115 +47,156 @@ describe HippieCSV do
46
47
  end
47
48
  end
48
49
 
49
- context "integration cases: hard/encountered problems" do
50
- it "works when a BOM is present in the file" do
51
- path = fixture_path(:with_byte_order_mark)
52
- expect { CSV.read(path) }.to raise_error(CSV::MalformedCSVError)
50
+ describe ".stream" do
51
+ path = fixture_path(:normal)
52
+ let(:proc) { Proc.new {} }
53
53
 
54
- import = subject.read(path)
55
- expect(import[0]).to eq(["Name", "Email Address", "Date Added"])
56
- end
54
+ it "encodes the string" do
55
+ allow(subject::Support).to receive(:maybe_stream).and_return(double)
57
56
 
58
- it "works with a malformed CSV" do
59
- path = fixture_path(:malformed)
60
- expect { CSV.read(path) }.to raise_error(CSV::MalformedCSVError)
57
+ subject.stream(path, &proc)
58
+ end
61
59
 
62
- import = subject.read(path)
63
- expect(import[0]).to eq(%w(site lon lat max min precip snow snowdepth))
60
+ it "defers to support stream method" do
61
+ result = double
62
+ expect(subject::Support).to receive(:maybe_stream).with(path, &proc).and_return(result)
63
+ expect(subject.stream(path, &proc)).to eq(result)
64
64
  end
65
65
 
66
- it "works with odd encoding & emoji!" do
67
- path = fixture_path(:encoding)
68
- expect { CSV.read(path) }.to raise_error(ArgumentError)
66
+ it "works" do
67
+ path = fixture_path(:normal)
69
68
 
70
- import = subject.read(path)
71
- expect(import[0].count).to eq(4)
69
+ result = []
70
+ subject.stream(path) { |row| result << row }
71
+ expect(result[0]).to eq(["id", "email", "name", "country", "city", "created_at", "admin"])
72
72
  end
73
+ end
73
74
 
74
- it "works with an excel export" do
75
- path = fixture_path(:excel)
75
+ context "integration cases: hard/encountered problems" do
76
76
 
77
- import = subject.read(path)
78
- expect(import[0].count).to eq(24)
77
+ def read(path)
78
+ subject.read(path)
79
79
  end
80
80
 
81
- it "works with unescaped internal quotes" do
82
- path = fixture_path(:internal_quotes)
83
- expect { CSV.read(path) }.to raise_error(CSV::MalformedCSVError)
81
+ def stream(path)
82
+ [].tap do |rows|
83
+ subject.stream(path) do |row|
84
+ rows << row
85
+ end
86
+ end
87
+ end
84
88
 
85
- import = subject.read(path)
86
- expect(import[1][1]).to eq("123")
87
- expect(import[1][2]).to eq("James Jimmy Doe")
89
+ def subject_call_method(method, path)
90
+ send(method, path)
88
91
  end
89
92
 
90
- it "works with escaped quotes" do
91
- path = fixture_path(:escaped_quotes)
93
+ it "::read deals with a long, challenging file (and quickly)" do
94
+ start_time = Time.now
95
+ path = fixture_path(:never_ordered)
92
96
 
93
97
  import = subject.read(path)
94
- expect(import[0][1]).to eq("Lalo \"ElPapi\" Neymar")
95
- expect(import[0][2]).to eq("lalo@example.com")
98
+
99
+ expect(import[0].count).to eq(10)
100
+ expect(import.count).to eq(32803)
101
+ expect(Time.now).to be_within(5).of(start_time)
96
102
  end
97
103
 
98
- it "works with an invalid escaped quotes case" do
99
- path = fixture_path(:escaped_quotes_semicolons)
104
+ %w[read stream].each do |method|
105
+ it "::#{method} works when a BOM is present in the file" do
106
+ path = fixture_path(:with_byte_order_mark)
100
107
 
101
- import = subject.read(path)
102
- expect(import[0][0]).to eq("133")
103
- expect(import[0][1]).to eq("z3268856")
104
- expect(import[0][2]).to eq("stephen@example.com")
105
- end
108
+ import = subject_call_method(method, path)
109
+ expect(import[0]).to eq(["Name", "Email Address", "Date Added"])
110
+ end
106
111
 
107
- it "works for a complicated case involving bad newlines and quote chars" do
108
- path = fixture_path(:dos_line_ending)
112
+ it "::#{method} works with a malformed CSV" do
113
+ path = fixture_path(:malformed)
114
+ expect { CSV.read(path) }.to raise_error(CSV::MalformedCSVError)
109
115
 
110
- import = subject.read(path)
111
- expect(import[0].count).to eq(9)
112
- end
116
+ import = subject_call_method(method, path)
117
+ expect(import[0]).to eq(%w(site lon lat max min precip snow snowdepth))
118
+ end
113
119
 
114
- it "works for a hard case" do
115
- path = fixture_path(:accents_semicolon_windows_1252)
120
+ it "::#{method} works with odd encoding & emoji!" do
121
+ path = fixture_path(:encoding)
122
+ expect { CSV.read(path) }.to raise_error(ArgumentError)
116
123
 
117
- import = subject.read(path)
118
- expect(import[0][1]).to eq("Jérome")
119
- expect(import[1][0]).to eq("Héloise")
120
- end
124
+ import = subject_call_method(method, path)
125
+ expect(import[0].count).to eq(4)
126
+ end
121
127
 
122
- it "deals with a long, challenging file (and quickly)" do
123
- start_time = Time.now
124
- path = fixture_path(:never_ordered)
128
+ it "::#{method} works with an excel export" do
129
+ path = fixture_path(:excel)
125
130
 
126
- import = subject.read(path)
131
+ import = subject_call_method(method, path)
132
+ expect(import[0].count).to eq(24)
133
+ end
127
134
 
128
- expect(import[0].count).to eq(10)
129
- expect(import.count).to eq(32803)
130
- expect(Time.now).to be_within(5).of(start_time)
131
- end
135
+ it "::#{method} works with unescaped internal quotes" do
136
+ path = fixture_path(:internal_quotes)
137
+
138
+ import = subject_call_method(method, path)
139
+ expect(import[1][1]).to eq("123")
140
+ expect(import[1][2]).to eq("James Jimmy Doe")
141
+ end
132
142
 
133
- it "works when many invalid quote types contained" do
134
- path = fixture_path(:bad_quoting)
143
+ it "::#{method} works with escaped quotes" do
144
+ path = fixture_path(:escaped_quotes)
135
145
 
136
- expect { CSV.read(path) }.to raise_error(CSV::MalformedCSVError)
137
- expect {
138
- import = subject.read(path)
139
- expect(import.map(&:count).uniq).to eq([11])
140
- expect(import.count).to eq(8)
141
- }.not_to raise_error
142
- end
146
+ import = subject_call_method(method, path)
147
+ expect(import[0][1]).to eq("Lalo \"ElPapi\" Neymar")
148
+ expect(import[0][2]).to eq("lalo@example.com")
149
+ end
143
150
 
144
- it "strips leading/trailing blank lines" do
145
- path = fixture_path(:trailing_leading_blank_lines)
151
+ it "::#{method} works with an invalid escaped quotes case" do
152
+ path = fixture_path(:escaped_quotes_semicolons)
146
153
 
147
- import = subject.read(path)
148
- expect(import.first).not_to be_empty
149
- expect(import.last).not_to be_empty
150
- end
154
+ import = subject_call_method(method, path)
155
+ expect(import[0][0]).to eq("133")
156
+ expect(import[0][1]).to eq("z3268856")
157
+ expect(import[0][2]).to eq("stephen@example.com")
158
+ end
159
+
160
+ it "::#{method} works for a complicated case involving bad newlines and quote chars" do
161
+ path = fixture_path(:dos_line_ending)
162
+
163
+ import = subject_call_method(method, path)
164
+ expect(import[0].count).to eq(9)
165
+ end
166
+
167
+ it "::#{method} works for a hard case" do
168
+ path = fixture_path(:accents_semicolon_windows_1252)
169
+
170
+ import = subject_call_method(method, path)
171
+ expect(import[0][1]).to eq("Jérome")
172
+ expect(import[1][0]).to eq("Héloise")
173
+ end
174
+
175
+ it "::#{method} works when many invalid quote types contained" do
176
+ path = fixture_path(:bad_quoting)
177
+
178
+ expect {
179
+ import = subject_call_method(method, path)
180
+ expect(import.map(&:count).uniq).to eq([11])
181
+ expect(import.count).to eq(8)
182
+ }.not_to raise_error
183
+ end
184
+
185
+ it "::#{method} strips leading/trailing blank lines" do
186
+ path = fixture_path(:trailing_leading_blank_lines)
187
+
188
+ import = subject_call_method(method, path)
189
+ expect(import.first).not_to be_empty
190
+ expect(import.last).not_to be_empty
191
+ end
151
192
 
152
- it "maintains coherent column count when stripping blank lines" do
153
- [:blank_lines_crlf, :trailing_leading_blank_lines].each do |fixture_name|
154
- path = fixture_path(fixture_name)
193
+ it "::#{method} maintains coherent column count when stripping blank lines" do
194
+ [:blank_lines_crlf, :trailing_leading_blank_lines].each do |fixture_name|
195
+ path = fixture_path(fixture_name)
155
196
 
156
- import = subject.read(path)
157
- expect(import.map(&:length).uniq.size).to eq(1)
197
+ import = subject_call_method(method, path)
198
+ expect(import.map(&:length).uniq.size).to eq(1)
199
+ end
158
200
  end
159
201
  end
160
202
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hippie_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stephen O'Brien
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-01 00:00:00.000000000 Z
11
+ date: 2017-10-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -111,6 +111,7 @@ files:
111
111
  - spec/fixtures/malformed.csv
112
112
  - spec/fixtures/never_ordered.csv
113
113
  - spec/fixtures/normal.csv
114
+ - spec/fixtures/small_file.csv
114
115
  - spec/fixtures/trailing_leading_blank_lines.csv
115
116
  - spec/fixtures/with_byte_order_mark.csv
116
117
  - spec/hippie_csv/constants_spec.rb
@@ -138,7 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
138
139
  version: '0'
139
140
  requirements: []
140
141
  rubyforge_project:
141
- rubygems_version: 2.5.1
142
+ rubygems_version: 2.6.13
142
143
  signing_key:
143
144
  specification_version: 4
144
145
  summary: Tolerant, liberal CSV parsing
@@ -155,6 +156,7 @@ test_files:
155
156
  - spec/fixtures/malformed.csv
156
157
  - spec/fixtures/never_ordered.csv
157
158
  - spec/fixtures/normal.csv
159
+ - spec/fixtures/small_file.csv
158
160
  - spec/fixtures/trailing_leading_blank_lines.csv
159
161
  - spec/fixtures/with_byte_order_mark.csv
160
162
  - spec/hippie_csv/constants_spec.rb