csv_lazy 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -0
- data/Gemfile.lock +2 -0
- data/VERSION +1 -1
- data/csv_lazy.gemspec +7 -3
- data/lib/csv_lazy.rb +70 -23
- data/spec/csv_lazy_spec.rb +37 -2
- data/spec/test2.csv +0 -0
- metadata +20 -3
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.4
|
1
|
+
0.0.5
|
data/csv_lazy.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "csv_lazy"
|
8
|
-
s.version = "0.0.4"
|
8
|
+
s.version = "0.0.5"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Kasper Johansen"]
|
12
|
-
s.date = "2013-03-
|
12
|
+
s.date = "2013-03-25"
|
13
13
|
s.description = "A small CSV lib that skips whitespace-format-bugs and more."
|
14
14
|
s.email = "k@spernj.org"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -29,7 +29,8 @@ Gem::Specification.new do |s|
|
|
29
29
|
"lib/csv_lazy.rb",
|
30
30
|
"spec/csv_lazy_spec.rb",
|
31
31
|
"spec/spec_helper.rb",
|
32
|
-
"spec/test1.csv.gz"
|
32
|
+
"spec/test1.csv.gz",
|
33
|
+
"spec/test2.csv"
|
33
34
|
]
|
34
35
|
s.homepage = "http://github.com/kaspernj/csv_lazy"
|
35
36
|
s.licenses = ["MIT"]
|
@@ -41,17 +42,20 @@ Gem::Specification.new do |s|
|
|
41
42
|
s.specification_version = 3
|
42
43
|
|
43
44
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
45
|
+
s.add_runtime_dependency(%q<string_utils>, [">= 0"])
|
44
46
|
s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
|
45
47
|
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
46
48
|
s.add_development_dependency(%q<bundler>, [">= 1.0.0"])
|
47
49
|
s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
|
48
50
|
else
|
51
|
+
s.add_dependency(%q<string_utils>, [">= 0"])
|
49
52
|
s.add_dependency(%q<rspec>, ["~> 2.8.0"])
|
50
53
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
51
54
|
s.add_dependency(%q<bundler>, [">= 1.0.0"])
|
52
55
|
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
53
56
|
end
|
54
57
|
else
|
58
|
+
s.add_dependency(%q<string_utils>, [">= 0"])
|
55
59
|
s.add_dependency(%q<rspec>, ["~> 2.8.0"])
|
56
60
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
57
61
|
s.add_dependency(%q<bundler>, [">= 1.0.0"])
|
data/lib/csv_lazy.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
require "string_utils"
|
3
|
+
|
1
4
|
#A simple library for parsing CSV-files through IO's. Solves corrupt file formats automatically like when files contains several spaces after a column and more.
|
2
5
|
class Csv_lazy
|
3
6
|
include Enumerable
|
@@ -13,7 +16,8 @@ class Csv_lazy
|
|
13
16
|
:quote_char => '"',
|
14
17
|
:row_sep => "\n",
|
15
18
|
:col_sep => ";",
|
16
|
-
:headers => false
|
19
|
+
:headers => false,
|
20
|
+
:buffer_length => 4096
|
17
21
|
}.merge(args)
|
18
22
|
|
19
23
|
@io = @args[:io]
|
@@ -22,9 +26,14 @@ class Csv_lazy
|
|
22
26
|
@debug = @args[:debug]
|
23
27
|
@encode = @args[:encode]
|
24
28
|
@mutex = Mutex.new
|
29
|
+
@buffer_length = @args[:buffer_length]
|
30
|
+
@escape_char = "\\"
|
31
|
+
@escaped_quote = "#{@escape_char}#{@args[:quote_char]}"
|
32
|
+
@escaped_quote_double = "#{@escape_char}#{@escape_char}#{@args[:quote_char]}"
|
33
|
+
|
25
34
|
#@debug = true
|
26
35
|
|
27
|
-
accepted = [:encode, :quote_char, :row_sep, :col_sep, :io, :debug, :headers]
|
36
|
+
accepted = [:encode, :quote_char, :row_sep, :col_sep, :io, :debug, :headers, :buffer_length]
|
28
37
|
@args.each do |key, val|
|
29
38
|
if accepted.index(key) == nil
|
30
39
|
raise "Unknown argument: '#{key}'."
|
@@ -60,24 +69,20 @@ class Csv_lazy
|
|
60
69
|
|
61
70
|
#Yields each row as an array.
|
62
71
|
def each
|
63
|
-
|
64
|
-
|
65
|
-
|
72
|
+
if block_given?
|
73
|
+
@mutex.synchronize do
|
74
|
+
while row = read_row
|
75
|
+
yield(row)
|
76
|
+
end
|
66
77
|
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
private
|
71
|
-
|
72
|
-
#Reads more content into the buffer.
|
73
|
-
def read_buffer
|
74
|
-
read = @io.gets
|
75
|
-
|
76
|
-
if !read
|
77
|
-
@eof = true
|
78
78
|
else
|
79
|
-
|
80
|
-
|
79
|
+
Enumerable.new do |yielder|
|
80
|
+
@mutex.synchronize do
|
81
|
+
while row = read_row
|
82
|
+
yielder << row
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
81
86
|
end
|
82
87
|
end
|
83
88
|
|
@@ -109,6 +114,20 @@ class Csv_lazy
|
|
109
114
|
end
|
110
115
|
end
|
111
116
|
|
117
|
+
private
|
118
|
+
|
119
|
+
#Reads more content into the buffer.
|
120
|
+
def read_buffer
|
121
|
+
read = @io.gets
|
122
|
+
|
123
|
+
if !read
|
124
|
+
@eof = true
|
125
|
+
else
|
126
|
+
read = read.encode(@encode) if @encode
|
127
|
+
@buffer << read
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
112
131
|
#Runs a regex against the buffer. If matched it also removes it from the buffer.
|
113
132
|
def read_remove_regex(regex)
|
114
133
|
if match = @buffer.match(regex)
|
@@ -130,23 +149,50 @@ class Csv_lazy
|
|
130
149
|
return false
|
131
150
|
end
|
132
151
|
|
152
|
+
def unescape(str)
|
153
|
+
return StringUtils.strtr(str, {
|
154
|
+
"\\\\" => "\\",
|
155
|
+
"\\t" => "\t",
|
156
|
+
"\\n" => "\n",
|
157
|
+
"\\\"" => "\""
|
158
|
+
})
|
159
|
+
end
|
160
|
+
|
133
161
|
#Adds the next column to the row. Returns true if more columns should be read or false if this was the end of the row.
|
134
162
|
def read_next_col
|
135
|
-
read_buffer if @buffer.length <
|
163
|
+
read_buffer if @buffer.length < @buffer_length
|
136
164
|
return false if @buffer.empty? and @eof
|
137
165
|
|
138
166
|
if @buffer.empty? or read_remove_regex(@regex_row_end)
|
139
167
|
return false
|
140
168
|
elsif match = read_remove_regex(@regex_begin_quote_char)
|
141
169
|
read = ""
|
170
|
+
col_content = ""
|
142
171
|
|
143
172
|
loop do
|
144
173
|
match_read = read_remove_regex(@regex_read_until_quote_char)
|
145
174
|
if !match_read
|
146
|
-
|
175
|
+
if @eof
|
176
|
+
add_col(@buffer) unless @buffer.empty?
|
177
|
+
@buffer = ""
|
178
|
+
break
|
179
|
+
else
|
180
|
+
read_buffer
|
181
|
+
end
|
147
182
|
else
|
148
|
-
|
149
|
-
|
183
|
+
all = match_read[0]
|
184
|
+
escaped_quote_char = all[-@escaped_quote.length, @escaped_quote.length]
|
185
|
+
double_escaped_quote_char = all[-@escaped_quote_double.length, @escaped_quote_double.length]
|
186
|
+
all_without_quote = match_read[1]
|
187
|
+
|
188
|
+
if escaped_quote_char == @escaped_quote and double_escaped_quote_char != @escaped_quote_double
|
189
|
+
#continue reading - the quote char is escaped.
|
190
|
+
col_content << all
|
191
|
+
else
|
192
|
+
col_content << match_read[1]
|
193
|
+
add_col(unescape(col_content))
|
194
|
+
break
|
195
|
+
end
|
150
196
|
end
|
151
197
|
end
|
152
198
|
|
@@ -182,9 +228,10 @@ class Csv_lazy
|
|
182
228
|
read_buffer
|
183
229
|
raise Errno::EAGAIN
|
184
230
|
else
|
185
|
-
raise "Dont know what to do with buffer: #{@buffer}"
|
231
|
+
raise "Dont know what to do with buffer: '#{@buffer}'."
|
186
232
|
end
|
187
233
|
rescue Errno::EAGAIN
|
234
|
+
puts "csv_lazy: Retry! Probably we ran out of buffer..." if @debug
|
188
235
|
retry
|
189
236
|
end
|
190
237
|
|
data/spec/csv_lazy_spec.rb
CHANGED
@@ -86,8 +86,8 @@ describe "CsvLazy" do
|
|
86
86
|
|
87
87
|
it "should be able to use headers and return hashes instead" do
|
88
88
|
cont = "\"name\",age\r\n"
|
89
|
-
cont
|
90
|
-
cont
|
89
|
+
cont << "\"Kasper Johansen\",27\r\n"
|
90
|
+
cont << "\"Christina Stoeckel\",\"25\"\r\n"
|
91
91
|
|
92
92
|
line = 0
|
93
93
|
Csv_lazy.new(:col_sep => ",", :io => StringIO.new(cont), :headers => true, :row_sep => "\r\n") do |csv|
|
@@ -109,4 +109,39 @@ describe "CsvLazy" do
|
|
109
109
|
|
110
110
|
line.should eql(2)
|
111
111
|
end
|
112
|
+
|
113
|
+
it "should be able to encode incoming strings from weird files without crashing" do
|
114
|
+
File.open("#{File.dirname(__FILE__)}/test2.csv", "rb", :encoding => "UTF-16LE") do |fp|
|
115
|
+
#Remove invalid UTF content.
|
116
|
+
fp.read(2)
|
117
|
+
|
118
|
+
Csv_lazy.new(:col_sep => ",", :io => fp, :headers => true, :row_sep => "\r\n", :quote_char => '"', :encode => "US-ASCII", :debug => false) do |csv|
|
119
|
+
csv.keys[0].should eql(:legacy_user_id)
|
120
|
+
csv.keys[1].should eql(:savings_percentage)
|
121
|
+
csv.keys[2].should eql(:active)
|
122
|
+
csv.keys.length.should eql(3)
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
it "should do proper escaping" do
|
128
|
+
cont = "\"Test1\";\"Test2 \\\"Wee\\\"\"\r\n"
|
129
|
+
cont << "\"Test3\";\"Test4 \\\"Wee\\\"\";\"Test5 \\\"Wee\\\"\"\r\n"
|
130
|
+
|
131
|
+
csv = Csv_lazy.new(:col_sep => ";", :io => StringIO.new(cont), :row_sep => "\r\n")
|
132
|
+
|
133
|
+
row = csv.read_row
|
134
|
+
row[0].should eql("Test1")
|
135
|
+
row[1].should eql("Test2 \"Wee\"")
|
136
|
+
row.length.should eql(2)
|
137
|
+
|
138
|
+
row = csv.read_row
|
139
|
+
row[0].should eql("Test3")
|
140
|
+
row[1].should eql("Test4 \"Wee\"")
|
141
|
+
row[2].should eql("Test5 \"Wee\"")
|
142
|
+
row.length.should eql(3)
|
143
|
+
|
144
|
+
row = csv.read_row
|
145
|
+
row.should eql(false)
|
146
|
+
end
|
112
147
|
end
|
data/spec/test2.csv
ADDED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv_lazy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,8 +9,24 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-
|
12
|
+
date: 2013-03-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: string_utils
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
14
30
|
- !ruby/object:Gem::Dependency
|
15
31
|
name: rspec
|
16
32
|
requirement: !ruby/object:Gem::Requirement
|
@@ -96,6 +112,7 @@ files:
|
|
96
112
|
- spec/csv_lazy_spec.rb
|
97
113
|
- spec/spec_helper.rb
|
98
114
|
- spec/test1.csv.gz
|
115
|
+
- spec/test2.csv
|
99
116
|
homepage: http://github.com/kaspernj/csv_lazy
|
100
117
|
licenses:
|
101
118
|
- MIT
|
@@ -111,7 +128,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
111
128
|
version: '0'
|
112
129
|
segments:
|
113
130
|
- 0
|
114
|
-
hash:
|
131
|
+
hash: -1390404400491179462
|
115
132
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
116
133
|
none: false
|
117
134
|
requirements:
|