csv_lazy 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -3,6 +3,8 @@ source "http://rubygems.org"
3
3
  # Example:
4
4
  # gem "activesupport", ">= 2.3.5"
5
5
 
6
+ gem "string_utils"
7
+
6
8
  # Add dependencies to develop your gem here.
7
9
  # Include everything needed to run rake, tests, features, etc.
8
10
  group :development do
data/Gemfile.lock CHANGED
@@ -20,6 +20,7 @@ GEM
20
20
  rspec-expectations (2.8.0)
21
21
  diff-lcs (~> 1.1.2)
22
22
  rspec-mocks (2.8.0)
23
+ string_utils ()
23
24
 
24
25
  PLATFORMS
25
26
  ruby
@@ -29,3 +30,4 @@ DEPENDENCIES
29
30
  jeweler (~> 1.8.4)
30
31
  rdoc (~> 3.12)
31
32
  rspec (~> 2.8.0)
33
+ string_utils
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.4
1
+ 0.0.5
data/csv_lazy.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "csv_lazy"
8
- s.version = "0.0.4"
8
+ s.version = "0.0.5"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Kasper Johansen"]
12
- s.date = "2013-03-22"
12
+ s.date = "2013-03-25"
13
13
  s.description = "A small CSV lib that skips whitespace-format-bugs and more."
14
14
  s.email = "k@spernj.org"
15
15
  s.extra_rdoc_files = [
@@ -29,7 +29,8 @@ Gem::Specification.new do |s|
29
29
  "lib/csv_lazy.rb",
30
30
  "spec/csv_lazy_spec.rb",
31
31
  "spec/spec_helper.rb",
32
- "spec/test1.csv.gz"
32
+ "spec/test1.csv.gz",
33
+ "spec/test2.csv"
33
34
  ]
34
35
  s.homepage = "http://github.com/kaspernj/csv_lazy"
35
36
  s.licenses = ["MIT"]
@@ -41,17 +42,20 @@ Gem::Specification.new do |s|
41
42
  s.specification_version = 3
42
43
 
43
44
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
45
+ s.add_runtime_dependency(%q<string_utils>, [">= 0"])
44
46
  s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
45
47
  s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
46
48
  s.add_development_dependency(%q<bundler>, [">= 1.0.0"])
47
49
  s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
48
50
  else
51
+ s.add_dependency(%q<string_utils>, [">= 0"])
49
52
  s.add_dependency(%q<rspec>, ["~> 2.8.0"])
50
53
  s.add_dependency(%q<rdoc>, ["~> 3.12"])
51
54
  s.add_dependency(%q<bundler>, [">= 1.0.0"])
52
55
  s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
53
56
  end
54
57
  else
58
+ s.add_dependency(%q<string_utils>, [">= 0"])
55
59
  s.add_dependency(%q<rspec>, ["~> 2.8.0"])
56
60
  s.add_dependency(%q<rdoc>, ["~> 3.12"])
57
61
  s.add_dependency(%q<bundler>, [">= 1.0.0"])
data/lib/csv_lazy.rb CHANGED
@@ -1,3 +1,6 @@
1
+ #encoding: utf-8
2
+ require "string_utils"
3
+
1
4
  #A simple library for parsing CSV files through IO objects. Automatically copes with corrupt file formats, such as files that contain several spaces after a column, and more.
2
5
  class Csv_lazy
3
6
  include Enumerable
@@ -13,7 +16,8 @@ class Csv_lazy
13
16
  :quote_char => '"',
14
17
  :row_sep => "\n",
15
18
  :col_sep => ";",
16
- :headers => false
19
+ :headers => false,
20
+ :buffer_length => 4096
17
21
  }.merge(args)
18
22
 
19
23
  @io = @args[:io]
@@ -22,9 +26,14 @@ class Csv_lazy
22
26
  @debug = @args[:debug]
23
27
  @encode = @args[:encode]
24
28
  @mutex = Mutex.new
29
+ @buffer_length = @args[:buffer_length]
30
+ @escape_char = "\\"
31
+ @escaped_quote = "#{@escape_char}#{@args[:quote_char]}"
32
+ @escaped_quote_double = "#{@escape_char}#{@escape_char}#{@args[:quote_char]}"
33
+
25
34
  #@debug = true
26
35
 
27
- accepted = [:encode, :quote_char, :row_sep, :col_sep, :io, :debug, :headers]
36
+ accepted = [:encode, :quote_char, :row_sep, :col_sep, :io, :debug, :headers, :buffer_length]
28
37
  @args.each do |key, val|
29
38
  if accepted.index(key) == nil
30
39
  raise "Unknown argument: '#{key}'."
@@ -60,24 +69,20 @@ class Csv_lazy
60
69
 
61
70
  #Yields each row as an array.
62
71
  def each
63
- @mutex.synchronize do
64
- while row = read_row
65
- yield(row)
72
+ if block_given?
73
+ @mutex.synchronize do
74
+ while row = read_row
75
+ yield(row)
76
+ end
66
77
  end
67
- end
68
- end
69
-
70
- private
71
-
72
- #Reads more content into the buffer.
73
- def read_buffer
74
- read = @io.gets
75
-
76
- if !read
77
- @eof = true
78
78
  else
79
- read = read.encode(@encode) if @encode
80
- @buffer << read
79
+ Enumerable.new do |yielder|
80
+ @mutex.synchronize do
81
+ while row = read_row
82
+ yielder << row
83
+ end
84
+ end
85
+ end
81
86
  end
82
87
  end
83
88
 
@@ -109,6 +114,20 @@ class Csv_lazy
109
114
  end
110
115
  end
111
116
 
117
+ private
118
+
119
+ #Reads more content into the buffer.
120
+ def read_buffer
121
+ read = @io.gets
122
+
123
+ if !read
124
+ @eof = true
125
+ else
126
+ read = read.encode(@encode) if @encode
127
+ @buffer << read
128
+ end
129
+ end
130
+
112
131
  #Runs a regex against the buffer. If matched it also removes it from the buffer.
113
132
  def read_remove_regex(regex)
114
133
  if match = @buffer.match(regex)
@@ -130,23 +149,50 @@ class Csv_lazy
130
149
  return false
131
150
  end
132
151
 
152
+ def unescape(str)
153
+ return StringUtils.strtr(str, {
154
+ "\\\\" => "\\",
155
+ "\\t" => "\t",
156
+ "\\n" => "\n",
157
+ "\\\"" => "\""
158
+ })
159
+ end
160
+
133
161
  #Adds the next column to the row. Returns true if more columns should be read or false if this was the end of the row.
134
162
  def read_next_col
135
- read_buffer if @buffer.length < 4096
163
+ read_buffer if @buffer.length < @buffer_length
136
164
  return false if @buffer.empty? and @eof
137
165
 
138
166
  if @buffer.empty? or read_remove_regex(@regex_row_end)
139
167
  return false
140
168
  elsif match = read_remove_regex(@regex_begin_quote_char)
141
169
  read = ""
170
+ col_content = ""
142
171
 
143
172
  loop do
144
173
  match_read = read_remove_regex(@regex_read_until_quote_char)
145
174
  if !match_read
146
- read_buffer
175
+ if @eof
176
+ add_col(@buffer) unless @buffer.empty?
177
+ @buffer = ""
178
+ break
179
+ else
180
+ read_buffer
181
+ end
147
182
  else
148
- add_col(match_read[1])
149
- break
183
+ all = match_read[0]
184
+ escaped_quote_char = all[-@escaped_quote.length, @escaped_quote.length]
185
+ double_escaped_quote_char = all[-@escaped_quote_double.length, @escaped_quote_double.length]
186
+ all_without_quote = match_read[1]
187
+
188
+ if escaped_quote_char == @escaped_quote and double_escaped_quote_char != @escaped_quote_double
189
+ #continue reading - the quote char is escaped.
190
+ col_content << all
191
+ else
192
+ col_content << match_read[1]
193
+ add_col(unescape(col_content))
194
+ break
195
+ end
150
196
  end
151
197
  end
152
198
 
@@ -182,9 +228,10 @@ class Csv_lazy
182
228
  read_buffer
183
229
  raise Errno::EAGAIN
184
230
  else
185
- raise "Dont know what to do with buffer: #{@buffer}"
231
+ raise "Dont know what to do with buffer: '#{@buffer}'."
186
232
  end
187
233
  rescue Errno::EAGAIN
234
+ puts "csv_lazy: Retry! Probably we ran out of buffer..." if @debug
188
235
  retry
189
236
  end
190
237
 
@@ -86,8 +86,8 @@ describe "CsvLazy" do
86
86
 
87
87
  it "should be able to use headers and return hashes instead" do
88
88
  cont = "\"name\",age\r\n"
89
- cont += "\"Kasper Johansen\",27\r\n"
90
- cont += "\"Christina Stoeckel\",\"25\"\r\n"
89
+ cont << "\"Kasper Johansen\",27\r\n"
90
+ cont << "\"Christina Stoeckel\",\"25\"\r\n"
91
91
 
92
92
  line = 0
93
93
  Csv_lazy.new(:col_sep => ",", :io => StringIO.new(cont), :headers => true, :row_sep => "\r\n") do |csv|
@@ -109,4 +109,39 @@ describe "CsvLazy" do
109
109
 
110
110
  line.should eql(2)
111
111
  end
112
+
113
+ it "should be able to encode incoming strings from weird files without crashing" do
114
+ File.open("#{File.dirname(__FILE__)}/test2.csv", "rb", :encoding => "UTF-16LE") do |fp|
115
+ #Remove invalid UTF content.
116
+ fp.read(2)
117
+
118
+ Csv_lazy.new(:col_sep => ",", :io => fp, :headers => true, :row_sep => "\r\n", :quote_char => '"', :encode => "US-ASCII", :debug => false) do |csv|
119
+ csv.keys[0].should eql(:legacy_user_id)
120
+ csv.keys[1].should eql(:savings_percentage)
121
+ csv.keys[2].should eql(:active)
122
+ csv.keys.length.should eql(3)
123
+ end
124
+ end
125
+ end
126
+
127
+ it "should do proper escaping" do
128
+ cont = "\"Test1\";\"Test2 \\\"Wee\\\"\"\r\n"
129
+ cont << "\"Test3\";\"Test4 \\\"Wee\\\"\";\"Test5 \\\"Wee\\\"\"\r\n"
130
+
131
+ csv = Csv_lazy.new(:col_sep => ";", :io => StringIO.new(cont), :row_sep => "\r\n")
132
+
133
+ row = csv.read_row
134
+ row[0].should eql("Test1")
135
+ row[1].should eql("Test2 \"Wee\"")
136
+ row.length.should eql(2)
137
+
138
+ row = csv.read_row
139
+ row[0].should eql("Test3")
140
+ row[1].should eql("Test4 \"Wee\"")
141
+ row[2].should eql("Test5 \"Wee\"")
142
+ row.length.should eql(3)
143
+
144
+ row = csv.read_row
145
+ row.should eql(false)
146
+ end
112
147
  end
data/spec/test2.csv ADDED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv_lazy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,8 +9,24 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-22 00:00:00.000000000 Z
12
+ date: 2013-03-25 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: string_utils
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
14
30
  - !ruby/object:Gem::Dependency
15
31
  name: rspec
16
32
  requirement: !ruby/object:Gem::Requirement
@@ -96,6 +112,7 @@ files:
96
112
  - spec/csv_lazy_spec.rb
97
113
  - spec/spec_helper.rb
98
114
  - spec/test1.csv.gz
115
+ - spec/test2.csv
99
116
  homepage: http://github.com/kaspernj/csv_lazy
100
117
  licenses:
101
118
  - MIT
@@ -111,7 +128,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
111
128
  version: '0'
112
129
  segments:
113
130
  - 0
114
- hash: 222799097840594694
131
+ hash: -1390404400491179462
115
132
  required_rubygems_version: !ruby/object:Gem::Requirement
116
133
  none: false
117
134
  requirements: