fastercsv 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +7 -0
- data/INSTALL +23 -0
- data/LICENSE +7 -0
- data/README +57 -0
- data/Rakefile +83 -0
- data/TODO +8 -0
- data/lib/faster_csv.rb +400 -0
- data/setup.rb +1360 -0
- data/test/tc_csv_parsing.rb +121 -0
- data/test/tc_csv_writing.rb +90 -0
- data/test/tc_features.rb +51 -0
- data/test/tc_interface.rb +124 -0
- data/test/tc_speed.rb +39 -0
- data/test/ts_all.rb +14 -0
- metadata +65 -0
data/CHANGELOG
ADDED
data/INSTALL
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
= Installing FasterCSV
|
2
|
+
|
3
|
+
RubyGems is the preferred easy install method for FasterCSV. However, you can
|
4
|
+
install FasterCSV manually as described below.
|
5
|
+
|
6
|
+
== Installing the Gem
|
7
|
+
|
8
|
+
FasterCSV is intended to be installed via the
|
9
|
+
RubyGems[http://rubyforge.org/projects/rubygems/] system. To get the latest
|
10
|
+
version, simply enter the following into your command prompt:
|
11
|
+
|
12
|
+
$ sudo gem install fastercsv
|
13
|
+
|
14
|
+
You must have RubyGems[http://rubyforge.org/projects/rubygems/] installed for
|
15
|
+
the above to work.
|
16
|
+
|
17
|
+
== Installing Manually
|
18
|
+
|
19
|
+
Download the latest version of FasterCSV from the
|
20
|
+
{RubyForge project page}[http://rubyforge.org/frs/?group_id=1102]. Navigate to
|
21
|
+
the root project directory and enter:
|
22
|
+
|
23
|
+
$ sudo ruby setup.rb
|
data/LICENSE
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
= License Terms
|
2
|
+
|
3
|
+
Distributed under the user's choice of the GPL[http://www.gnu.org/copyleft/gpl.html] (see COPYING for details) or the
|
4
|
+
{Ruby software license}[http://www.ruby-lang.org/en/LICENSE.txt] by
|
5
|
+
James Edward Gray II.
|
6
|
+
|
7
|
+
Please email James[mailto:james@grayproductions.net] with any questions.
|
data/README
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
= Read Me
|
2
|
+
|
3
|
+
by James Edward Gray II
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Welcome to FasterCSV.
|
8
|
+
|
9
|
+
FasterCSV is intended as a replacement for Ruby's standard CSV library. It was designed to address concerns users of that library had and it has three primary goals:
|
10
|
+
|
11
|
+
1. Be significantly faster than CSV while remaining a pure Ruby library.
|
12
|
+
2. Use a smaller and easier to maintain code base.
|
13
|
+
3. Improve on the CSV interface.
|
14
|
+
|
15
|
+
Obviously, the last one is subjective. If you love CSV's interface, odds are
|
16
|
+
good this one won't suit you. I did try to defer to that interface whenever I
|
17
|
+
didn't have a compelling reason to change it though, so hopefully this won't be
|
18
|
+
too radically different.
|
19
|
+
|
20
|
+
== What's Different From CSV?
|
21
|
+
|
22
|
+
I'm sure I'll miss something, but I'll try to mention most of the major differences I am aware of, to help others quickly get up to speed:
|
23
|
+
|
24
|
+
=== CSV Parsing
|
25
|
+
|
26
|
+
* FasterCSV has a stricter parser and will throw MalformedCSVErrors on
|
27
|
+
problematic data.
|
28
|
+
* FasterCSV has a less liberal idea of a line ending than CSV. What you set as
|
29
|
+
the <tt>:row_sep</tt> is law.
|
30
|
+
* CSV returns empty lines as <tt>[nil]</tt>. FasterCSV calls them <tt>[]</tt>.
|
31
|
+
* FasterCSV has a much faster parser.
|
32
|
+
|
33
|
+
=== Interface
|
34
|
+
|
35
|
+
* FasterCSV uses Hash-style parameters to set options.
|
36
|
+
* FasterCSV does not have generate_row() or parse_row() from CSV.
|
37
|
+
* FasterCSV does not have CSV's Reader and Writer classes.
|
38
|
+
* FasterCSV::open() is more like Ruby's open() than CSV::open().
|
39
|
+
* FasterCSV objects support most standard IO methods.
|
40
|
+
* FasterCSV has a new() method used to wrap objects like String and IO for
|
41
|
+
reading and writing.
|
42
|
+
* FasterCSV::generate() is different from CSV::generate().
|
43
|
+
|
44
|
+
If you use this library and find yourself missing any functionality I have trimmed, please {let me know}[mailto:james@grayproductions.net].
|
45
|
+
|
46
|
+
== Documentation
|
47
|
+
|
48
|
+
See FasterCSV for documentation.
|
49
|
+
|
50
|
+
== Installing
|
51
|
+
|
52
|
+
See the INSTALL file for instructions.
|
53
|
+
|
54
|
+
== Questions and/or Comments
|
55
|
+
|
56
|
+
Feel free to email {James Edward Gray II}[mailto:james@grayproductions.net] with
|
57
|
+
any questions.
|
data/Rakefile
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
# Rakefile for FasterCSV:  testing, documentation, benchmarking and packaging.

require "rake/rdoctask"
require "rake/testtask"
require "rake/gempackagetask"

require "rubygems"

task :default => [:test]

# Run the complete unit test suite.
Rake::TestTask.new do |test|
  test.libs       << "test"
  test.test_files =  [ "test/ts_all.rb" ]
  test.verbose    =  true
end

# Build the RDoc documentation into doc/html.
Rake::RDocTask.new do |rdoc|
  rdoc.main     = "README"
  rdoc.rdoc_files.include( "README",  "INSTALL",
                           "TODO",    "CHANGELOG",
                           "AUTHORS", "COPYING",
                           "LICENSE", "lib/" )
  rdoc.rdoc_dir = "doc/html"
  rdoc.title    = "FasterCSV Documentation"
end

desc "Upload current documentation to Rubyforge"
task :upload_docs => [:rdoc] do
  sh "scp -r doc/html/* " +
     "bbazzarrakk@rubyforge.org:/var/www/gforge-projects/fastercsv/"
end

desc "Show library's code statistics"
task :stats do
  require 'code_statistics'
  CodeStatistics.new( ["FasterCSV", "lib"],
                      ["Units",     "test"] ).to_s
end

desc "Time FasterCSV and CSV"
task :benchmark do
  path = "test/test_data.csv"
  sh %Q{time ruby -r csv -e 'CSV.foreach("#{path}") { |row| }'}
  sh %Q{time ruby -r lib/faster_csv -e 'FasterCSV.foreach("#{path}") { |row| }'}
end

spec = Gem::Specification.new do |spec|
  spec.name     = "fastercsv"
  spec.version  = "0.1.0"
  spec.platform = Gem::Platform::RUBY
  spec.summary  = "FasterCSV is CSV, but faster, smaller, and cleaner."

  # package the library and test sources, skipping Subversion metadata
  spec.files           = Dir.glob("{lib,test}/**/*.rb").
                             reject { |item| item.include?(".svn") } +
                         ["Rakefile", "setup.rb"]
  spec.test_suite_file = "test/ts_all.rb"

  spec.has_rdoc         =  true
  spec.extra_rdoc_files =  %w{README INSTALL TODO CHANGELOG LICENSE}
  spec.rdoc_options     << "--title" << "FasterCSV Documentation" <<
                           "--main"  << "README"

  spec.require_path = "lib"
  # must name the file under lib/ (lib/faster_csv.rb), not the gem name
  spec.autorequire  = "faster_csv"

  spec.author            = "James Edward Gray II"
  spec.email             = "james@grayproductions.net"
  spec.rubyforge_project = "fastercsv"
  spec.homepage          = "http://fastercsv.rubyforge.org"
  spec.description       = <<END_DESC
FasterCSV is intended as a complete replacement to the CSV standard library. It
is significantly faster and smaller while still being pure Ruby code. It also
strives for a better interface.
END_DESC
end

# Build .gem, .zip and .tgz packages from the specification above.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.need_zip = true
  pkg.need_tar = true
end

desc "Add new files to Subversion"
task :add_to_svn do
  sh %Q{svn status | ruby -nae 'system "svn add \#{$F[1]}" if $F[0] == "?"' }
end
|
data/TODO
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
= To Do List
|
2
|
+
|
3
|
+
The following is a list of planned expansions for FasterCSV, in no particular
|
4
|
+
order.
|
5
|
+
|
6
|
+
* Add support for accessing fields by headers (from first row of document).
|
7
|
+
* Add "converters" for switching numbers to Integers or Floats, dates to Date or
|
8
|
+
Time objects, etc.
|
data/lib/faster_csv.rb
ADDED
@@ -0,0 +1,400 @@
|
|
1
|
+
#!/usr/local/bin/ruby -w
|
2
|
+
|
3
|
+
# = faster_csv.rb -- Faster CSV Reading and Writing
|
4
|
+
#
|
5
|
+
# Created by James Edward Gray II on 2005-10-31.
|
6
|
+
# Copyright 2005 Gray Productions. All rights reserved.
|
7
|
+
#
|
8
|
+
# See FasterCSV for documentation.
|
9
|
+
|
10
|
+
require "stringio"
|
11
|
+
require "forwardable"
|
12
|
+
|
13
|
+
#
|
14
|
+
# This class provides a complete interface to CSV files and data. It offers
|
15
|
+
# tools to enable you to read and write to and from Strings or IO objects, as
|
16
|
+
# needed.
|
17
|
+
#
|
18
|
+
# == Reading
|
19
|
+
#
|
20
|
+
# === From a File
|
21
|
+
#
|
22
|
+
# ==== A Line at a Time
|
23
|
+
#
|
24
|
+
# FasterCSV.foreach("path/to/file.csv") do |row|
|
25
|
+
# # use row here...
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# ==== All at Once
|
29
|
+
#
|
30
|
+
# arr_of_arrs = FasterCSV.read("path/to/file.csv")
|
31
|
+
#
|
32
|
+
# === From a String
|
33
|
+
#
|
34
|
+
# ==== A Line at a Time
|
35
|
+
#
|
36
|
+
# FasterCSV.parse("CSV,data,String") do |row|
|
37
|
+
# # use row here...
|
38
|
+
# end
|
39
|
+
#
|
40
|
+
# ==== All at Once
|
41
|
+
#
|
42
|
+
# arr_of_arrs = FasterCSV.parse("CSV,data,String")
|
43
|
+
#
|
44
|
+
# == Writing
|
45
|
+
#
|
46
|
+
# === To a File
|
47
|
+
#
|
48
|
+
# FasterCSV.open("path/to/file.csv", "w") do |csv|
|
49
|
+
# csv << ["row", "of", "CSV", "data"]
|
50
|
+
# csv << ["another", "row"]
|
51
|
+
# # ...
|
52
|
+
# end
|
53
|
+
#
|
54
|
+
# === To a String
|
55
|
+
#
|
56
|
+
# csv_string = FasterCSV.generate do |csv|
|
57
|
+
# csv << ["row", "of", "CSV", "data"]
|
58
|
+
# csv << ["another", "row"]
|
59
|
+
# # ...
|
60
|
+
# end
|
61
|
+
#
|
62
|
+
# == Convert a Single Line
|
63
|
+
#
|
64
|
+
#   csv_string = FasterCSV.generate_line(["row", "of", "CSV", "data"])  # to CSV
|
65
|
+
#   csv_array  = FasterCSV.parse_line("CSV,data,String")                # from CSV
|
66
|
+
#
|
67
|
+
class FasterCSV
  # The error thrown when the parser encounters illegal CSV formatting.
  class MalformedCSVError < RuntimeError; end

  #
  # The options used when no overrides are given by calling code.  They are:
  #
  # <b><tt>:col_sep</tt></b>::  <tt>","</tt>
  # <b><tt>:row_sep</tt></b>::  <tt>$/</tt>
  #
  DEFAULT_OPTIONS = {:col_sep => ",", :row_sep => $/}

  #
  # This method is intended as the primary interface for reading CSV files.  You
  # pass a +path+ and any +options+ you wish to set for the read.  Each row of
  # the file will be passed to the provided +block+ in turn.
  #
  # The +options+ parameter can be anything FasterCSV::new() understands.
  #
  def self.foreach( path, options = Hash.new, &block )
    open(path, options) do |csv|
      csv.each(&block)
    end
  end

  #
  # This method wraps a String in a FasterCSV object which is passed to the
  # provided block.  You can use the block to append CSV rows to the String and
  # when the block exits, the final String will be returned.
  #
  # The +options+ parameter can be anything FasterCSV::new() understands.
  #
  def self.generate( options = Hash.new )
    # dup() the literal so the buffer is writable even under frozen literals
    faster_csv = new("".dup, options)
    yield faster_csv
    faster_csv.string
  end

  #
  # This method is a shortcut for converting a single row (Array) into a CSV
  # String.
  #
  # The +options+ parameter can be anything FasterCSV::new() understands.
  #
  def self.generate_line( row, options = Hash.new )
    # dup() the literal so the buffer is writable even under frozen literals
    (new("".dup, options) << row).string
  end

  #
  # :call-seq:
  #   open( *args, options = Hash.new ) { |faster_csv| ... }
  #   open( *args, options = Hash.new )
  #
  # This method opens an IO object, and wraps that with FasterCSV.  This is
  # intended as the primary interface for writing a CSV file.
  #
  # You may pass any +args+ Ruby's open() understands followed by an optional
  # Hash containing any +options+ FasterCSV::new() understands.
  #
  # This method works like Ruby's open() call, in that it will pass a FasterCSV
  # object to a provided block and close it when the block terminates, or it
  # will return the FasterCSV object when no block is provided.  (*Note*:  This
  # is different from the standard CSV library which passes rows to the block.
  # Use FasterCSV::foreach() for that behavior.)
  #
  # An opened FasterCSV object will delegate to many IO methods, for
  # convenience.  You may call:
  #
  # * binmode()
  # * close()
  # * close_read()
  # * close_write()
  # * closed?()
  # * eof()
  # * eof?()
  # * fcntl()
  # * fileno()
  # * flush()
  # * fsync()
  # * ioctl()
  # * isatty()
  # * lineno()
  # * pid()
  # * pos()
  # * reopen()
  # * rewind()
  # * seek()
  # * stat()
  # * sync()
  # * sync=()
  # * tell()
  # * to_i()
  # * to_io()
  # * tty?()
  #
  def self.open( *args )
    # find the +options+ Hash
    options = if args.last.is_a? Hash then args.pop else Hash.new end
    # wrap a File opened with the remaining +args+
    csv     = new(File.open(*args), options)

    # handle blocks like Ruby's open(), not like the CSV library
    if block_given?
      begin
        yield csv
      ensure
        csv.close
      end
    else
      csv
    end
  end

  #
  # :call-seq:
  #   parse( str, options ) { |row| ... }
  #   parse( str, options )
  #
  # This method can be used to easily parse CSV out of a String.  You may either
  # provide a +block+ which will be called with each row of the String in turn,
  # or just use the returned Array of Arrays (when no +block+ is given).
  #
  # You pass your +str+ to read from, and an optional +options+ Hash containing
  # anything FasterCSV::new() understands.
  #
  # The wrapper created for +str+ is closed before this method returns, whether
  # or not a +block+ was given.
  #
  def self.parse( *args, &block )
    csv = new(*args)
    begin
      if block.nil?  # slurp contents, if no block is given
        csv.read
      else           # or pass each row to a provided block
        csv.each(&block)
      end
    ensure
      csv.close
    end
  end

  #
  # Use to slurp a CSV file into an Array of Arrays.  Pass the +path+ to the
  # file and any +options+ FasterCSV::new() understands.
  #
  def self.read( path, options = Hash.new )
    open(path, options) { |csv| csv.read }
  end

  # Alias for FasterCSV::read().
  def self.readlines( path, options = Hash.new )
    open(path, options) { |csv| csv.readlines }
  end

  #
  # This method is a shortcut for converting a single line of a CSV String into
  # an Array.  Note that if +line+ contains multiple rows, anything beyond the
  # first row is ignored.
  #
  # The +options+ parameter can be anything FasterCSV::new() understands.
  #
  def self.parse_line( line, options = Hash.new )
    new(line, options).shift
  end

  #
  # This constructor will wrap either a String or IO object passed in +data+ for
  # reading and/or writing.  In addition to the FasterCSV instance methods,
  # several IO methods are delegated.  (See FasterCSV::open() for a complete
  # list.)  If you pass a String for +data+, you can later retrieve it (after
  # writing to it, for example) with FasterCSV.string().
  #
  # You may set any reading and/or writing preferences in the +options+ Hash.
  # Available options are:
  #
  # <b><tt>:col_sep</tt></b>::  The String placed between each field.
  # <b><tt>:row_sep</tt></b>::  The String appended to the end of each row.
  #
  # See FasterCSV::DEFAULT_OPTIONS for the default settings.
  #
  # Options cannot be overridden in the instance methods for performance
  # reasons, so be sure to set what you want here.
  #
  def initialize( data, options = Hash.new )
    # build the options for this read/write
    options = DEFAULT_OPTIONS.merge(options)

    # create the IO object we will read from
    @io = if data.is_a? String then StringIO.new(data) else data end

    # store the selected separators
    @col_sep = options[:col_sep]
    @row_sep = options[:row_sep]

    # separators are literal text, so escape them before building Regexps
    col = Regexp.escape(@col_sep)
    row = Regexp.escape(@row_sep)

    # prebuild Regexps for faster parsing
    @parsers = [ /\A(?:#{col})+/,   # for empty leading fields
                 ### The Primary Parser ###
                 / \G(?:^|#{col})                     # anchor the match
                   (?: "((?>[^"]*)(?>""[^"]*)*)"      # find quoted fields
                       |                              # ... or ...
                       ([^"#{col}]*)                  # unquoted fields
                       )/x,
                 ### End Primary Parser ###
                 /#{row}\Z/ ]       # safer than chomp!()

    #
    # prebuild the test for fields that must be quoted on output:  any field
    # holding a quote, \r, \n, or a character from either separator (the
    # parser rejects those characters in unquoted fields)
    #
    special       = (%Q{\r\n"} + @col_sep + @row_sep).split(//).uniq
    @quote_needed = /[#{special.map { |char| Regexp.escape(char) }.join}]/
  end

  ### IO and StringIO Delegation ###

  extend Forwardable
  def_delegators :@io, :binmode, :close, :close_read, :close_write, :closed?,
                       :eof, :eof?, :fcntl, :fileno, :flush, :fsync, :ioctl,
                       :isatty, :lineno, :pid, :pos, :reopen, :rewind, :seek,
                       :stat, :string, :sync, :sync=, :tell, :to_i, :to_io,
                       :tty?

  ### End Delegation ###

  #
  # The primary write method for wrapped Strings and IOs, +row+ (an Array) is
  # converted to CSV and appended to the data source.
  #
  # +nil+ fields are written as empty unquoted fields, empty Strings as empty
  # quoted fields, and any field containing a quote, line ending or separator
  # character is quoted and escaped.  Returns +self+, so calls can be chained.
  #
  # The data source must be open for writing.
  #
  def <<( row )
    fields = row.map do |field|
      if field.nil?  # reverse +nil+ fields as empty unquoted fields
        ""
      else
        field = String(field)  # Stringify fields
        # reverse empty fields as empty quoted fields
        if field.empty? || field =~ @quote_needed
          %Q{"#{field.gsub('"', '""')}"}  # escape quoted fields
        else
          field                           # unquoted field
        end
      end
    end
    @io << fields.join(@col_sep) + @row_sep  # add separators

    self  # for chaining
  end
  alias_method :add_row, :<<
  alias_method :puts,    :<<

  include Enumerable

  #
  # Yields each row of the data source in turn.
  #
  # Support for Enumerable.
  #
  # The data source must be open for reading.
  #
  def each
    while row = shift
      yield row
    end
  end

  #
  # Slurps the remaining rows and returns an Array of Arrays.
  #
  # The data source must be open for reading.
  #
  def read
    to_a
  end
  alias_method :readlines, :read

  #
  # The primary read method for wrapped Strings and IOs, a single row is pulled
  # from the data source, parsed and returned as an Array of fields.  Returns
  # +nil+ once the data source is exhausted and an empty Array for a blank
  # line.
  #
  # Raises MalformedCSVError for a line ending inside an unquoted field or for
  # data that ends before a row can be completely parsed.  Errors raised by the
  # data source itself (a stream that is not open for reading, for example) are
  # passed through to the caller.
  #
  # The data source must be open for reading.
  #
  def shift
    # begin with a blank line, so we can always add to it
    line = ""

    #
    # it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
    # because of \r and/or \n characters embedded in quoted fields
    #
    loop do
      # add another read to the line, +nil+ means we are out of data
      chunk = @io.gets(@row_sep)
      return nil if chunk.nil?
      line += chunk
      # copy the line so we can chop it up in parsing
      parse = line.dup
      parse.sub!(@parsers[2], "")

      #
      # I believe a blank line should be an <tt>Array.new</tt>, not
      # CSV's <tt>[nil]</tt>
      #
      return Array.new if parse.empty?

      #
      # shave leading empty fields if needed, because the main parser chokes
      # on these (one +nil+ per separator removed, whatever its length)
      #
      csv = if parse.sub!(@parsers[0], "")
        [nil] * ($&.length / @col_sep.length)
      else
        Array.new
      end
      #
      # then parse the main fields with a hyper-tuned Regexp from
      # Mastering Regular Expressions, Second Edition
      #
      parse.gsub!(@parsers[1]) do
        csv << if $1.nil?     # we found an unquoted field
          if $2.empty?        # switch empty unquoted fields to +nil+...
            nil               # for CSV compatibility
          else
            # I decided to take a strict approach to CSV parsing...
            if $2.count("\r\n").zero?  # verify correctness of field...
              $2
            else
              # or throw an Exception
              raise MalformedCSVError, 'Unquoted fields do not allow \r or \n.'
            end
          end
        else                  # we found a quoted field...
          $1.gsub('""', '"')  # unescape contents
        end
        ""  # gsub!'s replacement, clear the field
      end

      # if parse is empty?(), we found all the fields on the line...
      break csv if parse.empty?
      # if we're not empty?() but at eof?(), a quoted field wasn't closed...
      raise MalformedCSVError, "Unclosed quoted field." if @io.eof?
      # otherwise, we need to loop and pull some more data to complete the row
    end
  end
  alias_method :gets,     :shift
  alias_method :readline, :shift
end
|