fastercsv 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +7 -0
- data/INSTALL +23 -0
- data/LICENSE +7 -0
- data/README +57 -0
- data/Rakefile +83 -0
- data/TODO +8 -0
- data/lib/faster_csv.rb +400 -0
- data/setup.rb +1360 -0
- data/test/tc_csv_parsing.rb +121 -0
- data/test/tc_csv_writing.rb +90 -0
- data/test/tc_features.rb +51 -0
- data/test/tc_interface.rb +124 -0
- data/test/tc_speed.rb +39 -0
- data/test/ts_all.rb +14 -0
- metadata +65 -0
data/CHANGELOG
ADDED
data/INSTALL
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
= Installing FasterCSV
|
2
|
+
|
3
|
+
RubyGems is the preferred easy install method for FasterCSV. However, you can
|
4
|
+
install FasterCSV manually as described below.
|
5
|
+
|
6
|
+
== Installing the Gem
|
7
|
+
|
8
|
+
FasterCSV is intended to be installed via the
|
9
|
+
RubyGems[http://rubyforge.org/projects/rubygems/] system. To get the latest
|
10
|
+
version, simply enter the following into your command prompt:
|
11
|
+
|
12
|
+
$ sudo gem install fastercsv
|
13
|
+
|
14
|
+
You must have RubyGems[http://rubyforge.org/projects/rubygems/] installed for
|
15
|
+
the above to work.
|
16
|
+
|
17
|
+
== Installing Manually
|
18
|
+
|
19
|
+
Download the latest version of FasterCSV from the
|
20
|
+
{RubyForge project page}[http://rubyforge.org/frs/?group_id=1102]. Navigate to
|
21
|
+
the root project directory and enter:
|
22
|
+
|
23
|
+
$ sudo ruby setup.rb
|
data/LICENSE
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
= License Terms
|
2
|
+
|
3
|
+
Distributed under the user's choice of the GPL[http://www.gnu.org/copyleft/gpl.html] (see COPYING for details) or the
|
4
|
+
{Ruby software license}[http://www.ruby-lang.org/en/LICENSE.txt] by
|
5
|
+
James Edward Gray II.
|
6
|
+
|
7
|
+
Please email James[mailto:james@grayproductions.net] with any questions.
|
data/README
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
= Read Me
|
2
|
+
|
3
|
+
by James Edward Gray II
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Welcome to FasterCSV.
|
8
|
+
|
9
|
+
FasterCSV is intended as a replacement for Ruby's standard CSV library. It was designed to address concerns users of that library had, and it has three primary goals:
|
10
|
+
|
11
|
+
1. Be significantly faster than CSV while remaining a pure Ruby library.
|
12
|
+
2. Use a smaller and easier to maintain code base.
|
13
|
+
3. Improve on the CSV interface.
|
14
|
+
|
15
|
+
Obviously, the last one is subjective. If you love CSV's interface, odds are
|
16
|
+
good this one won't suit you. I did try to defer to that interface whenever I
|
17
|
+
didn't have a compelling reason to change it though, so hopefully this won't be
|
18
|
+
too radically different.
|
19
|
+
|
20
|
+
== What's Different From CSV?
|
21
|
+
|
22
|
+
I'm sure I'll miss something, but I'll try to mention most of the major differences I am aware of, to help others quickly get up to speed:
|
23
|
+
|
24
|
+
=== CSV Parsing
|
25
|
+
|
26
|
+
* FasterCSV has a stricter parser and will raise a MalformedCSVError on
|
27
|
+
problematic data.
|
28
|
+
* FasterCSV has a less liberal idea of a line ending than CSV. What you set as
|
29
|
+
the <tt>:row_sep</tt> is law.
|
30
|
+
* CSV returns empty lines as <tt>[nil]</tt>. FasterCSV calls them <tt>[]</tt>.
|
31
|
+
* FasterCSV has a much faster parser.
|
32
|
+
|
33
|
+
=== Interface
|
34
|
+
|
35
|
+
* FasterCSV uses Hash-style parameters to set options.
|
36
|
+
* FasterCSV does not have generate_row() or parse_row() from CSV.
|
37
|
+
* FasterCSV does not have CSV's Reader and Writer classes.
|
38
|
+
* FasterCSV::open() is more like Ruby's open() than CSV::open().
|
39
|
+
* FasterCSV objects support most standard IO methods.
|
40
|
+
* FasterCSV has a new() method used to wrap objects like String and IO for
|
41
|
+
reading and writing.
|
42
|
+
* FasterCSV::generate() is different from CSV::generate().
|
43
|
+
|
44
|
+
If you use this library and find yourself missing any functionality I have trimmed, please {let me know}[mailto:james@grayproductions.net].
|
45
|
+
|
46
|
+
== Documentation
|
47
|
+
|
48
|
+
See FasterCSV for documentation.
|
49
|
+
|
50
|
+
== Installing
|
51
|
+
|
52
|
+
See the INSTALL file for instructions.
|
53
|
+
|
54
|
+
== Questions and/or Comments
|
55
|
+
|
56
|
+
Feel free to email {James Edward Gray II}[mailto:james@grayproductions.net] with
|
57
|
+
any questions.
|
data/Rakefile
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
# Build, test, documentation and packaging tasks for the FasterCSV gem.

require "rake/rdoctask"
require "rake/testtask"
require "rake/gempackagetask"

require "rubygems"

task :default => [:test]

# Runs the complete unit test suite.
Rake::TestTask.new do |test|
  test.libs       << "test"
  test.test_files =  [ "test/ts_all.rb" ]
  test.verbose    =  true
end

# Builds the HTML API documentation into doc/html.
Rake::RDocTask.new do |rdoc|
  rdoc.main     = "README"
  rdoc.rdoc_files.include( "README",  "INSTALL",
                           "TODO",    "CHANGELOG",
                           "AUTHORS", "COPYING",
                           "LICENSE", "lib/" )
  rdoc.rdoc_dir = "doc/html"
  rdoc.title    = "FasterCSV Documentation"
end

desc "Upload current documentation to Rubyforge"
task :upload_docs => [:rdoc] do
  sh "scp -r doc/html/* " +
     "bbazzarrakk@rubyforge.org:/var/www/gforge-projects/fastercsv/"
end

desc "Show library's code statistics"
task :stats do
  require 'code_statistics'
  CodeStatistics.new( ["FasterCSV", "lib"],
                      ["Units",     "test"] ).to_s
end

desc "Time FasterCSV and CSV"
task :benchmark do
  path = "test/test_data.csv"
  sh %Q{time ruby -r csv -e 'CSV.foreach("#{path}") { |row| }'}
  sh %Q{time ruby -r lib/faster_csv -e 'FasterCSV.foreach("#{path}") { |row| }'}
end

spec = Gem::Specification.new do |spec|
  spec.name     = "fastercsv"
  spec.version  = "0.1.0"
  spec.platform = Gem::Platform::RUBY
  spec.summary  = "FasterCSV is CSV, but faster, smaller, and cleaner."

  spec.files           = Dir.glob("{lib,test}/**/*.rb").
                             reject { |item| item.include?(".svn") } +
                         ["Rakefile", "setup.rb"]
  spec.test_suite_file = "test/ts_all.rb"

  spec.has_rdoc         =  true
  spec.extra_rdoc_files =  %w{README INSTALL TODO CHANGELOG LICENSE}
  spec.rdoc_options     << "--title" << "FasterCSV Documentation" <<
                           "--main"  << "README"

  spec.require_path = "lib"
  # must name the file under lib/ (lib/faster_csv.rb), not the gem name,
  # or loading the gem fails with a LoadError
  spec.autorequire  = "faster_csv"

  spec.author            = "James Edward Gray II"
  spec.email             = "james@grayproductions.net"
  spec.rubyforge_project = "fastercsv"
  spec.homepage          = "http://fastercsv.rubyforge.org"
  spec.description       = <<END_DESC
FasterCSV is intended as a complete replacement to the CSV standard library. It
is significantly faster and smaller while still being pure Ruby code. It also
strives for a better interface.
END_DESC
end

# Packages the gem, plus zip and tar archives of the same files.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.need_zip = true
  pkg.need_tar = true
end

desc "Add new files to Subversion"
task :add_to_svn do
  sh %Q{svn status | ruby -nae 'system "svn add \#{$F[1]}" if $F[0] == "?"' }
end
|
data/TODO
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
= To Do List
|
2
|
+
|
3
|
+
The following is a list of planned expansions for FasterCSV, in no particular
|
4
|
+
order.
|
5
|
+
|
6
|
+
* Add support for accessing fields by headers (from first row of document).
|
7
|
+
* Add "converters" for switching numbers to Integers or Floats, dates to Date or
|
8
|
+
Time objects, etc.
|
data/lib/faster_csv.rb
ADDED
@@ -0,0 +1,400 @@
|
|
1
|
+
#!/usr/local/bin/ruby -w
|
2
|
+
|
3
|
+
# = faster_csv.rb -- Faster CSV Reading and Writing
|
4
|
+
#
|
5
|
+
# Created by James Edward Gray II on 2005-10-31.
|
6
|
+
# Copyright 2005 Gray Productions. All rights reserved.
|
7
|
+
#
|
8
|
+
# See FasterCSV for documentation.
|
9
|
+
|
10
|
+
require "stringio"
|
11
|
+
require "forwardable"
|
12
|
+
|
13
|
+
#
|
14
|
+
# This class provides a complete interface to CSV files and data. It offers
|
15
|
+
# tools to enable you to read and write to and from Strings or IO objects, as
|
16
|
+
# needed.
|
17
|
+
#
|
18
|
+
# == Reading
|
19
|
+
#
|
20
|
+
# === From a File
|
21
|
+
#
|
22
|
+
# ==== A Line at a Time
|
23
|
+
#
|
24
|
+
# FasterCSV.foreach("path/to/file.csv") do |row|
|
25
|
+
# # use row here...
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# ==== All at Once
|
29
|
+
#
|
30
|
+
# arr_of_arrs = FasterCSV.read("path/to/file.csv")
|
31
|
+
#
|
32
|
+
# === From a String
|
33
|
+
#
|
34
|
+
# ==== A Line at a Time
|
35
|
+
#
|
36
|
+
# FasterCSV.parse("CSV,data,String") do |row|
|
37
|
+
# # use row here...
|
38
|
+
# end
|
39
|
+
#
|
40
|
+
# ==== All at Once
|
41
|
+
#
|
42
|
+
# arr_of_arrs = FasterCSV.parse("CSV,data,String")
|
43
|
+
#
|
44
|
+
# == Writing
|
45
|
+
#
|
46
|
+
# === To a File
|
47
|
+
#
|
48
|
+
# FasterCSV.open("path/to/file.csv", "w") do |csv|
|
49
|
+
# csv << ["row", "of", "CSV", "data"]
|
50
|
+
# csv << ["another", "row"]
|
51
|
+
# # ...
|
52
|
+
# end
|
53
|
+
#
|
54
|
+
# === To a String
|
55
|
+
#
|
56
|
+
# csv_string = FasterCSV.generate do |csv|
|
57
|
+
# csv << ["row", "of", "CSV", "data"]
|
58
|
+
# csv << ["another", "row"]
|
59
|
+
# # ...
|
60
|
+
# end
|
61
|
+
#
|
62
|
+
# == Convert a Single Line
|
63
|
+
#
|
64
|
+
# csv_string = FasterCSV.generate_line(["row", "of", "CSV", "data"]) # to CSV
|
65
|
+
# csv_array = FasterCSV.parse_line("CSV,data,String") # from CSV
|
66
|
+
#
|
67
|
+
class FasterCSV
  # The error thrown when the parser encounters illegal CSV formatting.
  class MalformedCSVError < RuntimeError; end

  #
  # The options used when no overrides are given by calling code.  They are:
  #
  # <b><tt>:col_sep</tt></b>:: <tt>","</tt>
  # <b><tt>:row_sep</tt></b>:: <tt>$/</tt>
  #
  # The Hash is frozen; it is only ever merged with caller options, never
  # modified.
  #
  DEFAULT_OPTIONS = {:col_sep => ",", :row_sep => $/}.freeze

  #
  # This method is intended as the primary interface for reading CSV files.  You
  # pass a +path+ and any +options+ you wish to set for the read.  Each row of
  # file will be passed to the provided +block+ in turn.
  #
  # The +options+ parameter can be anything FasterCSV::new() understands.
  #
  def self.foreach( path, options = Hash.new, &block )
    open(path, options) do |csv|
      csv.each(&block)
    end
  end

  #
  # This method wraps a String in a FasterCSV object which is passed to the
  # provided block.  You can use the block to append CSV rows to the String and
  # when the block exits, the final String will be returned.
  #
  # The +options+ parameter can be anything FasterCSV::new() understands.
  #
  def self.generate( options = Hash.new )
    faster_csv = new("", options)
    yield faster_csv
    faster_csv.string
  end

  #
  # This method is a shortcut for converting a single row (Array) into a CSV
  # String.
  #
  # The +options+ parameter can be anything FasterCSV::new() understands.
  #
  def self.generate_line( row, options = Hash.new )
    (new("", options) << row).string
  end

  #
  # :call-seq:
  #   open( *args, options = Hash.new ) { |faster_csv| ... }
  #   open( *args, options = Hash.new )
  #
  # This method opens an IO object, and wraps that with FasterCSV.  This is
  # intended as the primary interface for writing a CSV file.
  #
  # You may pass any +args+ Ruby's open() understands followed by an optional
  # Hash containing any +options+ FasterCSV::new() understands.
  #
  # This method works like Ruby's open() call, in that it will pass a FasterCSV
  # object to a provided block and close it when the block terminates, or it
  # will return the FasterCSV object when no block is provided.  (*Note*: This
  # is different from the standard CSV library which passes rows to the block.
  # Use FasterCSV::foreach() for that behavior.)
  #
  # An opened FasterCSV object will delegate to many IO methods, for
  # convenience.  You may call:
  #
  # * binmode()
  # * close()
  # * close_read()
  # * close_write()
  # * closed?()
  # * eof()
  # * eof?()
  # * fcntl()
  # * fileno()
  # * flush()
  # * fsync()
  # * ioctl()
  # * isatty()
  # * lineno()
  # * pid()
  # * pos()
  # * reopen()
  # * rewind()
  # * seek()
  # * stat()
  # * sync()
  # * sync=()
  # * tell()
  # * to_i()
  # * to_io()
  # * tty?()
  #
  def self.open( *args )
    # find the +options+ Hash
    options = if args.last.is_a? Hash then args.pop else Hash.new end
    # wrap a File opened with the remaining +args+
    csv     = new(File.open(*args), options)

    # handle blocks like Ruby's open(), not like the CSV library
    if block_given?
      begin
        yield csv
      ensure
        csv.close
      end
    else
      csv
    end
  end

  #
  # :call-seq:
  #   parse( str, options ) { |row| ... }
  #   parse( str, options )
  #
  # This method can be used to easily parse CSV out of a String.  You may either
  # provide a +block+ which will be called with each row of the String in turn,
  # or just use the returned Array of Arrays (when no +block+ is given).
  #
  # You pass your +str+ to read from, and an optional +options+ Hash containing
  # anything FasterCSV::new() understands.
  #
  def self.parse( *args, &block )
    csv = new(*args)
    if block.nil?  # slurp contents, if no block is given
      begin
        csv.read
      ensure
        csv.close
      end
    else           # or pass each row to a provided block
      csv.each(&block)
    end
  end

  #
  # Use to slurp a CSV file into an Array of Arrays.  Pass the +path+ to the
  # file and any +options+ FasterCSV::new() understands.
  #
  def self.read( path, options = Hash.new )
    open(path, options) { |csv| csv.read }
  end

  # Alias for FasterCSV::read().
  def self.readlines( path, options = Hash.new )
    open(path, options) { |csv| csv.readlines }
  end

  #
  # This method is a shortcut for converting a single line of a CSV String into
  # an Array.  Note that if +line+ contains multiple rows, anything beyond the
  # first row is ignored.  Returns +nil+ when +line+ holds no data at all.
  #
  # The +options+ parameter can be anything FasterCSV::new() understands.
  #
  def self.parse_line( line, options = Hash.new )
    new(line, options).shift
  end

  #
  # This constructor will wrap either a String or IO object passed in +data+ for
  # reading and/or writing.  In addition to the FasterCSV instance methods,
  # several IO methods are delegated.  (See FasterCSV::open() for a complete
  # list.)  If you pass a String for +data+, you can later retrieve it (after
  # writing to it, for example) with FasterCSV#string().
  #
  # You may set any reading and/or writing preferences in the +options+ Hash.
  # Available options are:
  #
  # <b><tt>:col_sep</tt></b>:: The String placed between each field.
  # <b><tt>:row_sep</tt></b>:: The String appended to the end of each row.
  #
  # See FasterCSV::DEFAULT_OPTIONS for the default settings.
  #
  # Both separators are treated as literal Strings, never as Regexp source, so
  # characters such as <tt>"|"</tt> or <tt>"."</tt> are safe to use.
  #
  # Options cannot be overridden in the instance methods for performance
  # reasons, so be sure to set what you want here.
  #
  def initialize( data, options = Hash.new )
    # build the options for this read/write
    options = DEFAULT_OPTIONS.merge(options)

    # create the IO object we will read from
    @io = if data.is_a? String then StringIO.new(data) else data end

    # store the selected separators
    @col_sep = options[:col_sep]
    @row_sep = options[:row_sep]

    # escape the separators once, so Regexp metacharacters in them stay literal
    esc_col_sep = Regexp.escape(@col_sep)
    esc_row_sep = Regexp.escape(@row_sep)

    # prebuild Regexps for faster parsing
    @parsers = [ /\A(?:#{esc_col_sep})+/,       # for empty leading fields
                 ### The Primary Parser ###
                 / \G(?:^|#{esc_col_sep})       # anchor the match
                   (?: "((?>[^"]*)(?>""[^"]*)*)"  # find quoted fields
                       |                          # ... or ...
                       ([^"#{esc_col_sep}]*)    # unquoted fields
                   )/x,
                 ### End Primary Parser ###
                 /#{esc_row_sep}\z/ ]           # safer than chomp!()
  end

  ### IO and StringIO Delegation ###

  extend Forwardable
  def_delegators :@io, :binmode, :close, :close_read, :close_write, :closed?,
                       :eof, :eof?, :fcntl, :fileno, :flush, :fsync, :ioctl,
                       :isatty, :lineno, :pid, :pos, :reopen, :rewind, :seek,
                       :stat, :string, :sync, :sync=, :tell, :to_i, :to_io,
                       :tty?

  ### End Delegation ###

  #
  # The primary write method for wrapped Strings and IOs, +row+ (an Array) is
  # converted to CSV and appended to the data source.
  #
  # +nil+ fields are written as empty unquoted fields and empty Strings as
  # empty quoted fields, so the two can be told apart when read back.  Any
  # field containing a separator, a quote, or a line ending is quoted.
  #
  # The data source must be open for writing.  Returns +self+ for chaining.
  #
  def <<( row )
    fields = row.map do |field|
      if field.nil?  # reverse +nil+ fields as empty unquoted fields
        ""
      else
        field = String(field)  # Stringify fields
        #
        # reverse empty fields as empty quoted fields;  the separators are
        # checked with include?() because String#count() would treat them as
        # a character set ("-", "^" and "\" are operators there)
        #
        if field.empty?              or
           field.include?(@col_sep)  or
           field.include?(@row_sep)  or
           field.count(%Q{\r\n"}).nonzero?
          %Q{"#{field.gsub('"', '""')}"}  # escape quoted fields
        else
          field                           # unquoted field
        end
      end
    end

    @io << fields.join(@col_sep) + @row_sep  # add separators

    self  # for chaining
  end
  alias_method :add_row, :<<
  alias_method :puts,    :<<

  include Enumerable

  #
  # Yields each row of the data source in turn.
  #
  # Support for Enumerable.
  #
  # The data source must be open for reading.
  #
  def each
    while row = shift
      yield row
    end
  end

  #
  # Slurps the remaining rows and returns an Array of Arrays.
  #
  # The data source must be open for reading.
  #
  def read
    to_a
  end
  alias_method :readlines, :read

  #
  # The primary read method for wrapped Strings and IOs, a single row is pulled
  # from the data source, parsed and returned as an Array of fields.  Returns
  # +nil+ once the data source is exhausted.
  #
  # Raises MalformedCSVError for unquoted fields containing \r or \n and for
  # quoted fields that are never closed.
  #
  # The data source must be open for reading.
  #
  def shift
    # begin with a blank line, so we can always add to it
    line = ""

    #
    # it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
    # because of \r and/or \n characters embedded in quoted fields
    #
    loop do
      #
      # add another read to the line;  end of data is checked explicitly
      # instead of with a rescue modifier, whose binding to <tt>+=</tt>
      # differs between Ruby versions and which would also hide I/O errors
      #
      chunk = @io.gets(@row_sep)
      return nil if chunk.nil?
      line += chunk
      # copy the line (minus the row separator) so we can chop it up in parsing
      parse = line.sub(@parsers[2], "")

      #
      # I believe a blank line should be an <tt>Array.new</tt>, not
      # CSV's <tt>[nil]</tt>
      #
      return Array.new if parse.empty?

      #
      # shave leading empty fields if needed, because the main parser chokes
      # on these;  one +nil+ per separator found, not per character
      #
      csv = if parse.sub!(@parsers[0], "")
        [nil] * ($&.length / @col_sep.length)
      else
        Array.new
      end
      #
      # then parse the main fields with a hyper-tuned Regexp from
      # Mastering Regular Expressions, Second Edition
      #
      parse.gsub!(@parsers[1]) do
        csv << if $1.nil?     # we found an unquoted field
          if $2.empty?        # switch empty unquoted fields to +nil+...
            nil               # for CSV compatibility
          else
            # I decided to take a strict approach to CSV parsing...
            if $2.count("\r\n").zero?  # verify correctness of field...
              $2
            else
              # or throw an Exception
              raise MalformedCSVError, 'Unquoted fields do not allow \r or \n.'
            end
          end
        else                  # we found a quoted field...
          $1.gsub('""', '"')  # unescape contents
        end
        ""  # gsub!'s replacement, clear the field
      end

      # if parse is empty?(), we found all the fields on the line...
      break csv if parse.empty?
      # if we're not empty?() but at eof?(), a quoted field wasn't closed...
      raise MalformedCSVError, "Unclosed quoted field." if @io.eof?
      # otherwise, we need to loop and pull some more data to complete the row
    end
  end
  alias_method :gets,     :shift
  alias_method :readline, :shift
end
|