fastercsv 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ = Change Log
2
+
3
+ Below is a complete listing of changes for each revision of FasterCSV.
4
+
5
+ == 0.1.0
6
+
7
+ * Initial public release.
data/INSTALL ADDED
@@ -0,0 +1,23 @@
1
+ = Installing FasterCSV
2
+
3
+ RubyGems is the preferred easy install method for FasterCSV. However, you can
4
+ install FasterCSV manually as described below.
5
+
6
+ == Installing the Gem
7
+
8
+ FasterCSV is intended to be installed via the
9
+ RubyGems[http://rubyforge.org/projects/rubygems/] system. To get the latest
10
+ version, simply enter the following into your command prompt:
11
+
12
+ $ sudo gem install fastercsv
13
+
14
+ You must have RubyGems[http://rubyforge.org/projects/rubygems/] installed for
15
+ the above to work.
16
+
17
+ == Installing Manually
18
+
19
+ Download the latest version of FasterCSV from the
20
+ {RubyForge project page}[http://rubyforge.org/frs/?group_id=1102]. Navigate to
21
+ the root project directory and enter:
22
+
23
+ $ sudo ruby setup.rb
data/LICENSE ADDED
@@ -0,0 +1,7 @@
1
+ = License Terms
2
+
3
+ Distributed under the user's choice of the GPL[http://www.gnu.org/copyleft/gpl.html] (see COPYING for details) or the
4
+ {Ruby software license}[http://www.ruby-lang.org/en/LICENSE.txt] by
5
+ James Edward Gray II.
6
+
7
+ Please email James[mailto:james@grayproductions.net] with any questions.
data/README ADDED
@@ -0,0 +1,57 @@
1
+ = Read Me
2
+
3
+ by James Edward Gray II
4
+
5
+ == Description
6
+
7
+ Welcome to FasterCSV.
8
+
9
+ FasterCSV is intended as a replacement for Ruby's standard CSV library. It was designed to address concerns users of that library had, and it has three primary goals:
10
+
11
+ 1. Be significantly faster than CSV while remaining a pure Ruby library.
12
+ 2. Use a smaller and easier to maintain code base.
13
+ 3. Improve on the CSV interface.
14
+
15
+ Obviously, the last one is subjective. If you love CSV's interface, odds are
16
+ good this one won't suit you. I did try to defer to that interface whenever I
17
+ didn't have a compelling reason to change it though, so hopefully this won't be
18
+ too radically different.
19
+
20
+ == What's Different From CSV?
21
+
22
+ I'm sure I'll miss something, but I'll try to mention most of the major differences I am aware of, to help others quickly get up to speed:
23
+
24
+ === CSV Parsing
25
+
26
+ * FasterCSV has a stricter parser and will throw MalformedCSVErrors on
27
+ problematic data.
28
+ * FasterCSV has a less liberal idea of a line ending than CSV. What you set as
29
+ the <tt>:row_sep</tt> is law.
30
+ * CSV returns empty lines as <tt>[nil]</tt>. FasterCSV calls them <tt>[]</tt>.
31
+ * FasterCSV has a much faster parser.
32
+
33
+ === Interface
34
+
35
+ * FasterCSV uses Hash-style parameters to set options.
36
+ * FasterCSV does not have generate_row() or parse_row() from CSV.
37
+ * FasterCSV does not have CSV's Reader and Writer classes.
38
+ * FasterCSV::open() is more like Ruby's open() than CSV::open().
39
+ * FasterCSV objects support most standard IO methods.
40
+ * FasterCSV has a new() method used to wrap objects like String and IO for
41
+ reading and writing.
42
+ * FasterCSV::generate() is different from CSV::generate().
43
+
44
+ If you use this library and find yourself missing any functionality I have trimmed, please {let me know}[mailto:james@grayproductions.net].
45
+
46
+ == Documentation
47
+
48
+ See FasterCSV for documentation.
49
+
50
+ == Installing
51
+
52
+ See the INSTALL file for instructions.
53
+
54
+ == Questions and/or Comments
55
+
56
+ Feel free to email {James Edward Gray II}[mailto:james@grayproductions.net] with
57
+ any questions.
@@ -0,0 +1,83 @@
1
# Rakefile for FasterCSV: tests, documentation, benchmarks and gem packaging.

require "rake/rdoctask"
require "rake/testtask"
require "rake/gempackagetask"

require "rubygems"

# Running plain `rake` runs the test suite.
task :default => [:test]

# rake test -- run the complete unit test suite.
Rake::TestTask.new do |test|
  test.libs       << "test"
  test.test_files =  [ "test/ts_all.rb" ]
  test.verbose    =  true
end

# rake rdoc -- build the HTML documentation into doc/html.
Rake::RDocTask.new do |rdoc|
  rdoc.main     = "README"
  rdoc.rdoc_files.include( "README",  "INSTALL",
                           "TODO",    "CHANGELOG",
                           "AUTHORS", "COPYING",
                           "LICENSE", "lib/" )
  rdoc.rdoc_dir = "doc/html"
  rdoc.title    = "FasterCSV Documentation"
end

desc "Upload current documentation to Rubyforge"
task :upload_docs => [:rdoc] do
  sh "scp -r doc/html/* " +
     "bbazzarrakk@rubyforge.org:/var/www/gforge-projects/fastercsv/"
end

desc "Show library's code statistics"
task :stats do
  # loaded lazily on purpose: only this task needs code_statistics
  require 'code_statistics'
  CodeStatistics.new( ["FasterCSV", "lib"],
                      ["Units",     "test"] ).to_s
end

desc "Time FasterCSV and CSV"
task :benchmark do
  path = "test/test_data.csv"
  sh %Q{time ruby -r csv -e 'CSV.foreach("#{path}") { |row| }'}
  # use -I lib so the library is found even when "." is not on the load path
  sh %Q{time ruby -I lib -r faster_csv -e 'FasterCSV.foreach("#{path}") { |row| }'}
end

# The gem specification used by the packaging tasks below.
spec = Gem::Specification.new do |spec|
  spec.name     = "fastercsv"
  spec.version  = "0.1.0"
  spec.platform = Gem::Platform::RUBY
  spec.summary  = "FasterCSV is CSV, but faster, smaller, and cleaner."

  spec.files           = Dir.glob("{lib,test}/**/*.rb").
                             reject { |item| item.include?(".svn") } +
                         ["Rakefile", "setup.rb"]
  spec.test_suite_file = "test/ts_all.rb"

  spec.has_rdoc         =  true
  spec.extra_rdoc_files =  %w{README INSTALL TODO CHANGELOG LICENSE}
  spec.rdoc_options     << "--title" << "FasterCSV Documentation" <<
                           "--main"  << "README"

  spec.require_path = "lib"
  # must name the file under lib/ (the Rakefile's own benchmark task loads
  # faster_csv from lib/), not the gem
  spec.autorequire  = "faster_csv"

  spec.author            = "James Edward Gray II"
  spec.email             = "james@grayproductions.net"
  spec.rubyforge_project = "fastercsv"
  spec.homepage          = "http://fastercsv.rubyforge.org"
  spec.description       = <<END_DESC
FasterCSV is intended as a complete replacement to the CSV standard library. It
is significantly faster and smaller while still being pure Ruby code. It also
strives for a better interface.
END_DESC
end

# rake package / rake gem -- build the gem plus zip and tar archives.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.need_zip = true
  pkg.need_tar = true
end

desc "Add new files to Subversion"
task :add_to_svn do
  # every path `svn status` flags with "?" is unversioned, so add it
  sh %Q{svn status | ruby -nae 'system "svn add \#{$F[1]}" if $F[0] == "?"' }
end
data/TODO ADDED
@@ -0,0 +1,8 @@
1
+ = To Do List
2
+
3
+ The following is a list of planned expansions for FasterCSV, in no particular
4
+ order.
5
+
6
+ * Add support for accessing fields by headers (from first row of document).
7
+ * Add "converters" for switching numbers to Integers or Floats, dates to Date or
8
+ Time objects, etc.
@@ -0,0 +1,400 @@
1
+ #!/usr/local/bin/ruby -w
2
+
3
+ # = faster_csv.rb -- Faster CSV Reading and Writing
4
+ #
5
+ # Created by James Edward Gray II on 2005-10-31.
6
+ # Copyright 2005 Gray Productions. All rights reserved.
7
+ #
8
+ # See FasterCSV for documentation.
9
+
10
+ require "stringio"
11
+ require "forwardable"
12
+
13
+ #
14
+ # This class provides a complete interface to CSV files and data. It offers
15
+ # tools to enable you to read and write to and from Strings or IO objects, as
16
+ # needed.
17
+ #
18
+ # == Reading
19
+ #
20
+ # === From a File
21
+ #
22
+ # ==== A Line at a Time
23
+ #
24
+ # FasterCSV.foreach("path/to/file.csv") do |row|
25
+ # # use row here...
26
+ # end
27
+ #
28
+ # ==== All at Once
29
+ #
30
+ # arr_of_arrs = FasterCSV.read("path/to/file.csv")
31
+ #
32
+ # === From a String
33
+ #
34
+ # ==== A Line at a Time
35
+ #
36
+ # FasterCSV.parse("CSV,data,String") do |row|
37
+ # # use row here...
38
+ # end
39
+ #
40
+ # ==== All at Once
41
+ #
42
+ # arr_of_arrs = FasterCSV.parse("CSV,data,String")
43
+ #
44
+ # == Writing
45
+ #
46
+ # === To a File
47
+ #
48
+ # FasterCSV.open("path/to/file.csv", "w") do |csv|
49
+ # csv << ["row", "of", "CSV", "data"]
50
+ # csv << ["another", "row"]
51
+ # # ...
52
+ # end
53
+ #
54
+ # === To a String
55
+ #
56
+ # csv_string = FasterCSV.generate do |csv|
57
+ # csv << ["row", "of", "CSV", "data"]
58
+ # csv << ["another", "row"]
59
+ # # ...
60
+ # end
61
+ #
62
+ # == Convert a Single Line
63
+ #
64
+ # csv_string = FasterCSV.generate_line(["row", "of", "CSV", "data"]) # to CSV
65
+ # csv_array = FasterCSV.parse_line("CSV,data,String") # from CSV
66
+ #
67
class FasterCSV
  # The error thrown when the parser encounters illegal CSV formatting.
  class MalformedCSVError < RuntimeError; end

  #
  # The options used when no overrides are given by calling code.  They are:
  #
  # <b><tt>:col_sep</tt></b>::  <tt>","</tt>
  # <b><tt>:row_sep</tt></b>::  <tt>$/</tt>
  #
  DEFAULT_OPTIONS = {:col_sep => ",", :row_sep => $/}.freeze

  #
  # This method is intended as the primary interface for reading CSV files.  You
  # pass a +path+ and any +options+ you wish to set for the read.  Each row of
  # file will be passed to the provided +block+ in turn.
  #
  # The +options+ parameter can be anything FasterCSV::new() understands.
  #
  def self.foreach( path, options = Hash.new, &block )
    open(path, options) do |csv|
      csv.each(&block)
    end
  end

  #
  # This method wraps a String in a FasterCSV object which is passed to the
  # provided block.  You can use the block to append CSV rows to the String and
  # when the block exits, the final String will be returned.
  #
  # The +options+ parameter can be anything FasterCSV::new() understands.
  #
  def self.generate( options = Hash.new )
    # dup() the literal so the buffer is writable even if literals are frozen
    faster_csv = new("".dup, options)
    yield faster_csv
    faster_csv.string
  end

  #
  # This method is a shortcut for converting a single row (Array) into a CSV
  # String.
  #
  # The +options+ parameter can be anything FasterCSV::new() understands.
  #
  def self.generate_line( row, options = Hash.new )
    # dup() the literal so the buffer is writable even if literals are frozen
    (new("".dup, options) << row).string
  end

  #
  # :call-seq:
  #   open( *args, options = Hash.new ) { |faster_csv| ... }
  #   open( *args, options = Hash.new )
  #
  # This method opens an IO object, and wraps that with FasterCSV.  This is
  # intended as the primary interface for writing a CSV file.
  #
  # You may pass any +args+ Ruby's open() understands followed by an optional
  # Hash containing any +options+ FasterCSV::new() understands.
  #
  # This method works like Ruby's open() call, in that it will pass a FasterCSV
  # object to a provided block and close it when the block terminates, or it
  # will return the FasterCSV object when no block is provided.  (*Note*: This
  # is different from the standard CSV library which passes rows to the block.
  # Use FasterCSV::foreach() for that behavior.)
  #
  # An opened FasterCSV object will delegate to many IO methods, for
  # convenience.  You may call:
  #
  # * binmode()
  # * close()
  # * close_read()
  # * close_write()
  # * closed?()
  # * eof()
  # * eof?()
  # * fcntl()
  # * fileno()
  # * flush()
  # * fsync()
  # * ioctl()
  # * isatty()
  # * lineno()
  # * pid()
  # * pos()
  # * reopen()
  # * rewind()
  # * seek()
  # * stat()
  # * sync()
  # * sync=()
  # * tell()
  # * to_i()
  # * to_io()
  # * tty?()
  #
  def self.open( *args )
    # find the +options+ Hash
    options = if args.last.is_a? Hash then args.pop else Hash.new end

    # wrap a File opened with the remaining +args+, making sure the File is
    # not leaked if the wrapper cannot be built (bad +options+, for example)
    file = File.open(*args)
    begin
      csv = new(file, options)
    rescue
      file.close
      raise
    end

    # handle blocks like Ruby's open(), not like the CSV library
    if block_given?
      begin
        yield csv
      ensure
        csv.close
      end
    else
      csv
    end
  end

  #
  # :call-seq:
  #   parse( str, options ) { |row| ... }
  #   parse( str, options )
  #
  # This method can be used to easily parse CSV out of a String.  You may either
  # provide a +block+ which will be called with each row of the String in turn,
  # or just use the returned Array of Arrays (when no +block+ is given).
  #
  # You pass your +str+ to read from, and an optional +options+ Hash containing
  # anything FasterCSV::new() understands.
  #
  def self.parse( *args, &block )
    csv = new(*args)
    if block.nil?  # slurp contents, if no block is given
      begin
        csv.read
      ensure
        csv.close
      end
    else           # or pass each row to a provided block
      csv.each(&block)
    end
  end

  #
  # Use to slurp a CSV file into an Array of Arrays.  Pass the +path+ to the
  # file and any +options+ FasterCSV::new() understands.
  #
  def self.read( path, options = Hash.new )
    open(path, options) { |csv| csv.read }
  end

  # Alias for FasterCSV::read().
  def self.readlines( path, options = Hash.new )
    open(path, options) { |csv| csv.readlines }
  end

  #
  # This method is a shortcut for converting a single line of a CSV String into
  # an Array.  Note that if +line+ contains multiple rows, anything beyond the
  # first row is ignored.
  #
  # The +options+ parameter can be anything FasterCSV::new() understands.
  #
  def self.parse_line( line, options = Hash.new )
    new(line, options).shift
  end

  #
  # This constructor will wrap either a String or IO object passed in +data+ for
  # reading and/or writing.  In addition to the FasterCSV instance methods,
  # several IO methods are delegated.  (See FasterCSV::open() for a complete
  # list.)  If you pass a String for +data+, you can later retrieve it (after
  # writing to it, for example) with FasterCSV.string().
  #
  # You may set any reading and/or writing preferences in the +options+ Hash.
  # Available options are:
  #
  # <b><tt>:col_sep</tt></b>::  The String placed between each field.
  # <b><tt>:row_sep</tt></b>::  The String appended to the end of each row.
  #
  # See FasterCSV::DEFAULT_OPTIONS for the default settings.
  #
  # Options cannot be overridden in the instance methods for performance
  # reasons, so be sure to set what you want here.
  #
  def initialize( data, options = Hash.new )
    # build the options for this read/write
    options = DEFAULT_OPTIONS.merge(options)

    # create the IO object we will read from
    @io = if data.is_a? String then StringIO.new(data) else data end

    # store the selected separators
    @col_sep = options[:col_sep]
    @row_sep = options[:row_sep]

    # separators are literal text, never Regexp syntax, so escape them once
    esc_col = Regexp.escape(@col_sep)
    esc_row = Regexp.escape(@row_sep)

    # prebuild Regexps for faster parsing
    @parsers = [
      /\A(?:#{esc_col})+/,                 # for empty leading fields
      ### The Primary Parser ###
      / \G(?:^|#{esc_col})                 # anchor the match
        (?: "((?>[^"]*)(?>""[^"]*)*)"      # find quoted fields
            |                              # ... or ...
            ([^"#{esc_col}]*)              # unquoted fields
        )/x,
      ### End Primary Parser ###
      /#{esc_row}\z/                       # safer than chomp!()
    ]

    #
    # a field must be quoted on output when it holds a quote, a line ending or
    # any character of either separator (the parser rejects every +col_sep+
    # character in unquoted fields, so the writer has to be just as strict)
    #
    specials       = (%Q{\r\n"} + @col_sep + @row_sep).split(//).uniq
    @quote_trigger = /[#{specials.map { |char| Regexp.escape(char) }.join}]/
  end

  ### IO and StringIO Delegation ###

  extend Forwardable
  def_delegators :@io, :binmode, :close, :close_read, :close_write, :closed?,
                       :eof, :eof?, :fcntl, :fileno, :flush, :fsync, :ioctl,
                       :isatty, :lineno, :pid, :pos, :reopen, :rewind, :seek,
                       :stat, :string, :sync, :sync=, :tell, :to_i, :to_io,
                       :tty?

  ### End Delegation ###

  #
  # The primary write method for wrapped Strings and IOs, +row+ (an Array) is
  # converted to CSV and appended to the data source.
  #
  # +nil+ fields are written as empty unquoted fields, empty Strings as empty
  # quoted fields, and any field containing a quote, a line ending or separator
  # characters is quoted with embedded quotes doubled.
  #
  # The data source must be open for writing.  Returns +self+, so calls can be
  # chained.
  #
  def <<( row )
    fields = row.map do |field|
      if field.nil?  # reverse +nil+ fields as empty unquoted fields
        ""
      else
        field = String(field)  # Stringify fields
        # reverse empty fields as empty quoted fields
        if field.empty? || field =~ @quote_trigger
          %Q{"#{field.gsub('"', '""')}"}  # escape quoted fields
        else
          field  # unquoted field
        end
      end
    end
    @io << fields.join(@col_sep) + @row_sep  # add separators

    self  # for chaining
  end
  alias_method :add_row, :<<
  alias_method :puts,    :<<

  include Enumerable

  #
  # Yields each row of the data source in turn.
  #
  # Support for Enumerable.
  #
  # The data source must be open for reading.
  #
  def each
    while row = shift
      yield row
    end
  end

  #
  # Slurps the remaining rows and returns an Array of Arrays.
  #
  # The data source must be open for reading.
  #
  def read
    to_a
  end
  alias_method :readlines, :read

  #
  # The primary read method for wrapped Strings and IOs, a single row is pulled
  # from the data source, parsed and returned as an Array of fields.  Returns
  # +nil+ once the data source is exhausted and an empty Array for a blank line.
  #
  # Raises MalformedCSVError for an unclosed quoted field or for \r or \n in an
  # unquoted field.  Errors raised by the data source itself (an IO that is not
  # open for reading, for example) are passed through to the caller.
  #
  # The data source must be open for reading.
  #
  def shift
    # begin with a blank line, so we can always add to it
    line = ""

    #
    # it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
    # because of \r and/or \n characters embedded in quoted fields
    #
    loop do
      # add another read to the line, a +nil+ read means we are out of data
      chunk = @io.gets(@row_sep)
      return nil if chunk.nil?
      line += chunk

      # copy the line so we can chop it up in parsing
      parse = line.dup
      parse.sub!(@parsers[2], "")

      #
      # I believe a blank line should be an <tt>Array.new</tt>, not
      # CSV's <tt>[nil]</tt>
      #
      return Array.new if parse.empty?

      #
      # shave leading empty fields if needed, because the main parser chokes
      # on these (one +nil+ per separator, however long the separator is)
      #
      leading = parse.slice!(@parsers[0])
      csv     = if leading
        [nil] * (leading.length / @col_sep.length)
      else
        Array.new
      end

      #
      # then parse the main fields with a hyper-tuned Regexp from
      # Mastering Regular Expressions, Second Edition
      #
      parse.gsub!(@parsers[1]) do
        csv << if $1.nil?     # we found an unquoted field
          if $2.empty?        # switch empty unquoted fields to +nil+...
            nil               # for CSV compatibility
          else
            # I decided to take a strict approach to CSV parsing...
            if $2.count("\r\n").zero?  # verify correctness of field...
              $2
            else
              # or throw an Exception
              raise MalformedCSVError, 'Unquoted fields do not allow \r or \n.'
            end
          end
        else                  # we found a quoted field...
          $1.gsub('""', '"')  # unescape contents
        end
        ""  # gsub!'s replacement, clear the field
      end

      # if parse is empty?(), we found all the fields on the line...
      break csv if parse.empty?
      # if we're not empty?() but at eof?(), a quoted field wasn't closed...
      raise MalformedCSVError, "Unclosed quoted field." if @io.eof?
      # otherwise, we need to loop and pull some more data to complete the row
    end
  end
  alias_method :gets,     :shift
  alias_method :readline, :shift
end