csv_madness 0.0.4 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,31 +2,61 @@ module CsvMadness
2
2
  class Sheet
3
3
  COLUMN_TYPES = {
4
4
  number: Proc.new do |cell, record|
5
- if (cell || "").strip.match(/^\d*$/)
6
- cell.to_i
7
- else
8
- cell.to_f
5
+ rval = cell
6
+
7
+ unless cell.nil? || (cell.is_a?(String) && cell.length == 0)
8
+
9
+ begin
10
+ rval = Integer(cell)
11
+ rescue
12
+ # do nothing
13
+ end
14
+
15
+ unless rval.is_a?(Integer)
16
+ begin
17
+ rval = Float(cell)
18
+ rescue
19
+ # do nothing
20
+ end
21
+ end
9
22
  end
23
+
24
+ rval
10
25
  end,
11
26
 
12
27
  integer: Proc.new do |cell, record|
13
- cell.to_i
28
+ begin
29
+ Integer(cell)
30
+ rescue
31
+ cell
32
+ end
14
33
  end,
15
34
 
16
35
  float: Proc.new do |cell, record|
17
- cell.to_f
36
+ begin
37
+ Float(cell)
38
+ rescue
39
+ cell
40
+ end
18
41
  end,
19
42
 
20
43
  date: Proc.new do |cell, record|
21
44
  begin
22
- parse = Time.parse( cell )
45
+ parse = Time.parse( cell || "" )
23
46
  rescue ArgumentError
24
- parse = "Invalid Time Format: <#{cell}>"
47
+ if cell =~ /^Invalid Time Format: /
48
+ parse = cell
49
+ else
50
+ parse = "Invalid Time Format: <#{cell}>"
51
+ end
25
52
  end
53
+
26
54
  parse
27
55
  end
28
56
  }
29
57
 
58
+ FORBIDDEN_COLUMN_NAMES = [:to_s] # breaks things hard when you use them. Probably not comprehensive, sadly.
59
+
30
60
  # Used to make getter/setter names out of the original header strings.
31
61
  # " hello;: world! " => :hello_world
32
62
  def self.getter_name( name )
@@ -108,7 +138,7 @@ module CsvMadness
108
138
  end
109
139
  end
110
140
 
111
- attr_reader :columns, :index_columns, :records, :spreadsheet_file, :record_class
141
+ attr_reader :columns, :index_columns, :records, :spreadsheet_file, :record_class, :opts
112
142
  # opts:
113
143
  # index: ( [:id, :id2 ] )
114
144
  # columns you want mapped for quick
@@ -123,21 +153,88 @@ module CsvMadness
123
153
  #
124
154
  # header: false
125
155
  # anything else, we assume the csv file has a header row
126
- def initialize( spreadsheet, opts = {} )
127
- if spreadsheet.is_a?(Array)
128
- @spreadsheet_file = nil
129
-
156
+ def initialize( *args )
157
+ if args.last.is_a?(Hash)
158
+ @opts = args.pop
130
159
  else
131
- @spreadsheet_file = self.class.find_spreadsheet_in_filesystem( spreadsheet )
160
+ @opts = {}
132
161
  end
133
- @opts = opts
162
+
163
+ firstarg = args.shift
164
+
165
+ case firstarg
166
+ when NilClass
167
+ @spreadsheet_file = nil
168
+ @opts[:columns] ||= []
169
+ when String, FunWith::Files::FilePath, Pathname
170
+ @spreadsheet_file = self.class.find_spreadsheet_in_filesystem( firstarg )
171
+ when Array
172
+ @spreadsheet_file = nil
173
+ @opts[:columns] ||= firstarg
174
+ end
175
+
134
176
  @opts[:header] = (@opts[:header] == false ? false : true) # true unless already explicitly set to false
135
177
 
136
178
  reload_spreadsheet
137
179
  end
138
180
 
181
+ def <<( record )
182
+ self.add_record( record )
183
+ end
184
+
185
+ def add_record( record )
186
+ case record
187
+ when Array
188
+ # CSV::Row.new( column names, column_entries ) (in same order as columns, natch)
189
+ record = CSV::Row.new( self.columns, record )
190
+ when Hash
191
+ header = []
192
+ fields = []
193
+
194
+ for col in self.columns
195
+ header << col
196
+ fields << record[col]
197
+ end
198
+
199
+ record = CSV::Row.new( header, fields )
200
+ when CSV::Row
201
+ # do nothing
202
+ else
203
+ raise "sheet.add_record() doesn't take objects of type #{record.inspect}" unless record.respond_to?(:csv_data)
204
+ record = record.csv_data
205
+ end
206
+
207
+ record = @record_class.new( record )
208
+ @records << record
209
+ add_to_indexes( record )
210
+ end
211
+
212
+ # record can be the row number (integer from 0...@records.length)
213
+ # record can be the record itself (anonymous class)
214
+ def remove_record( record )
215
+ record = @records[record] if record.is_a?(Integer)
216
+ return if record.nil?
217
+
218
+ self.remove_from_index( record )
219
+ @records.delete( record )
220
+ end
221
+
222
+ # Here's the deal: you hand us a block, and we'll remove the records for which
223
+ # it yields _true_.
224
+ def remove_records( records = nil, &block )
225
+ if block_given?
226
+ for record in @records
227
+ remove_record( record ) if yield( record ) == true
228
+ end
229
+ else # records should be an array
230
+ for record in records
231
+ self.remove_record( record )
232
+ end
233
+ end
234
+ end
235
+
139
236
  def reload_spreadsheet( opts = @opts )
140
- load_csv
237
+ load_csv if @spreadsheet_file
141
238
  set_initial_columns( opts[:columns] )
142
239
  create_record_class
143
240
  package
@@ -191,7 +288,47 @@ module CsvMadness
191
288
  reindex
192
289
  @records
193
290
  end
194
-
291
+
292
+
293
+ # give a copy of the current spreadsheet, but with no records
294
+ def blanked()
295
+ sheet = self.class.new
296
+ sheet.columns = @columns.clone
297
+ sheet.index_columns = @index_columns.clone
298
+ sheet.records = []
299
+ sheet.spreadsheet_file = nil
300
+ sheet.create_data_accessor_module
301
+ sheet.create_record_class
302
+ sheet.opts = @opts.clone
303
+ sheet.reindex
304
+
305
+ sheet
306
+ end
307
+
308
+ # give a block, and get back a hash.
309
+ # The hash keys are the results of the block.
310
+ # The hash values are copies of the spreadsheets, with only the records
311
+ # which caused the block to return the key.
312
+ def split( &block )
313
+ sheets = Hash.new
314
+
315
+ for record in @records
316
+ result_key = yield record
317
+ ( sheets[result_key] ||= self.blanked() ) << record
318
+ end
319
+
320
+ sheets
321
+ # sheet_args = self.blanked
322
+ # for key, record_set in records
323
+ # sheet = self.clone
324
+ # sheet.records =
325
+ #
326
+ # records[key] = sheet
327
+ # end
328
+ #
329
+ # records
330
+ end
331
+
195
332
  def column col
196
333
  @records.map(&col)
197
334
  end
@@ -231,6 +368,7 @@ module CsvMadness
231
368
  # If no block given, adds an empty column
232
369
  def add_column( column, &block )
233
370
  raise "Column already exists: #{column}" if @columns.include?( column )
371
+ raise "#{column} is in the list FORBIDDEN_COLUMN_NAMES" if FORBIDDEN_COLUMN_NAMES.include?(column)
234
372
  @columns << column
235
373
 
236
374
  # add empty column to each row
@@ -316,8 +454,15 @@ module CsvMadness
316
454
  end
317
455
  end
318
456
 
457
+ def length
458
+ self.records.length
459
+ end
460
+
319
461
  protected
462
+ attr_writer :columns, :index_columns, :records, :spreadsheet_file, :record_class, :opts
463
+
320
464
  def load_csv
465
+
321
466
  # encoding seems to solve a specific problem with a specific spreadsheet, at an unknown cost.
322
467
  @csv = CSV.new( File.read(@spreadsheet_file).force_encoding("ISO-8859-1").encode("UTF-8"),
323
468
  { write_headers: true,
@@ -325,21 +470,34 @@ module CsvMadness
325
470
  end
326
471
 
327
472
  def add_to_index( col, key, record )
328
- @indexes[col][key] = record
473
+ (@indexes[col] ||= {})[key] = record
329
474
  end
330
475
 
331
- # Reindexes the record lookup tables.
332
- def reindex
333
- @indexes = {}
334
- for col in @index_columns
335
- @indexes[col] = {}
336
-
337
- for record in @records
476
+ def add_to_indexes( records )
477
+ if records.is_a?( Array )
478
+ for record in records
479
+ add_to_indexes( record )
480
+ end
481
+ else
482
+ record = records
483
+ for col in @index_columns
338
484
  add_to_index( col, record.send(col), record )
339
485
  end
340
486
  end
341
487
  end
342
488
 
489
+ def remove_from_index( record )
490
+ for col in @index_columns
491
+ @indexes[col].delete( record.send(col) )
492
+ end
493
+ end
494
+
495
+ # Reindexes the record lookup tables.
496
+ def reindex
497
+ @indexes = {}
498
+ add_to_indexes( @records )
499
+ end
500
+
343
501
  # shouldn't require reindex
344
502
  def rename_index_column( column, new_name )
345
503
  @index_columns[ @index_columns.index( column ) ] = new_name
@@ -381,18 +539,23 @@ module CsvMadness
381
539
  # prints a warning and a comparison of the columns to the headers.
382
540
  def set_initial_columns( columns = nil )
383
541
  if columns.nil?
384
- if @opts[:header] == false #
385
- @columns = (0...csv_column_count).map{ |i| :"col#{i}" }
542
+ if @opts[:header] == false
543
+ columns = (0...csv_column_count).map{ |i| :"col#{i}" }
386
544
  else
387
- @columns = fetch_csv_headers.map{ |name| self.class.getter_name( name ) }
545
+ columns = fetch_csv_headers.map{ |name| self.class.getter_name( name ) }
388
546
  end
389
547
  else
390
- @columns = columns
391
- unless @columns.length == csv_column_count
392
- puts "Warning <#{@spreadsheet_file}>: columns array does not match the number of columns in the spreadsheet."
548
+ unless !@csv || columns.length == csv_column_count
549
+ $stderr.puts "Warning <#{@spreadsheet_file}>: columns array does not match the number of columns in the spreadsheet."
393
550
  compare_columns_to_headers
394
551
  end
395
552
  end
553
+
554
+ for column in columns
555
+ raise "#{column} is in the list FORBIDDEN_COLUMN_NAMES" if FORBIDDEN_COLUMN_NAMES.include?(column)
556
+ end
557
+
558
+ @columns = columns
396
559
  end
397
560
 
398
561
  # Printout so the user can see which CSV columns are being matched to which
@@ -402,7 +565,7 @@ module CsvMadness
402
565
  headers = fetch_csv_headers
403
566
 
404
567
  for i in 0...([@columns, headers].map(&:length).max)
405
- puts "\t#{i}: #{@columns[i]} ==> #{headers[i]}"
568
+ $stdout.puts "\t#{i}: #{@columns[i]} ==> #{headers[i]}"
406
569
  end
407
570
  end
408
571
 
@@ -410,7 +573,7 @@ module CsvMadness
410
573
  # Create objects that respond to the recipe-named methods
411
574
  def package
412
575
  @records = []
413
- @csv.each do |row|
576
+ (@csv || []).each do |row|
414
577
  @records << @record_class.new( row )
415
578
  end
416
579
  end
@@ -0,0 +1,2 @@
1
+ "to_s"
2
+ "string!"
@@ -0,0 +1,11 @@
1
+ "id","fname","lname","party"
2
+ "1","Mary","Moore","D"
3
+ "2","Bill","Paxton","R"
4
+ "3","Charles","Darwin","I"
5
+ "4","Chuck","Norris","D"
6
+ "5","Annabelle","Lecter","R"
7
+ "6","Mortimer","Bradford","D"
8
+ "7","Wilford","Brimley","I"
9
+ "8","Cala","Wilcox","R"
10
+ "9","Horace","Wilcox","R"
11
+ "10","Jacob","Buford","D"
@@ -0,0 +1,3 @@
1
+ "id","number","integer","float","date"
2
+ ,,,,
3
+ 12,134.2,100,123.4,2013-01-13
@@ -0,0 +1,5 @@
1
+ "id","fname","lname","age","born"
2
+ "1",,"Moore",,"1986-04-08 15:06:10"
3
+ "2","Bill","Paxton","39","1974-02-22"
4
+ "3","Charles","Darwin",,"Invalid Date"
5
+ "4","Chuck","Norris","57"
@@ -1,30 +1,37 @@
1
- require 'rubygems'
2
- require 'bundler'
3
-
4
- begin
5
- Bundler.setup(:default, :development)
6
- rescue Bundler::BundlerError => e
7
- $stderr.puts e.message
8
- $stderr.puts "Run `bundle install` to install missing gems"
9
- exit e.status_code
10
- end
11
-
12
- require 'test/unit'
13
- require 'shoulda'
1
+ # require 'rubygems'
2
+ # require 'bundler'
3
+ #
4
+ # begin
5
+ # Bundler.setup(:default, :development)
6
+ # rescue Bundler::BundlerError => e
7
+ # $stderr.puts e.message
8
+ # $stderr.puts "Run `bundle install` to install missing gems"
9
+ # exit e.status_code
10
+ # end
11
+ #
12
+ # require 'test/unit'
13
+ # require 'shoulda'
14
14
 
15
15
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
16
16
  $LOAD_PATH.unshift(File.dirname(__FILE__))
17
17
 
18
+ require 'fun_with_testing'
18
19
  require 'csv_madness'
19
20
 
20
- class Test::Unit::TestCase
21
- end
21
+ # class Test::Unit::TestCase
22
+ # end
22
23
 
23
- class MadTestCase < Test::Unit::TestCase
24
+ class MadTestCase < FunWith::Testing::TestCase # Test::Unit::TestCase
25
+ include FunWith::Testing::Assertions::Basics
26
+
24
27
  MARY_ID = "1"
25
28
  BILL_ID = "2"
26
29
  DARWIN_ID = "3"
27
30
  CHUCK_ID = "4"
31
+
32
+ def setup
33
+ set_spreadsheet_paths
34
+ end
28
35
 
29
36
  def load_mary
30
37
  id = @simple.index_columns.first
@@ -78,9 +85,9 @@ class MadTestCase < Test::Unit::TestCase
78
85
  end
79
86
 
80
87
  def set_spreadsheet_paths
81
- @csv_search_path = Pathname.new( __FILE__ ).dirname.join("csv")
82
- @csv_output_path = @csv_search_path.join("out")
83
- CsvMadness::Sheet.add_search_path( @csv_search_path )
88
+ @csv_load_path = CsvMadness.root( "test", "csv" )
89
+ @csv_output_path = CsvMadness.root( "test", "csv", "out" )
90
+ CsvMadness::Sheet.add_search_path( @csv_load_path )
84
91
  CsvMadness::Sheet.add_search_path( @csv_output_path )
85
92
  end
86
93
 
@@ -0,0 +1,33 @@
1
+ require 'helper'
2
+
3
+ class TestBuilder < MadTestCase
4
+ context "testing simple cases" do
5
+ should "spreadsheetize integers" do
6
+ integers = [65, 66, 67, 68, 69, 70]
7
+ sb = CsvMadness::Builder.new do |s|
8
+ s.column( :even, "even?" )
9
+ s.column( :odd, "odd?" )
10
+ s.column( :hashh, "hash" )
11
+ s.column( :hashhash, "hash.hash" )
12
+ s.column( :chr )
13
+ s.column( :not_a_valid_method )
14
+ end
15
+ #
16
+ ss = sb.build( integers )
17
+
18
+ for record in ss.records
19
+ assert_kind_of( CsvMadness::Record, ss.records.first )
20
+ for col in [:even, :odd, :hashh, :hashhash, :chr]
21
+ assert_respond_to record, col
22
+ end
23
+ end
24
+
25
+ assert_matches ss.records.first.not_a_valid_method, /^ERROR: undefined method `not_a_valid_method'/
26
+
27
+ ss = sb.build( integers, :on_error => :ignore )
28
+
29
+ assert_equal "", ss.records.first.not_a_valid_method
30
+
31
+ end
32
+ end
33
+ end