csv_madness 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,31 +2,61 @@ module CsvMadness
2
2
  class Sheet
3
3
  COLUMN_TYPES = {
4
4
  number: Proc.new do |cell, record|
5
- if (cell || "").strip.match(/^\d*$/)
6
- cell.to_i
7
- else
8
- cell.to_f
5
+ rval = cell
6
+
7
+ unless cell.nil? || (cell.is_a?(String) && cell.length == 0)
8
+
9
+ begin
10
+ rval = Integer(cell)
11
+ rescue
12
+ # do nothing
13
+ end
14
+
15
+ unless rval.is_a?(Integer)
16
+ begin
17
+ rval = Float(cell)
18
+ rescue
19
+ # do nothing
20
+ end
21
+ end
9
22
  end
23
+
24
+ rval
10
25
  end,
11
26
 
12
27
  integer: Proc.new do |cell, record|
13
- cell.to_i
28
+ begin
29
+ Integer(cell)
30
+ rescue
31
+ cell
32
+ end
14
33
  end,
15
34
 
16
35
  float: Proc.new do |cell, record|
17
- cell.to_f
36
+ begin
37
+ Float(cell)
38
+ rescue
39
+ cell
40
+ end
18
41
  end,
19
42
 
20
43
  date: Proc.new do |cell, record|
21
44
  begin
22
- parse = Time.parse( cell )
45
+ parse = Time.parse( cell || "" )
23
46
  rescue ArgumentError
24
- parse = "Invalid Time Format: <#{cell}>"
47
+ if cell =~ /^Invalid Time Format: /
48
+ parse = cell
49
+ else
50
+ parse = "Invalid Time Format: <#{cell}>"
51
+ end
25
52
  end
53
+
26
54
  parse
27
55
  end
28
56
  }
29
57
 
58
+ FORBIDDEN_COLUMN_NAMES = [:to_s] # breaks things hard when you use them. Probably not comprehensive, sadly.
59
+
30
60
  # Used to make getter/setter names out of the original header strings.
31
61
  # " hello;: world! " => :hello_world
32
62
  def self.getter_name( name )
@@ -108,7 +138,7 @@ module CsvMadness
108
138
  end
109
139
  end
110
140
 
111
- attr_reader :columns, :index_columns, :records, :spreadsheet_file, :record_class
141
+ attr_reader :columns, :index_columns, :records, :spreadsheet_file, :record_class, :opts
112
142
  # opts:
113
143
  # index: ( [:id, :id2 ] )
114
144
  # columns you want mapped for quick
@@ -123,21 +153,88 @@ module CsvMadness
123
153
  #
124
154
  # header: false
125
155
  # anything else, we assume the csv file has a header row
126
- def initialize( spreadsheet, opts = {} )
127
- if spreadsheet.is_a?(Array)
128
- @spreadsheet_file = nil
129
-
156
+ def initialize( *args )
157
+ if args.last.is_a?(Hash)
158
+ @opts = args.pop
130
159
  else
131
- @spreadsheet_file = self.class.find_spreadsheet_in_filesystem( spreadsheet )
160
+ @opts = {}
132
161
  end
133
- @opts = opts
162
+
163
+ firstarg = args.shift
164
+
165
+ case firstarg
166
+ when NilClass
167
+ @spreadsheet_file = nil
168
+ @opts[:columns] ||= []
169
+ when String, FunWith::Files::FilePath, Pathname
170
+ @spreadsheet_file = self.class.find_spreadsheet_in_filesystem( firstarg )
171
+ when Array
172
+ @spreadsheet_file = nil
173
+ @opts[:columns] ||= firstarg
174
+ end
175
+
134
176
  @opts[:header] = (@opts[:header] == false ? false : true) # true unless already explicitly set to false
135
177
 
136
178
  reload_spreadsheet
137
179
  end
138
180
 
181
+ def <<( record )
182
+ self.add_record( record )
183
+ end
184
+
185
+ def add_record( record )
186
+ case record
187
+ when Array
188
+ # CSV::Row.new( column names, column_entries ) (in same order as columns, natch)
189
+ record = CSV::Row.new( self.columns, record )
190
+ when Hash
191
+ header = []
192
+ fields = []
193
+
194
+ for col in self.columns
195
+ header << col
196
+ fields << record[col]
197
+ end
198
+
199
+ record = CSV::Row.new( header, fields )
200
+ when CSV::Row
201
+ # do nothing
202
+ else
203
+ raise "sheet.add_record() doesn't take objects of type #{record.inspect}" unless record.respond_to?(:csv_data)
204
+ record = record.csv_data
205
+ end
206
+
207
+ record = @record_class.new( record )
208
+ @records << record
209
+ add_to_indexes( record )
210
+ end
211
+
212
+ # record can be the row number (integer from 0...@records.length)
213
+ # record can be the record itself (anonymous class)
214
+ def remove_record( record )
215
+ record = @records[record] if record.is_a?(Integer)
216
+ return if record.nil?
217
+
218
+ self.remove_from_index( record )
219
+ @records.delete( record )
220
+ end
221
+
222
+ # Here's the deal: you hand us a block, and we'll remove the records for which
223
+ # it yields _true_.
224
+ def remove_records( records = nil, &block )
225
+ if block_given?
226
+ for record in @records
227
+ remove_record( record ) if yield( record ) == true
228
+ end
229
+ else # records should be an array
230
+ for record in records
231
+ self.remove_record( record )
232
+ end
233
+ end
234
+ end
235
+
139
236
  def reload_spreadsheet( opts = @opts )
140
- load_csv
237
+ load_csv if @spreadsheet_file
141
238
  set_initial_columns( opts[:columns] )
142
239
  create_record_class
143
240
  package
@@ -191,7 +288,47 @@ module CsvMadness
191
288
  reindex
192
289
  @records
193
290
  end
194
-
291
+
292
+
293
+ # give a copy of the current spreadsheet, but with no records
294
+ def blanked()
295
+ sheet = self.class.new
296
+ sheet.columns = @columns.clone
297
+ sheet.index_columns = @index_columns.clone
298
+ sheet.records = []
299
+ sheet.spreadsheet_file = nil
300
+ sheet.create_data_accessor_module
301
+ sheet.create_record_class
302
+ sheet.opts = @opts.clone
303
+ sheet.reindex
304
+
305
+ sheet
306
+ end
307
+
308
+ # give a block, and get back a hash.
309
+ # The hash keys are the results of the block.
310
+ # The hash values are copies of the spreadsheets, with only the records
311
+ # which caused the block to return the key.
312
+ def split( &block )
313
+ sheets = Hash.new
314
+
315
+ for record in @records
316
+ result_key = yield record
317
+ ( sheets[result_key] ||= self.blanked() ) << record
318
+ end
319
+
320
+ sheets
321
+ # sheet_args = self.blanked
322
+ # for key, record_set in records
323
+ # sheet = self.clone
324
+ # sheet.records =
325
+ #
326
+ # records[key] = sheet
327
+ # end
328
+ #
329
+ # records
330
+ end
331
+
195
332
  def column col
196
333
  @records.map(&col)
197
334
  end
@@ -231,6 +368,7 @@ module CsvMadness
231
368
  # If no block given, adds an empty column
232
369
  def add_column( column, &block )
233
370
  raise "Column already exists: #{column}" if @columns.include?( column )
371
+ raise "#{column} is in the list FORBIDDEN_COLUMN_NAMES" if FORBIDDEN_COLUMN_NAMES.include?(column)
234
372
  @columns << column
235
373
 
236
374
  # add empty column to each row
@@ -316,8 +454,15 @@ module CsvMadness
316
454
  end
317
455
  end
318
456
 
457
+ def length
458
+ self.records.length
459
+ end
460
+
319
461
  protected
462
+ attr_writer :columns, :index_columns, :records, :spreadsheet_file, :record_class, :opts
463
+
320
464
  def load_csv
465
+
321
466
  # encoding seems to solve a specific problem with a specific spreadsheet, at an unknown cost.
322
467
  @csv = CSV.new( File.read(@spreadsheet_file).force_encoding("ISO-8859-1").encode("UTF-8"),
323
468
  { write_headers: true,
@@ -325,21 +470,34 @@ module CsvMadness
325
470
  end
326
471
 
327
472
  def add_to_index( col, key, record )
328
- @indexes[col][key] = record
473
+ (@indexes[col] ||= {})[key] = record
329
474
  end
330
475
 
331
- # Reindexes the record lookup tables.
332
- def reindex
333
- @indexes = {}
334
- for col in @index_columns
335
- @indexes[col] = {}
336
-
337
- for record in @records
476
+ def add_to_indexes( records )
477
+ if records.is_a?( Array )
478
+ for record in records
479
+ add_to_indexes( record )
480
+ end
481
+ else
482
+ record = records
483
+ for col in @index_columns
338
484
  add_to_index( col, record.send(col), record )
339
485
  end
340
486
  end
341
487
  end
342
488
 
489
+ def remove_from_index( record )
490
+ for col in @index_columns
491
+ @indexes[col].delete( record.send(col) )
492
+ end
493
+ end
494
+
495
+ # Reindexes the record lookup tables.
496
+ def reindex
497
+ @indexes = {}
498
+ add_to_indexes( @records )
499
+ end
500
+
343
501
  # shouldn't require reindex
344
502
  def rename_index_column( column, new_name )
345
503
  @index_columns[ @index_columns.index( column ) ] = new_name
@@ -381,18 +539,23 @@ module CsvMadness
381
539
  # prints a warning and a comparison of the columns to the headers.
382
540
  def set_initial_columns( columns = nil )
383
541
  if columns.nil?
384
- if @opts[:header] == false #
385
- @columns = (0...csv_column_count).map{ |i| :"col#{i}" }
542
+ if @opts[:header] == false
543
+ columns = (0...csv_column_count).map{ |i| :"col#{i}" }
386
544
  else
387
- @columns = fetch_csv_headers.map{ |name| self.class.getter_name( name ) }
545
+ columns = fetch_csv_headers.map{ |name| self.class.getter_name( name ) }
388
546
  end
389
547
  else
390
- @columns = columns
391
- unless @columns.length == csv_column_count
392
- puts "Warning <#{@spreadsheet_file}>: columns array does not match the number of columns in the spreadsheet."
548
+ unless !@csv || columns.length == csv_column_count
549
+ $stderr.puts "Warning <#{@spreadsheet_file}>: columns array does not match the number of columns in the spreadsheet."
393
550
  compare_columns_to_headers
394
551
  end
395
552
  end
553
+
554
+ for column in columns
555
+ raise "#{column} is in the list FORBIDDEN_COLUMN_NAMES" if FORBIDDEN_COLUMN_NAMES.include?(column)
556
+ end
557
+
558
+ @columns = columns
396
559
  end
397
560
 
398
561
  # Printout so the user can see which CSV columns are being matched to which
@@ -402,7 +565,7 @@ module CsvMadness
402
565
  headers = fetch_csv_headers
403
566
 
404
567
  for i in 0...([@columns, headers].map(&:length).max)
405
- puts "\t#{i}: #{@columns[i]} ==> #{headers[i]}"
568
+ $stdout.puts "\t#{i}: #{@columns[i]} ==> #{headers[i]}"
406
569
  end
407
570
  end
408
571
 
@@ -410,7 +573,7 @@ module CsvMadness
410
573
  # Create objects that respond to the recipe-named methods
411
574
  def package
412
575
  @records = []
413
- @csv.each do |row|
576
+ (@csv || []).each do |row|
414
577
  @records << @record_class.new( row )
415
578
  end
416
579
  end
@@ -0,0 +1,2 @@
1
+ "to_s"
2
+ "string!"
@@ -0,0 +1,11 @@
1
+ "id","fname","lname","party"
2
+ "1","Mary","Moore","D"
3
+ "2","Bill","Paxton","R"
4
+ "3","Charles","Darwin","I"
5
+ "4","Chuck","Norris","D"
6
+ "5","Annabelle","Lecter","R"
7
+ "6","Mortimer","Bradford","D"
8
+ "7","Wilford","Brimley","I"
9
+ "8","Cala","Wilcox","R"
10
+ "9","Horace","Wilcox","R"
11
+ "10","Jacob","Buford","D"
@@ -0,0 +1,3 @@
1
+ "id","number","integer","float","date"
2
+ ,,,,
3
+ 12,134.2,100,123.4,2013-01-13
@@ -0,0 +1,5 @@
1
+ "id","fname","lname","age","born"
2
+ "1",,"Moore",,"1986-04-08 15:06:10"
3
+ "2","Bill","Paxton","39","1974-02-22"
4
+ "3","Charles","Darwin",,"Invalid Date"
5
+ "4","Chuck","Norris","57"
@@ -1,30 +1,37 @@
1
- require 'rubygems'
2
- require 'bundler'
3
-
4
- begin
5
- Bundler.setup(:default, :development)
6
- rescue Bundler::BundlerError => e
7
- $stderr.puts e.message
8
- $stderr.puts "Run `bundle install` to install missing gems"
9
- exit e.status_code
10
- end
11
-
12
- require 'test/unit'
13
- require 'shoulda'
1
+ # require 'rubygems'
2
+ # require 'bundler'
3
+ #
4
+ # begin
5
+ # Bundler.setup(:default, :development)
6
+ # rescue Bundler::BundlerError => e
7
+ # $stderr.puts e.message
8
+ # $stderr.puts "Run `bundle install` to install missing gems"
9
+ # exit e.status_code
10
+ # end
11
+ #
12
+ # require 'test/unit'
13
+ # require 'shoulda'
14
14
 
15
15
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
16
16
  $LOAD_PATH.unshift(File.dirname(__FILE__))
17
17
 
18
+ require 'fun_with_testing'
18
19
  require 'csv_madness'
19
20
 
20
- class Test::Unit::TestCase
21
- end
21
+ # class Test::Unit::TestCase
22
+ # end
22
23
 
23
- class MadTestCase < Test::Unit::TestCase
24
+ class MadTestCase < FunWith::Testing::TestCase # Test::Unit::TestCase
25
+ include FunWith::Testing::Assertions::Basics
26
+
24
27
  MARY_ID = "1"
25
28
  BILL_ID = "2"
26
29
  DARWIN_ID = "3"
27
30
  CHUCK_ID = "4"
31
+
32
+ def setup
33
+ set_spreadsheet_paths
34
+ end
28
35
 
29
36
  def load_mary
30
37
  id = @simple.index_columns.first
@@ -78,9 +85,9 @@ class MadTestCase < Test::Unit::TestCase
78
85
  end
79
86
 
80
87
  def set_spreadsheet_paths
81
- @csv_search_path = Pathname.new( __FILE__ ).dirname.join("csv")
82
- @csv_output_path = @csv_search_path.join("out")
83
- CsvMadness::Sheet.add_search_path( @csv_search_path )
88
+ @csv_load_path = CsvMadness.root( "test", "csv" )
89
+ @csv_output_path = CsvMadness.root( "test", "csv", "out" )
90
+ CsvMadness::Sheet.add_search_path( @csv_load_path )
84
91
  CsvMadness::Sheet.add_search_path( @csv_output_path )
85
92
  end
86
93
 
@@ -0,0 +1,33 @@
1
+ require 'helper'
2
+
3
+ class TestBuilder < MadTestCase
4
+ context "testing simple cases" do
5
+ should "spreadsheetize integers" do
6
+ integers = [65, 66, 67, 68, 69, 70]
7
+ sb = CsvMadness::Builder.new do |s|
8
+ s.column( :even, "even?" )
9
+ s.column( :odd, "odd?" )
10
+ s.column( :hashh, "hash" )
11
+ s.column( :hashhash, "hash.hash" )
12
+ s.column( :chr )
13
+ s.column( :not_a_valid_method )
14
+ end
15
+ #
16
+ ss = sb.build( integers )
17
+
18
+ for record in ss.records
19
+ assert_kind_of( CsvMadness::Record, ss.records.first )
20
+ for col in [:even, :odd, :hashh, :hashhash, :chr]
21
+ assert_respond_to record, col
22
+ end
23
+ end
24
+
25
+ assert_matches ss.records.first.not_a_valid_method, /^ERROR: undefined method `not_a_valid_method'/
26
+
27
+ ss = sb.build( integers, :on_error => :ignore )
28
+
29
+ assert_equal "", ss.records.first.not_a_valid_method
30
+
31
+ end
32
+ end
33
+ end