csv_madness 0.0.4 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/CHANGELOG.markdown +21 -3
- data/Gemfile +10 -7
- data/README.rdoc +96 -63
- data/Rakefile +7 -15
- data/VERSION +1 -1
- data/lib/csv_madness.rb +4 -13
- data/lib/csv_madness/builder.rb +97 -0
- data/lib/csv_madness/gem_api.rb +12 -0
- data/lib/csv_madness/record.rb +38 -1
- data/lib/csv_madness/sheet.rb +196 -33
- data/test/csv/forbidden_column.csv +2 -0
- data/test/csv/splitter.csv +11 -0
- data/test/csv/test_column_types.csv +3 -0
- data/test/csv/with_nils.csv +5 -0
- data/test/helper.rb +26 -19
- data/test/test_builder.rb +33 -0
- data/test/test_csv_madness.rb +2 -3
- data/test/test_merging_columns.rb +40 -0
- data/test/test_reloading_spreadsheet.rb +30 -0
- data/test/test_sheet.rb +102 -3
- metadata +26 -85
data/lib/csv_madness/sheet.rb
CHANGED
@@ -2,31 +2,61 @@ module CsvMadness
|
|
2
2
|
class Sheet
|
3
3
|
# Converters applied to raw cell values, keyed by column type.
# Every converter is called with (cell, record) and returns the converted
# value.  A cell that cannot be converted is never an error: it is handed
# back unchanged (or, for :date, replaced by a marker string).
COLUMN_TYPES = {
  # Integer when the cell parses as one, otherwise Float, otherwise the cell
  # itself.  nil and the empty string are returned untouched.
  # NOTE(review): Kernel#Integer honours radix prefixes, so "0x1A" becomes 26
  # and "010" becomes 8 -- confirm that is what CSV data should do.
  number: Proc.new do |cell, record|
    if cell.nil? || (cell.is_a?(String) && cell.empty?)
      cell
    else
      begin
        Integer(cell)
      rescue
        begin
          Float(cell)
        rescue
          cell   # neither integer nor float: leave the cell as it was
        end
      end
    end
  end,

  # Integer(cell), or the cell itself when that fails (including nil).
  integer: Proc.new do |cell, record|
    begin
      Integer(cell)
    rescue
      cell
    end
  end,

  # Float(cell), or the cell itself when that fails (including nil).
  float: Proc.new do |cell, record|
    begin
      Float(cell)
    rescue
      cell
    end
  end,

  # Time.parse(cell).  Unparseable cells (nil counts as "") become
  # "Invalid Time Format: <original>".  A cell that already carries that
  # marker is returned as-is, so converting a column twice does not nest
  # markers.
  date: Proc.new do |cell, record|
    begin
      parse = Time.parse( cell || "" )
    rescue ArgumentError
      # \A, not ^: the marker only counts at the very start of the cell, not
      # at the start of some later line of a multi-line cell.
      if cell =~ /\AInvalid Time Format: /
        parse = cell
      else
        parse = "Invalid Time Format: <#{cell}>"
      end
    end

    parse
  end
}
|
29
57
|
|
58
|
+
# Column names the sheet refuses to accept: add_column and
# set_initial_columns both raise when asked to create one of these.
FORBIDDEN_COLUMN_NAMES = [:to_s] # breaks things hard when you use them. Probably not comprehensive, sadly.
|
59
|
+
|
30
60
|
# Used to make getter/setter names out of the original header strings.
|
31
61
|
# " hello;: world! " => :hello_world
|
32
62
|
def self.getter_name( name )
|
@@ -108,7 +138,7 @@ module CsvMadness
|
|
108
138
|
end
|
109
139
|
end
|
110
140
|
|
111
|
-
attr_reader :columns, :index_columns, :records, :spreadsheet_file, :record_class
|
141
|
+
attr_reader :columns, :index_columns, :records, :spreadsheet_file, :record_class, :opts
|
112
142
|
# opts:
|
113
143
|
# index: ( [:id, :id2 ] )
|
114
144
|
# columns you want mapped for quick
|
@@ -123,21 +153,88 @@ module CsvMadness
|
|
123
153
|
#
|
124
154
|
# header: false
|
125
155
|
# anything else, we assume the csv file has a header row
|
126
|
-
def initialize(
|
127
|
-
if
|
128
|
-
@
|
129
|
-
|
156
|
+
# Builds a sheet.  The optional first argument decides where the data
# comes from:
#
#   nil / omitted              -- no file; columns default to []
#   String, FilePath, Pathname -- located via Sheet.find_spreadsheet_in_filesystem
#   Array                      -- no file; the array is used as the column list
#                                 unless opts[:columns] is already set
#
# A trailing Hash is taken as the options hash (see the opts notes above).
# opts[:header] is normalised to true unless it was explicitly false.
#
# NOTE(review): any other kind of first argument falls through the case
# silently and leaves @spreadsheet_file unset -- confirm whether that should
# raise instead.
def initialize( *args )
  # Copy the options: the defaults filled in below must not leak back into
  # the hash the caller passed in.
  @opts = args.last.is_a?(Hash) ? args.pop.dup : {}

  firstarg = args.shift

  case firstarg
  when NilClass
    @spreadsheet_file = nil
    @opts[:columns] ||= []
  when String, FunWith::Files::FilePath, Pathname
    @spreadsheet_file = self.class.find_spreadsheet_in_filesystem( firstarg )
  when Array
    @spreadsheet_file = nil
    @opts[:columns] ||= firstarg
  end

  @opts[:header] = (@opts[:header] != false)   # true unless explicitly set to false

  reload_spreadsheet
end
|
138
180
|
|
181
|
+
# Append operator: `sheet << record` simply delegates to add_record and
# returns whatever add_record returns.
def <<( record )
  add_record( record )
end
|
184
|
+
|
185
|
+
# Appends one record to the sheet and registers it in the indexes.
#
# record may be:
#   Array    -- field values, in the same order as the sheet's columns
#   Hash     -- values keyed by column name; missing columns become nil
#   CSV::Row -- used as-is
#   anything responding to #csv_data -- its csv_data is used
#
# Raises RuntimeError for any other kind of object.
# Returns the result of add_to_indexes.
def add_record( record )
  row = case record
        when Array
          # CSV::Row.new( column names, column entries )
          CSV::Row.new( columns, record )
        when Hash
          CSV::Row.new( columns, columns.map { |col| record[col] } )
        when CSV::Row
          record
        else
          unless record.respond_to?(:csv_data)
            # Report the class: the message talks about the *type* of the object.
            raise "sheet.add_record() doesn't take objects of type #{record.class}"
          end

          record.csv_data
        end

  wrapped = @record_class.new( row )
  @records << wrapped
  add_to_indexes( wrapped )
end
|
211
|
+
|
212
|
+
# record can be the row number (integer from 0...@records.length)
|
213
|
+
# record can be the record itself (anonymous class)
|
214
|
+
# Removes a single record from the sheet and from the indexes.
# An Integer argument is treated as a position in @records; if nothing is
# found there (or nil was passed) the call does nothing and returns nil.
# Otherwise returns the result of @records.delete.
def remove_record( record )
  target = record.is_a?(Integer) ? @records[record] : record
  return if target.nil?

  remove_from_index( target )
  @records.delete( target )
end
|
221
|
+
|
222
|
+
# Here's the deal: you hand us a block, and we'll remove the records for which
|
223
|
+
# it yields _true_.
|
224
|
+
# Removes several records at once.
#
# With a block: every record for which the block returns exactly +true+ is
# removed; returns @records.
# Without a block: +records+ is a collection of records (or row numbers) to
# hand to remove_record one by one; nil is treated as "nothing to remove".
# Returns the +records+ argument.
def remove_records( records = nil, &block )
  if block_given?
    # Pick the victims first and delete afterwards.  Deleting from @records
    # while iterating over it makes the iteration skip the element that
    # slides into the freed slot.
    doomed = @records.select { |record| yield( record ) == true }
    doomed.each { |record| remove_record( record ) }
    @records
  else
    # Walk a copy: the caller may have handed us @records itself.
    (records || []).dup.each { |record| remove_record( record ) }
    records
  end
end
|
235
|
+
|
139
236
|
def reload_spreadsheet( opts = @opts )
|
140
|
-
load_csv
|
237
|
+
load_csv if @spreadsheet_file
|
141
238
|
set_initial_columns( opts[:columns] )
|
142
239
|
create_record_class
|
143
240
|
package
|
@@ -191,7 +288,47 @@ module CsvMadness
|
|
191
288
|
reindex
|
192
289
|
@records
|
193
290
|
end
|
194
|
-
|
291
|
+
|
292
|
+
|
293
|
+
# give a copy of the current spreadsheet, but with no records
|
294
|
+
# Returns a new sheet of the same class with copies of this sheet's columns,
# index columns and options, but with no records and no spreadsheet file.
def blanked
  self.class.new.tap do |copy|
    copy.columns          = @columns.clone
    copy.index_columns    = @index_columns.clone
    copy.records          = []
    copy.spreadsheet_file = nil
    copy.create_data_accessor_module
    copy.create_record_class
    copy.opts             = @opts.clone
    copy.reindex
  end
end
|
307
|
+
|
308
|
+
# give a block, and get back a hash.
|
309
|
+
# The hash keys are the results of the block.
|
310
|
+
# The hash values are copies of the spreadsheets, with only the records
|
311
|
+
# which caused the block to return the key.
|
312
|
+
# Partitions the sheet by the block's return value.
#
# The block is called once per record.  Returns a Hash whose keys are the
# distinct block results and whose values are blanked copies of this sheet
# holding the records that produced that key, in their original order.
def split( &block )
  @records.each_with_object( {} ) do |record, sheets|
    ( sheets[yield( record )] ||= blanked ) << record
  end
end
|
331
|
+
|
195
332
|
# Returns an Array with the value of +col+ for every record, in record
# order.  +col+ is passed as a block to map (&col), so it is normally the
# column's getter name as a Symbol.
def column( col )
  @records.map( &col )
end
|
@@ -231,6 +368,7 @@ module CsvMadness
|
|
231
368
|
# If no block given, adds an empty column
|
232
369
|
def add_column( column, &block )
|
233
370
|
raise "Column already exists: #{column}" if @columns.include?( column )
|
371
|
+
raise "#{column} is in the list FORBIDDEN_COLUMN_NAMES" if FORBIDDEN_COLUMN_NAMES.include?(column)
|
234
372
|
@columns << column
|
235
373
|
|
236
374
|
# add empty column to each row
|
@@ -316,8 +454,15 @@ module CsvMadness
|
|
316
454
|
end
|
317
455
|
end
|
318
456
|
|
457
|
+
# Number of records currently in the sheet.
def length
  records.length
end
|
460
|
+
|
319
461
|
protected
|
462
|
+
attr_writer :columns, :index_columns, :records, :spreadsheet_file, :record_class, :opts
|
463
|
+
|
320
464
|
def load_csv
|
465
|
+
|
321
466
|
# encoding seems to solve a specific problem with a specific spreadsheet, at an unknown cost.
|
322
467
|
@csv = CSV.new( File.read(@spreadsheet_file).force_encoding("ISO-8859-1").encode("UTF-8"),
|
323
468
|
{ write_headers: true,
|
@@ -325,21 +470,34 @@ module CsvMadness
|
|
325
470
|
end
|
326
471
|
|
327
472
|
# Registers +record+ under +key+ in the lookup table for index column +col+.
# The overall index hash and the per-column table are both created on
# demand, so this works even before reindex has run.  A later record with
# the same key replaces the earlier one.
def add_to_index( col, key, record )
  @indexes ||= {}
  @indexes[col] ||= {}
  @indexes[col][key] = record
end
|
330
475
|
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
476
|
+
# Adds one record, or each record of an Array of records, to the lookup
# table of every index column.  The index key is whatever the record
# returns for that column.
def add_to_indexes( records )
  if records.is_a?( Array )
    records.each { |record| add_to_indexes( record ) }
  else
    @index_columns.each do |col|
      add_to_index( col, records.send( col ), records )
    end
  end
end
|
342
488
|
|
489
|
+
# Drops +record+ from the lookup table of every index column.
#
# Columns that have no lookup table yet are skipped.  An entry is only
# deleted when it actually points at +record+: if another record with the
# same key has since taken the slot, that record stays indexed.
def remove_from_index( record )
  @index_columns.each do |col|
    index = @indexes && @indexes[col]
    next if index.nil?

    key = record.send( col )
    index.delete( key ) if index[key].equal?( record )
  end
end
|
494
|
+
|
495
|
+
# Reindexes the record lookup tables.
|
496
|
+
# Throws away every lookup table and rebuilds them from the current records.
def reindex
  @indexes = {}
  add_to_indexes( @records )
end
|
500
|
+
|
343
501
|
# shouldn't require reindex
|
344
502
|
def rename_index_column( column, new_name )
|
345
503
|
@index_columns[ @index_columns.index( column ) ] = new_name
|
@@ -381,18 +539,23 @@ module CsvMadness
|
|
381
539
|
# prints a warning and a comparison of the columns to the headers.
|
382
540
|
# Decides the sheet's column names and stores them in @columns.
#
# columns == nil:
#   opts[:header] == false -- names are generated: :col0, :col1, ...
#   otherwise              -- names are derived from the CSV header row via
#                             Sheet.getter_name
# columns given:
#   used as-is.  When a CSV is loaded and the count differs from the CSV's
#   column count, a warning goes to $stderr followed by the
#   columns-vs-headers comparison.
#
# Raises RuntimeError when any name is in FORBIDDEN_COLUMN_NAMES.
# Returns the column list.
def set_initial_columns( columns = nil )
  explicit = !columns.nil?

  unless explicit
    columns = if @opts[:header] == false
                (0...csv_column_count).map { |i| :"col#{i}" }
              else
                fetch_csv_headers.map { |name| self.class.getter_name( name ) }
              end
  end

  columns.each do |column|
    raise "#{column} is in the list FORBIDDEN_COLUMN_NAMES" if FORBIDDEN_COLUMN_NAMES.include?(column)
  end

  # Assign before comparing: compare_columns_to_headers reads @columns, so it
  # must already hold the new list when the comparison is printed.
  @columns = columns

  if explicit && @csv && columns.length != csv_column_count
    $stderr.puts "Warning <#{@spreadsheet_file}>: columns array does not match the number of columns in the spreadsheet."
    compare_columns_to_headers
  end

  @columns
end
|
397
560
|
|
398
561
|
# Printout so the user can see which CSV columns are being matched to which
|
@@ -402,7 +565,7 @@ module CsvMadness
|
|
402
565
|
headers = fetch_csv_headers
|
403
566
|
|
404
567
|
for i in 0...([@columns, headers].map(&:length).max)
|
405
|
-
puts "\t#{i}: #{@columns[i]} ==> #{headers[i]}"
|
568
|
+
$stdout.puts "\t#{i}: #{@columns[i]} ==> #{headers[i]}"
|
406
569
|
end
|
407
570
|
end
|
408
571
|
|
@@ -410,7 +573,7 @@ module CsvMadness
|
|
410
573
|
# Create objects that respond to the recipe-named methods
|
411
574
|
# Rebuilds @records from the loaded CSV: every row is wrapped in an instance
# of @record_class.  With no CSV loaded the sheet simply ends up empty.
def package
  @records = []
  (@csv || []).each { |row| @records << @record_class.new( row ) }
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
"id","fname","lname","party"
|
2
|
+
"1","Mary","Moore","D"
|
3
|
+
"2","Bill","Paxton","R"
|
4
|
+
"3","Charles","Darwin","I"
|
5
|
+
"4","Chuck","Norris","D"
|
6
|
+
"5","Annabelle","Lecter","R"
|
7
|
+
"6","Mortimer","Bradford","D"
|
8
|
+
"7","Wilford","Brimley","I"
|
9
|
+
"8","Cala","Wilcox","R"
|
10
|
+
"9","Horace","Wilcox","R"
|
11
|
+
"10","Jacob","Buford","D"
|
data/test/helper.rb
CHANGED
@@ -1,30 +1,37 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'bundler'
|
3
|
-
|
4
|
-
begin
|
5
|
-
|
6
|
-
rescue Bundler::BundlerError => e
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
end
|
11
|
-
|
12
|
-
require 'test/unit'
|
13
|
-
require 'shoulda'
|
1
|
+
# require 'rubygems'
|
2
|
+
# require 'bundler'
|
3
|
+
#
|
4
|
+
# begin
|
5
|
+
# Bundler.setup(:default, :development)
|
6
|
+
# rescue Bundler::BundlerError => e
|
7
|
+
# $stderr.puts e.message
|
8
|
+
# $stderr.puts "Run `bundle install` to install missing gems"
|
9
|
+
# exit e.status_code
|
10
|
+
# end
|
11
|
+
#
|
12
|
+
# require 'test/unit'
|
13
|
+
# require 'shoulda'
|
14
14
|
|
15
15
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
16
16
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
17
17
|
|
18
|
+
require 'fun_with_testing'
|
18
19
|
require 'csv_madness'
|
19
20
|
|
20
|
-
class Test::Unit::TestCase
|
21
|
-
end
|
21
|
+
# class Test::Unit::TestCase
|
22
|
+
# end
|
22
23
|
|
23
|
-
class MadTestCase < Test::Unit::TestCase
|
24
|
+
class MadTestCase < FunWith::Testing::TestCase # Test::Unit::TestCase
|
25
|
+
include FunWith::Testing::Assertions::Basics
|
26
|
+
|
24
27
|
MARY_ID = "1"
|
25
28
|
BILL_ID = "2"
|
26
29
|
DARWIN_ID = "3"
|
27
30
|
CHUCK_ID = "4"
|
31
|
+
|
32
|
+
# Runs before each test: registers the CSV fixture directories.
def setup
  set_spreadsheet_paths
end
|
28
35
|
|
29
36
|
def load_mary
|
30
37
|
id = @simple.index_columns.first
|
@@ -78,9 +85,9 @@ class MadTestCase < Test::Unit::TestCase
|
|
78
85
|
end
|
79
86
|
|
80
87
|
# Remembers where test CSVs are read from (test/csv) and written to
# (test/csv/out), and adds both directories to the Sheet search path.
def set_spreadsheet_paths
  @csv_load_path   = CsvMadness.root( "test", "csv" )
  @csv_output_path = CsvMadness.root( "test", "csv", "out" )

  CsvMadness::Sheet.add_search_path( @csv_load_path )
  CsvMadness::Sheet.add_search_path( @csv_output_path )
end
|
86
93
|
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Exercises CsvMadness::Builder: declare columns, build a sheet from plain
# Ruby objects, and check both the default and the :ignore error handling.
class TestBuilder < MadTestCase
  context "testing simple cases" do
    should "spreadsheetize integers" do
      integers = [65, 66, 67, 68, 69, 70]

      # The optional second argument is presumably the method (or method
      # chain) evaluated on each source object -- see CsvMadness::Builder.
      sb = CsvMadness::Builder.new do |s|
        s.column( :even, "even?" )
        s.column( :odd, "odd?" )
        s.column( :hashh, "hash" )
        s.column( :hashhash, "hash.hash" )
        s.column( :chr )
        s.column( :not_a_valid_method )
      end

      ss = sb.build( integers )

      ss.records.each do |record|
        # Check the record being visited, not just the first one every time.
        assert_kind_of( CsvMadness::Record, record )

        [:even, :odd, :hashh, :hashhash, :chr].each do |col|
          assert_respond_to record, col
        end
      end

      # By default a failing column holds the error text...
      assert_matches ss.records.first.not_a_valid_method, /^ERROR: undefined method `not_a_valid_method'/

      # ...and with :on_error => :ignore it is left blank.
      ss = sb.build( integers, :on_error => :ignore )

      assert_equal "", ss.records.first.not_a_valid_method
    end
  end
end
|