csv_madness 0.0.4 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/CHANGELOG.markdown +21 -3
- data/Gemfile +10 -7
- data/README.rdoc +96 -63
- data/Rakefile +7 -15
- data/VERSION +1 -1
- data/lib/csv_madness.rb +4 -13
- data/lib/csv_madness/builder.rb +97 -0
- data/lib/csv_madness/gem_api.rb +12 -0
- data/lib/csv_madness/record.rb +38 -1
- data/lib/csv_madness/sheet.rb +196 -33
- data/test/csv/forbidden_column.csv +2 -0
- data/test/csv/splitter.csv +11 -0
- data/test/csv/test_column_types.csv +3 -0
- data/test/csv/with_nils.csv +5 -0
- data/test/helper.rb +26 -19
- data/test/test_builder.rb +33 -0
- data/test/test_csv_madness.rb +2 -3
- data/test/test_merging_columns.rb +40 -0
- data/test/test_reloading_spreadsheet.rb +30 -0
- data/test/test_sheet.rb +102 -3
- metadata +26 -85
data/lib/csv_madness/sheet.rb
CHANGED
@@ -2,31 +2,61 @@ module CsvMadness
|
|
2
2
|
class Sheet
|
3
3
|
# Built-in cell converters, keyed by column type name.
#
# Every converter is called with (cell, record) and returns the converted
# value. A cell that cannot be converted is handed back unchanged, except by
# the :date converter, which hands back an "Invalid Time Format: <...>"
# marker string instead.
COLUMN_TYPES = {
  # Integer when the cell parses as one, otherwise Float, otherwise the cell
  # itself. nil and "" are left alone.
  number: Proc.new do |cell, record|
    rval = cell

    # Cells that are already numeric are kept as they are: pushing a Float
    # through Integer() would silently truncate it (3.7 => 3).
    unless cell.nil? || cell.is_a?(Numeric) || (cell.is_a?(String) && cell.empty?)
      begin
        rval = Integer(cell)
      rescue StandardError
        # not an integer; fall through to the Float attempt
      end

      unless rval.is_a?(Integer)
        begin
          rval = Float(cell)
        rescue StandardError
          # not a number at all; keep the original cell
        end
      end
    end

    rval
  end,

  # Integer(cell), or the untouched cell when that fails.
  integer: Proc.new do |cell, record|
    begin
      Integer(cell)
    rescue StandardError
      cell
    end
  end,

  # Float(cell), or the untouched cell when that fails.
  float: Proc.new do |cell, record|
    begin
      Float(cell)
    rescue StandardError
      cell
    end
  end,

  # Time.parse(cell). Unparseable cells (nil included) become the string
  # "Invalid Time Format: <cell>". The converter is idempotent: a cell that
  # is already a Time, or already carries the marker, is returned as-is.
  date: Proc.new do |cell, record|
    if cell.is_a?(Time)
      # Already converted; Time.parse would raise TypeError on a Time.
      cell
    else
      begin
        Time.parse( cell || "" )
      rescue ArgumentError
        # \A rather than ^, so only a marker at the very start of the cell
        # counts (with ^ any later line of a multi-line cell would match).
        if cell =~ /\AInvalid Time Format: /
          cell
        else
          "Invalid Time Format: <#{cell}>"
        end
      end
    end
  end
}
|
29
57
|
|
58
|
+
FORBIDDEN_COLUMN_NAMES = [:to_s] # breaks things hard when you use them. Probably not comprehensive, sadly.
|
59
|
+
|
30
60
|
# Used to make getter/setter names out of the original header strings.
|
31
61
|
# " hello;: world! " => :hello_world
|
32
62
|
def self.getter_name( name )
|
@@ -108,7 +138,7 @@ module CsvMadness
|
|
108
138
|
end
|
109
139
|
end
|
110
140
|
|
111
|
-
attr_reader :columns, :index_columns, :records, :spreadsheet_file, :record_class
|
141
|
+
attr_reader :columns, :index_columns, :records, :spreadsheet_file, :record_class, :opts
|
112
142
|
# opts:
|
113
143
|
# index: ( [:id, :id2 ] )
|
114
144
|
# columns you want mapped for quick
|
@@ -123,21 +153,88 @@ module CsvMadness
|
|
123
153
|
#
|
124
154
|
# header: false
|
125
155
|
# anything else, we assume the csv file has a header row
|
126
|
-
def initialize(
|
127
|
-
if
|
128
|
-
@
|
129
|
-
|
156
|
+
def initialize( *args )
|
157
|
+
if args.last.is_a?(Hash)
|
158
|
+
@opts = args.pop
|
130
159
|
else
|
131
|
-
@
|
160
|
+
@opts = {}
|
132
161
|
end
|
133
|
-
|
162
|
+
|
163
|
+
firstarg = args.shift
|
164
|
+
|
165
|
+
case firstarg
|
166
|
+
when NilClass
|
167
|
+
@spreadsheet_file = nil
|
168
|
+
@opts[:columns] ||= []
|
169
|
+
when String, FunWith::Files::FilePath, Pathname
|
170
|
+
@spreadsheet_file = self.class.find_spreadsheet_in_filesystem( firstarg )
|
171
|
+
when Array
|
172
|
+
@spreadsheet_file = nil
|
173
|
+
@opts[:columns] ||= firstarg
|
174
|
+
end
|
175
|
+
|
134
176
|
@opts[:header] = (@opts[:header] == false ? false : true) # true unless already explicitly set to false
|
135
177
|
|
136
178
|
reload_spreadsheet
|
137
179
|
end
|
138
180
|
|
181
|
+
def <<( record )
|
182
|
+
self.add_record( record )
|
183
|
+
end
|
184
|
+
|
185
|
+
# Appends one record to the sheet and registers it in the lookup indexes.
#
# record may be:
#   * an Array of cell values, in the same order as the sheet's columns
#   * a Hash keyed by column name (keys that are not columns are ignored,
#     columns missing from the hash become nil)
#   * a CSV::Row, which is used as-is
#   * any object responding to #csv_data, whose #csv_data is used as the row
#
# Raises a RuntimeError for anything else. Returns whatever add_to_indexes
# returns.
def add_record( record )
  row = case record
        when Array
          # CSV::Row.new( column names, column entries ), in column order
          CSV::Row.new( columns, record )
        when Hash
          CSV::Row.new( columns, columns.map{ |col| record[col] } )
        when CSV::Row
          record
        else
          unless record.respond_to?(:csv_data)
            # Name the class (the message talks about the type) and keep the
            # inspected value for context.
            raise "sheet.add_record() doesn't take objects of type #{record.class} (got #{record.inspect})"
          end

          record.csv_data
        end

  wrapped = @record_class.new( row )
  @records << wrapped
  add_to_indexes( wrapped )
end
|
211
|
+
|
212
|
+
# Removes a single record from the sheet and from its indexes.
#
# record is either the record object itself or its row number (an Integer
# index into @records). Nothing happens when it resolves to nil.
def remove_record( record )
  target = record.is_a?(Integer) ? @records[record] : record

  unless target.nil?
    remove_from_index( target )
    @records.delete( target )
  end
end
|
221
|
+
|
222
|
+
# Removes several records at once.
#
# With a block: every record for which the block returns exactly +true+ is
# removed, and the +records+ argument is ignored.
# Without a block: +records+ is a list of whatever remove_record accepts;
# nil is treated as an empty list.
def remove_records( records = nil, &block )
  if block_given?
    # Decide first, delete afterwards. Deleting from @records while iterating
    # over it makes the iteration skip the record after each deleted one.
    doomed = @records.select{ |record| yield( record ) == true }
    doomed.each{ |record| remove_record( record ) }
    @records
  else
    # Work on a copy so that handing in the sheet's own records array
    # (which shrinks as we delete) is safe as well.
    Array( records ).dup.each{ |record| remove_record( record ) }
    records
  end
end
|
235
|
+
|
139
236
|
def reload_spreadsheet( opts = @opts )
|
140
|
-
load_csv
|
237
|
+
load_csv if @spreadsheet_file
|
141
238
|
set_initial_columns( opts[:columns] )
|
142
239
|
create_record_class
|
143
240
|
package
|
@@ -191,7 +288,47 @@ module CsvMadness
|
|
191
288
|
reindex
|
192
289
|
@records
|
193
290
|
end
|
194
|
-
|
291
|
+
|
292
|
+
|
293
|
+
# Returns a new sheet of the same class that carries clones of this sheet's
# columns, index columns and options, but has no records and no source file.
def blanked()
  self.class.new.tap do |empty|
    empty.columns          = @columns.clone
    empty.index_columns    = @index_columns.clone
    empty.records          = []
    empty.spreadsheet_file = nil
    empty.create_data_accessor_module
    empty.create_record_class
    empty.opts             = @opts.clone
    empty.reindex
  end
end
|
307
|
+
|
308
|
+
# Partitions the sheet by the block's return value.
#
# The block is called once per record. The returned Hash maps each distinct
# block result to a blanked() copy of this sheet holding only the records
# that produced that result.
def split( &block )
  @records.each_with_object( {} ) do |record, sheets|
    key = yield record
    sheets[key] = blanked() unless sheets[key]
    sheets[key] << record
  end
end
|
331
|
+
|
195
332
|
def column col
|
196
333
|
@records.map(&col)
|
197
334
|
end
|
@@ -231,6 +368,7 @@ module CsvMadness
|
|
231
368
|
# If no block given, adds an empty column
|
232
369
|
def add_column( column, &block )
|
233
370
|
raise "Column already exists: #{column}" if @columns.include?( column )
|
371
|
+
raise "#{column} is in the list FORBIDDEN_COLUMN_NAMES" if FORBIDDEN_COLUMN_NAMES.include?(column)
|
234
372
|
@columns << column
|
235
373
|
|
236
374
|
# add empty column to each row
|
@@ -316,8 +454,15 @@ module CsvMadness
|
|
316
454
|
end
|
317
455
|
end
|
318
456
|
|
457
|
+
# Number of records currently held by the sheet.
def length
  records.size
end
|
460
|
+
|
319
461
|
protected
|
462
|
+
attr_writer :columns, :index_columns, :records, :spreadsheet_file, :record_class, :opts
|
463
|
+
|
320
464
|
def load_csv
|
465
|
+
|
321
466
|
# encoding seems to solve a specific problem with a specific spreadsheet, at an unknown cost.
|
322
467
|
@csv = CSV.new( File.read(@spreadsheet_file).force_encoding("ISO-8859-1").encode("UTF-8"),
|
323
468
|
{ write_headers: true,
|
@@ -325,21 +470,34 @@ module CsvMadness
|
|
325
470
|
end
|
326
471
|
|
327
472
|
def add_to_index( col, key, record )
|
328
|
-
@indexes[col][key] = record
|
473
|
+
(@indexes[col] ||= {})[key] = record
|
329
474
|
end
|
330
475
|
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
476
|
+
# Registers a record under every index column. When given an Array, each
# element is registered in turn.
def add_to_indexes( records )
  if records.is_a?( Array )
    records.each{ |entry| add_to_indexes( entry ) }
  else
    @index_columns.each do |col|
      add_to_index( col, records.send(col), records )
    end
  end
end
|
342
488
|
|
489
|
+
# Drops the given record's entry from every column index.
#
# The per-column lookup tables are created lazily by add_to_index, so an
# index column may not have a table yet (e.g. nothing was indexed since the
# last reindex). Such columns are skipped instead of raising NoMethodError.
def remove_from_index( record )
  @index_columns.each do |col|
    table = @indexes && @indexes[col]
    table.delete( record.send(col) ) if table
  end
end
|
494
|
+
|
495
|
+
# Discards the existing lookup tables and rebuilds them from the records
# currently in the sheet.
def reindex
  @indexes = Hash.new
  add_to_indexes( @records )
end
|
500
|
+
|
343
501
|
# shouldn't require reindex
|
344
502
|
def rename_index_column( column, new_name )
|
345
503
|
@index_columns[ @index_columns.index( column ) ] = new_name
|
@@ -381,18 +539,23 @@ module CsvMadness
|
|
381
539
|
# prints a warning and a comparison of the columns to the headers.
|
382
540
|
def set_initial_columns( columns = nil )
|
383
541
|
if columns.nil?
|
384
|
-
if @opts[:header] == false
|
385
|
-
|
542
|
+
if @opts[:header] == false
|
543
|
+
columns = (0...csv_column_count).map{ |i| :"col#{i}" }
|
386
544
|
else
|
387
|
-
|
545
|
+
columns = fetch_csv_headers.map{ |name| self.class.getter_name( name ) }
|
388
546
|
end
|
389
547
|
else
|
390
|
-
|
391
|
-
|
392
|
-
puts "Warning <#{@spreadsheet_file}>: columns array does not match the number of columns in the spreadsheet."
|
548
|
+
unless !@csv || columns.length == csv_column_count
|
549
|
+
$stderr.puts "Warning <#{@spreadsheet_file}>: columns array does not match the number of columns in the spreadsheet."
|
393
550
|
compare_columns_to_headers
|
394
551
|
end
|
395
552
|
end
|
553
|
+
|
554
|
+
for column in columns
|
555
|
+
raise "#{column} is in the list FORBIDDEN_COLUMN_NAMES" if FORBIDDEN_COLUMN_NAMES.include?(column)
|
556
|
+
end
|
557
|
+
|
558
|
+
@columns = columns
|
396
559
|
end
|
397
560
|
|
398
561
|
# Printout so the user can see which CSV columns are being matched to which
|
@@ -402,7 +565,7 @@ module CsvMadness
|
|
402
565
|
headers = fetch_csv_headers
|
403
566
|
|
404
567
|
for i in 0...([@columns, headers].map(&:length).max)
|
405
|
-
puts "\t#{i}: #{@columns[i]} ==> #{headers[i]}"
|
568
|
+
$stdout.puts "\t#{i}: #{@columns[i]} ==> #{headers[i]}"
|
406
569
|
end
|
407
570
|
end
|
408
571
|
|
@@ -410,7 +573,7 @@ module CsvMadness
|
|
410
573
|
# Create objects that respond to the recipe-named methods
|
411
574
|
def package
|
412
575
|
@records = []
|
413
|
-
@csv.each do |row|
|
576
|
+
(@csv || []).each do |row|
|
414
577
|
@records << @record_class.new( row )
|
415
578
|
end
|
416
579
|
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
"id","fname","lname","party"
|
2
|
+
"1","Mary","Moore","D"
|
3
|
+
"2","Bill","Paxton","R"
|
4
|
+
"3","Charles","Darwin","I"
|
5
|
+
"4","Chuck","Norris","D"
|
6
|
+
"5","Annabelle","Lecter","R"
|
7
|
+
"6","Mortimer","Bradford","D"
|
8
|
+
"7","Wilford","Brimley","I"
|
9
|
+
"8","Cala","Wilcox","R"
|
10
|
+
"9","Horace","Wilcox","R"
|
11
|
+
"10","Jacob","Buford","D"
|
data/test/helper.rb
CHANGED
@@ -1,30 +1,37 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'bundler'
|
3
|
-
|
4
|
-
begin
|
5
|
-
|
6
|
-
rescue Bundler::BundlerError => e
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
end
|
11
|
-
|
12
|
-
require 'test/unit'
|
13
|
-
require 'shoulda'
|
1
|
+
# require 'rubygems'
|
2
|
+
# require 'bundler'
|
3
|
+
#
|
4
|
+
# begin
|
5
|
+
# Bundler.setup(:default, :development)
|
6
|
+
# rescue Bundler::BundlerError => e
|
7
|
+
# $stderr.puts e.message
|
8
|
+
# $stderr.puts "Run `bundle install` to install missing gems"
|
9
|
+
# exit e.status_code
|
10
|
+
# end
|
11
|
+
#
|
12
|
+
# require 'test/unit'
|
13
|
+
# require 'shoulda'
|
14
14
|
|
15
15
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
16
16
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
17
17
|
|
18
|
+
require 'fun_with_testing'
|
18
19
|
require 'csv_madness'
|
19
20
|
|
20
|
-
class Test::Unit::TestCase
|
21
|
-
end
|
21
|
+
# class Test::Unit::TestCase
|
22
|
+
# end
|
22
23
|
|
23
|
-
class MadTestCase < Test::Unit::TestCase
|
24
|
+
class MadTestCase < FunWith::Testing::TestCase # Test::Unit::TestCase
|
25
|
+
include FunWith::Testing::Assertions::Basics
|
26
|
+
|
24
27
|
MARY_ID = "1"
|
25
28
|
BILL_ID = "2"
|
26
29
|
DARWIN_ID = "3"
|
27
30
|
CHUCK_ID = "4"
|
31
|
+
|
32
|
+
def setup
|
33
|
+
set_spreadsheet_paths
|
34
|
+
end
|
28
35
|
|
29
36
|
def load_mary
|
30
37
|
id = @simple.index_columns.first
|
@@ -78,9 +85,9 @@ class MadTestCase < Test::Unit::TestCase
|
|
78
85
|
end
|
79
86
|
|
80
87
|
def set_spreadsheet_paths
|
81
|
-
@
|
82
|
-
@csv_output_path =
|
83
|
-
CsvMadness::Sheet.add_search_path( @
|
88
|
+
@csv_load_path = CsvMadness.root( "test", "csv" )
|
89
|
+
@csv_output_path = CsvMadness.root( "test", "csv", "out" )
|
90
|
+
CsvMadness::Sheet.add_search_path( @csv_load_path )
|
84
91
|
CsvMadness::Sheet.add_search_path( @csv_output_path )
|
85
92
|
end
|
86
93
|
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Tests for CsvMadness::Builder: a builder is configured with columns, then
# #build turns a list of plain objects (Integers here) into a sheet.
class TestBuilder < MadTestCase
  context "testing simple cases" do
    should "spreadsheetize integers" do
      integers = [65, 66, 67, 68, 69, 70]

      # NOTE(review): the second argument looks like a method (or dotted
      # method chain) called on each object to fill the column, defaulting
      # to the column name. Confirm against CsvMadness::Builder.
      sb = CsvMadness::Builder.new do |s|
        s.column( :even, "even?" )
        s.column( :odd, "odd?" )
        s.column( :hashh, "hash" )
        s.column( :hashhash, "hash.hash" )
        s.column( :chr )
        s.column( :not_a_valid_method )
      end

      ss = sb.build( integers )

      ss.records.each do |record|
        # Check the record being iterated; asserting on ss.records.first
        # here would only ever test the first record.
        assert_kind_of( CsvMadness::Record, record )

        [:even, :odd, :hashh, :hashhash, :chr].each do |col|
          assert_respond_to record, col
        end
      end

      # By default a failing column holds the error message.
      assert_matches ss.records.first.not_a_valid_method, /^ERROR: undefined method `not_a_valid_method'/

      ss = sb.build( integers, :on_error => :ignore )

      # With :on_error => :ignore the failing column is left empty.
      assert_equal "", ss.records.first.not_a_valid_method
    end
  end
end
|