RubyGems - csv_madness - Versions diffs - 0.0.4 → 0.0.6 - Mend

csv_madness 0.0.4 → 0.0.6

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (22)

checksums.yaml +15 -0
data/CHANGELOG.markdown +21 -3
data/Gemfile +10 -7
data/README.rdoc +96 -63
data/Rakefile +7 -15
data/VERSION +1 -1
data/lib/csv_madness.rb +4 -13
data/lib/csv_madness/builder.rb +97 -0
data/lib/csv_madness/gem_api.rb +12 -0
data/lib/csv_madness/record.rb +38 -1
data/lib/csv_madness/sheet.rb +196 -33
data/test/csv/forbidden_column.csv +2 -0
data/test/csv/splitter.csv +11 -0
data/test/csv/test_column_types.csv +3 -0
data/test/csv/with_nils.csv +5 -0
data/test/helper.rb +26 -19
data/test/test_builder.rb +33 -0
data/test/test_csv_madness.rb +2 -3
data/test/test_merging_columns.rb +40 -0
data/test/test_reloading_spreadsheet.rb +30 -0
data/test/test_sheet.rb +102 -3
metadata +26 -85

data/lib/csv_madness/sheet.rb CHANGED

@@ -2,31 +2,61 @@ module CsvMadness
   class Sheet
     COLUMN_TYPES = {
       number: Proc.new do |cell, record|
-        if (cell || "").strip.match(/^\d*$/)
-          cell.to_i
-        else
-          cell.to_f
+        rval = cell
+        unless cell.nil? || (cell.is_a?(String) && cell.length == 0)
+          begin
+            rval = Integer(cell)
+          rescue
+            # do nothing
+          end
+          unless rval.is_a?(Integer)
+            begin
+              rval = Float(cell)
+            rescue
+              # do nothing
+            end
+          end
         end
+        rval
       end,
       integer: Proc.new do |cell, record|
-        cell.to_i
+        begin
+          Integer(cell)
+        rescue
+          cell
+        end
       end,
       float:   Proc.new do |cell, record|
-        cell.to_f
+        begin
+          Float(cell)
+        rescue
+          cell
+        end
       end,
       date:    Proc.new do |cell, record|
         begin
-          parse = Time.parse( cell )
+          parse = Time.parse( cell || "" )
         rescue ArgumentError
-          parse = "Invalid Time Format: <#{cell}>"
+          if cell =~ /^Invalid Time Format: /
+            parse = cell
+          else
+            parse = "Invalid Time Format: <#{cell}>"
+          end
         end
         parse
       end
     }
+    FORBIDDEN_COLUMN_NAMES = [:to_s]  # breaks things hard when you use them.  Probably not comprehensive, sadly.
     # Used to make getter/setter names out of the original header strings.
     # " hello;: world! " => :hello_world
     def self.getter_name( name )
@@ -108,7 +138,7 @@ module CsvMadness
       end
     end
-    attr_reader :columns, :index_columns, :records, :spreadsheet_file, :record_class
+    attr_reader :columns, :index_columns, :records, :spreadsheet_file, :record_class, :opts
     # opts:
     #   index: ( [:id, :id2 ] )
     #       columns you want mapped for quick
@@ -123,21 +153,88 @@ module CsvMadness
     #
     #   header:   false
     #       anything else, we assume the csv file has a header row
-    def initialize( spreadsheet, opts = {} )
-      if spreadsheet.is_a?(Array)
-        @spreadsheet_file = nil
+    def initialize( *args )
+      if args.last.is_a?(Hash)
+        @opts = args.pop
       else
-        @spreadsheet_file = self.class.find_spreadsheet_in_filesystem( spreadsheet )
+        @opts = {}
       end
-      @opts = opts
+      firstarg = args.shift
+      case firstarg
+      when NilClass
+        @spreadsheet_file = nil
+        @opts[:columns] ||= []
+      when String, FunWith::Files::FilePath, Pathname
+        @spreadsheet_file = self.class.find_spreadsheet_in_filesystem( firstarg )
+      when Array
+        @spreadsheet_file = nil
+        @opts[:columns] ||= firstarg
+      end
       @opts[:header] = (@opts[:header] == false ? false : true)  # true unless already explicitly set to false
       reload_spreadsheet
     end
+    def <<( record )
+      self.add_record( record )
+    end
+    def add_record( record )
+      case record
+      when Array
+        # CSV::Row.new( column names, column_entries ) (in same order as columns, natch)
+        record = CSV::Row.new( self.columns, record )
+      when Hash
+        header = []
+        fields = []
+        for col in self.columns
+          header << col
+          fields << record[col]
+        end
+        record = CSV::Row.new( header, fields )
+      when CSV::Row
+        # do nothing
+      else
+        raise "sheet.add_record() doesn't take objects of type #{record.inspect}" unless record.respond_to?(:csv_data)
+        record = record.csv_data
+      end
+      record = @record_class.new( record )
+      @records << record
+      add_to_indexes( record )
+    end
+    # record can be the row number (integer from 0...@records.length)
+    # record can be the record itself (anonymous class)
+    def remove_record( record )
+      record = @records[record] if record.is_a?(Integer)
+      return if record.nil?
+      self.remove_from_index( record )
+      @records.delete( record )
+    end
+    # Here's the deal: you hand us a block, and we'll remove the records for which
+    # it yields _true_.
+    def remove_records( records = nil, &block )
+      if block_given?
+        for record in @records
+          remove_record( record ) if yield( record ) == true
+        end
+      else # records should be an array
+        for record in records
+          self.remove_record( record )
+        end
+      end
+    end
     def reload_spreadsheet( opts = @opts )
-      load_csv
+      load_csv if @spreadsheet_file
       set_initial_columns( opts[:columns] )
       create_record_class
       package
@@ -191,7 +288,47 @@ module CsvMadness
       reindex
       @records
     end
+    # give a copy of the current spreadsheet, but with no records
+    def blanked()
+      sheet = self.class.new
+      sheet.columns = @columns.clone
+      sheet.index_columns = @index_columns.clone
+      sheet.records = []
+      sheet.spreadsheet_file = nil
+      sheet.create_data_accessor_module
+      sheet.create_record_class
+      sheet.opts = @opts.clone
+      sheet.reindex
+      sheet
+    end
+    # give a block, and get back a hash.
+    # The hash keys are the results of the block.
+    # The hash values are copies of the spreadsheets, with only the records
+    # which caused the block to return the key.
+    def split( &block )
+      sheets = Hash.new
+      for record in @records
+        result_key = yield record
+        ( sheets[result_key] ||= self.blanked() ) << record
+      end
+      sheets
+      # sheet_args = self.blanked
+      # for key, record_set in records
+      #   sheet = self.clone
+      #   sheet.records =
+      #
+      #   records[key] = sheet
+      # end
+      #
+      # records
+    end
     def column col
       @records.map(&col)
     end
@@ -231,6 +368,7 @@ module CsvMadness
     # If no block given, adds an empty column
     def add_column( column, &block )
       raise "Column already exists: #{column}" if @columns.include?( column )
+      raise "#{column} is in the list FORBIDDEN_COLUMN_NAMES" if FORBIDDEN_COLUMN_NAMES.include?(column)
       @columns << column
       # add empty column to each row
@@ -316,8 +454,15 @@ module CsvMadness
       end
     end
+    def length
+      self.records.length
+    end
     protected
+    attr_writer :columns, :index_columns, :records, :spreadsheet_file, :record_class, :opts
     def load_csv
       # encoding seems to solve a specific problem with a specific spreadsheet, at an unknown cost.
       @csv = CSV.new( File.read(@spreadsheet_file).force_encoding("ISO-8859-1").encode("UTF-8"),
                         { write_headers: true,
@@ -325,21 +470,34 @@ module CsvMadness
     end
     def add_to_index( col, key, record )
-      @indexes[col][key] = record
+      (@indexes[col] ||= {})[key] = record
     end
-    # Reindexes the record lookup tables.
-    def reindex
-      @indexes = {}
-      for col in @index_columns
-        @indexes[col] = {}
-        for record in @records
+    def add_to_indexes( records )
+      if records.is_a?( Array )
+        for record in records
+          add_to_indexes( record )
+        end
+      else
+        record = records
+        for col in @index_columns
           add_to_index( col, record.send(col), record )
         end
       end
     end
+    def remove_from_index( record )
+      for col in @index_columns
+        @indexes[col].delete( record.send(col) )
+      end
+    end
+    # Reindexes the record lookup tables.
+    def reindex
+      @indexes = {}
+      add_to_indexes( @records )
+    end
     # shouldn't require reindex
     def rename_index_column( column, new_name )
       @index_columns[ @index_columns.index( column ) ] = new_name
@@ -381,18 +539,23 @@ module CsvMadness
     # prints a warning and a comparison of the columns to the headers.
     def set_initial_columns( columns = nil )
       if columns.nil?
-        if @opts[:header] == false    #
-          @columns = (0...csv_column_count).map{ |i| :"col#{i}" }
+        if @opts[:header] == false
+          columns = (0...csv_column_count).map{ |i| :"col#{i}" }
         else
-          @columns = fetch_csv_headers.map{ |name| self.class.getter_name( name ) }
+          columns = fetch_csv_headers.map{ |name| self.class.getter_name( name ) }
         end
       else
-        @columns = columns
-        unless @columns.length == csv_column_count
-          puts "Warning <#{@spreadsheet_file}>: columns array does not match the number of columns in the spreadsheet."
+        unless !@csv || columns.length == csv_column_count
+          $stderr.puts "Warning <#{@spreadsheet_file}>: columns array does not match the number of columns in the spreadsheet."
           compare_columns_to_headers
         end
       end
+      for column in columns
+        raise "#{column} is in the list FORBIDDEN_COLUMN_NAMES" if FORBIDDEN_COLUMN_NAMES.include?(column)
+      end
+      @columns = columns
     end
     # Printout so the user can see which CSV columns are being matched to which
@@ -402,7 +565,7 @@ module CsvMadness
       headers = fetch_csv_headers
       for i in 0...([@columns, headers].map(&:length).max)
-        puts "\t#{i}:  #{@columns[i]} ==> #{headers[i]}"
+        $stdout.puts "\t#{i}:  #{@columns[i]} ==> #{headers[i]}"
       end
     end
@@ -410,7 +573,7 @@ module CsvMadness
     # Create objects that respond to the recipe-named methods
     def package
       @records = []
-      @csv.each do |row|
+      (@csv || []).each do |row|
         @records << @record_class.new( row )
       end
     end

data/test/csv/forbidden_column.csv ADDED

@@ -0,0 +1,2 @@
+"to_s"
+"string!"

data/test/csv/splitter.csv ADDED

@@ -0,0 +1,11 @@
+"id","fname","lname","party"
+"1","Mary","Moore","D"
+"2","Bill","Paxton","R"
+"3","Charles","Darwin","I"
+"4","Chuck","Norris","D"
+"5","Annabelle","Lecter","R"
+"6","Mortimer","Bradford","D"
+"7","Wilford","Brimley","I"
+"8","Cala","Wilcox","R"
+"9","Horace","Wilcox","R"
+"10","Jacob","Buford","D"

data/test/csv/test_column_types.csv ADDED

@@ -0,0 +1,3 @@
+"id","number","integer","float","date"
+,,,,
+12,134.2,100,123.4,2013-01-13

data/test/csv/with_nils.csv ADDED

@@ -0,0 +1,5 @@
+"id","fname","lname","age","born"
+"1",,"Moore",,"1986-04-08 15:06:10"
+"2","Bill","Paxton","39","1974-02-22"
+"3","Charles","Darwin",,"Invalid Date"
+"4","Chuck","Norris","57"

data/test/helper.rb CHANGED

@@ -1,30 +1,37 @@
-require 'rubygems'
-require 'bundler'
-begin
-  Bundler.setup(:default, :development)
-rescue Bundler::BundlerError => e
-  $stderr.puts e.message
-  $stderr.puts "Run `bundle install` to install missing gems"
-  exit e.status_code
-end
-require 'test/unit'
-require 'shoulda'
+# require 'rubygems'
+# require 'bundler'
+#
+# begin
+#   Bundler.setup(:default, :development)
+# rescue Bundler::BundlerError => e
+#   $stderr.puts e.message
+#   $stderr.puts "Run `bundle install` to install missing gems"
+#   exit e.status_code
+# end
+#
+# require 'test/unit'
+# require 'shoulda'
 $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
 $LOAD_PATH.unshift(File.dirname(__FILE__))
+require 'fun_with_testing'
 require 'csv_madness'
-class Test::Unit::TestCase
-end
+# class Test::Unit::TestCase
+# end
-class MadTestCase < Test::Unit::TestCase
+class MadTestCase < FunWith::Testing::TestCase # Test::Unit::TestCase
+  include FunWith::Testing::Assertions::Basics
   MARY_ID = "1"
   BILL_ID = "2"
   DARWIN_ID = "3"
   CHUCK_ID = "4"
+  def setup
+    set_spreadsheet_paths
+  end
   def load_mary
     id = @simple.index_columns.first
@@ -78,9 +85,9 @@ class MadTestCase < Test::Unit::TestCase
   end
   def set_spreadsheet_paths
-    @csv_search_path = Pathname.new( __FILE__ ).dirname.join("csv")
-    @csv_output_path = @csv_search_path.join("out")
-    CsvMadness::Sheet.add_search_path( @csv_search_path )
+    @csv_load_path = CsvMadness.root( "test", "csv" )
+    @csv_output_path = CsvMadness.root( "test", "csv", "out" )
+    CsvMadness::Sheet.add_search_path( @csv_load_path )
     CsvMadness::Sheet.add_search_path( @csv_output_path )
   end

data/test/test_builder.rb ADDED

@@ -0,0 +1,33 @@
+require 'helper'
+class TestBuilder < MadTestCase
+  context "testing simple cases" do
+    should "spreadsheetize integers" do
+      integers = [65, 66, 67, 68, 69, 70]
+      sb = CsvMadness::Builder.new do |s|
+        s.column( :even, "even?" )
+        s.column( :odd, "odd?" )
+        s.column( :hashh, "hash" )
+        s.column( :hashhash, "hash.hash" )
+        s.column( :chr )
+        s.column( :not_a_valid_method )
+      end
+      #
+      ss = sb.build( integers )
+      for record in ss.records
+        assert_kind_of( CsvMadness::Record, ss.records.first )
+        for col in [:even, :odd, :hashh, :hashhash, :chr]
+          assert_respond_to record, col
+        end
+      end
+      assert_matches ss.records.first.not_a_valid_method, /^ERROR: undefined method `not_a_valid_method'/
+      ss = sb.build( integers, :on_error => :ignore )
+      assert_equal "", ss.records.first.not_a_valid_method
+    end
+  end
+end