RubyGems - smarter_csv - Versions diffs - 1.6.0 → 1.7.2 - Mend

smarter_csv 1.6.0 → 1.7.2

Files changed (101) hide show

checksums.yaml +4 -4
data/.rubocop.yml +133 -0
data/CHANGELOG.md +28 -0
data/CONTRIBUTORS.md +3 -0
data/Gemfile +7 -4
data/README.md +30 -26
data/Rakefile +15 -13
data/ext/smarter_csv/extconf.rb +14 -0
data/ext/smarter_csv/smarter_csv.c +86 -0
data/lib/extensions/hash.rb +4 -2
data/lib/smarter_csv/version.rb +3 -1
data/lib/smarter_csv.rb +524 -10
data/smarter_csv.gemspec +22 -7
metadata +55 -177
data/.gitignore +0 -10
data/.rspec +0 -2
data/.travis.yml +0 -27
data/lib/smarter_csv/smarter_csv.rb +0 -461
data/spec/fixtures/additional_separator.csv +0 -6
data/spec/fixtures/basic.csv +0 -8
data/spec/fixtures/binary.csv +0 -1
data/spec/fixtures/carriage_returns_n.csv +0 -18
data/spec/fixtures/carriage_returns_quoted.csv +0 -3
data/spec/fixtures/carriage_returns_r.csv +0 -1
data/spec/fixtures/carriage_returns_rn.csv +0 -18
data/spec/fixtures/chunk_cornercase.csv +0 -10
data/spec/fixtures/duplicate_headers.csv +0 -3
data/spec/fixtures/empty.csv +0 -5
data/spec/fixtures/empty_columns_1.csv +0 -2
data/spec/fixtures/empty_columns_2.csv +0 -2
data/spec/fixtures/hard_sample.csv +0 -2
data/spec/fixtures/ignore_comments.csv +0 -11
data/spec/fixtures/ignore_comments2.csv +0 -3
data/spec/fixtures/key_mapping.csv +0 -2
data/spec/fixtures/line_endings_n.csv +0 -4
data/spec/fixtures/line_endings_r.csv +0 -1
data/spec/fixtures/line_endings_rn.csv +0 -4
data/spec/fixtures/lots_of_columns.csv +0 -2
data/spec/fixtures/malformed.csv +0 -3
data/spec/fixtures/malformed_header.csv +0 -3
data/spec/fixtures/money.csv +0 -3
data/spec/fixtures/no_header.csv +0 -7
data/spec/fixtures/numeric.csv +0 -5
data/spec/fixtures/pets.csv +0 -5
data/spec/fixtures/problematic.csv +0 -8
data/spec/fixtures/quote_char.csv +0 -9
data/spec/fixtures/quoted.csv +0 -5
data/spec/fixtures/quoted2.csv +0 -4
data/spec/fixtures/separator_colon.csv +0 -4
data/spec/fixtures/separator_comma.csv +0 -4
data/spec/fixtures/separator_pipe.csv +0 -4
data/spec/fixtures/separator_semi.csv +0 -4
data/spec/fixtures/separator_tab.csv +0 -4
data/spec/fixtures/skip_lines.csv +0 -8
data/spec/fixtures/trading.csv +0 -3
data/spec/fixtures/user_import.csv +0 -3
data/spec/fixtures/valid_unicode.csv +0 -5
data/spec/fixtures/with_dashes.csv +0 -8
data/spec/fixtures/with_dates.csv +0 -4
data/spec/smarter_csv/additional_separator_spec.rb +0 -45
data/spec/smarter_csv/binary_file2_spec.rb +0 -24
data/spec/smarter_csv/binary_file_spec.rb +0 -22
data/spec/smarter_csv/blank_spec.rb +0 -55
data/spec/smarter_csv/carriage_return_spec.rb +0 -190
data/spec/smarter_csv/chunked_reading_spec.rb +0 -14
data/spec/smarter_csv/close_file_spec.rb +0 -15
data/spec/smarter_csv/column_separator_spec.rb +0 -95
data/spec/smarter_csv/convert_values_to_numeric_spec.rb +0 -48
data/spec/smarter_csv/duplicate_headers_spec.rb +0 -76
data/spec/smarter_csv/empty_columns_spec.rb +0 -74
data/spec/smarter_csv/extenstions_spec.rb +0 -17
data/spec/smarter_csv/hard_sample_spec.rb +0 -24
data/spec/smarter_csv/header_transformation_spec.rb +0 -21
data/spec/smarter_csv/ignore_comments_spec.rb +0 -45
data/spec/smarter_csv/invalid_headers_spec.rb +0 -38
data/spec/smarter_csv/keep_headers_spec.rb +0 -24
data/spec/smarter_csv/key_mapping_spec.rb +0 -56
data/spec/smarter_csv/line_ending_spec.rb +0 -43
data/spec/smarter_csv/load_basic_spec.rb +0 -20
data/spec/smarter_csv/malformed_spec.rb +0 -25
data/spec/smarter_csv/no_header_spec.rb +0 -29
data/spec/smarter_csv/not_downcase_header_spec.rb +0 -24
data/spec/smarter_csv/parse/column_separator_spec.rb +0 -61
data/spec/smarter_csv/parse/old_csv_library_spec.rb +0 -74
data/spec/smarter_csv/parse/rfc4180_and_more_spec.rb +0 -170
data/spec/smarter_csv/problematic.rb +0 -34
data/spec/smarter_csv/quoted_spec.rb +0 -52
data/spec/smarter_csv/remove_empty_values_spec.rb +0 -13
data/spec/smarter_csv/remove_keys_from_hashes_spec.rb +0 -25
data/spec/smarter_csv/remove_not_mapped_keys_spec.rb +0 -35
data/spec/smarter_csv/remove_values_matching_spec.rb +0 -26
data/spec/smarter_csv/remove_zero_values_spec.rb +0 -25
data/spec/smarter_csv/skip_lines_spec.rb +0 -29
data/spec/smarter_csv/strings_as_keys_spec.rb +0 -24
data/spec/smarter_csv/strip_chars_from_headers_spec.rb +0 -24
data/spec/smarter_csv/trading_spec.rb +0 -25
data/spec/smarter_csv/valid_unicode_spec.rb +0 -94
data/spec/smarter_csv/value_converters_spec.rb +0 -52
data/spec/spec/spec_helper.rb +0 -17
data/spec/spec.opts +0 -2
data/spec/spec_helper.rb +0 -21

data/lib/smarter_csv.rb CHANGED Viewed

@@ -1,12 +1,526 @@
-if ENV['COVERAGE']
-  require 'simplecov'
-  SimpleCov.start do
-    add_filter "/spec/"
-    add_filter "/pkg/"
+# frozen_string_literal: true
+require_relative "extensions/hash"
+require_relative "smarter_csv/version"
+require_relative "smarter_csv/smarter_csv" unless ENV['CI'] # does not compile/link in CI?
+# require 'smarter_csv.bundle' unless ENV['CI'] # does not compile/link in CI?
+module SmarterCSV
+  class SmarterCSVException < StandardError; end
+  class HeaderSizeMismatch < SmarterCSVException; end
+  class IncorrectOption < SmarterCSVException; end
+  class DuplicateHeaders < SmarterCSVException; end
+  class MissingHeaders < SmarterCSVException; end
+  class NoColSepDetected < SmarterCSVException; end
+  class KeyMappingError < SmarterCSVException; end
+  class MalformedCSVError < SmarterCSVException; end
+  # first parameter: filename or input object which responds to readline method
+  def SmarterCSV.process(input, options = {}, &block)
+    options = default_options.merge(options)
+    options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
+    puts "SmarterCSV OPTIONS: #{options.inspect}" if options[:verbose]
+    headerA = []
+    result = []
+    @file_line_count = 0
+    @csv_line_count = 0
+    has_rails = !!defined?(Rails)
+    begin
+      fh = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
+      # auto-detect the row separator
+      options[:row_sep] = guess_line_ending(fh, options) if options[:row_sep]&.to_sym == :auto
+      # attempt to auto-detect column separator
+      options[:col_sep] = guess_column_separator(fh, options) if options[:col_sep]&.to_sym == :auto
+      if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && (fh.respond_to?(:external_encoding) && fh.external_encoding != Encoding.find('UTF-8') || fh.respond_to?(:encoding) && fh.encoding != Encoding.find('UTF-8'))
+        puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
+      end
+      if options[:skip_lines].to_i > 0
+        options[:skip_lines].to_i.times do
+          readline_with_counts(fh, options)
+        end
+      end
+      headerA, header_size = process_headers(fh, options)
+      # in case we use chunking.. we'll need to set it up..
+      if !options[:chunk_size].nil? && options[:chunk_size].to_i > 0
+        use_chunks = true
+        chunk_size = options[:chunk_size].to_i
+        chunk_count = 0
+        chunk = []
+      else
+        use_chunks = false
+      end
+      # now on to processing all the rest of the lines in the CSV file:
+      until fh.eof? # we can't use fh.readlines() here, because this would read the whole file into memory at once, and eof => true
+        line = readline_with_counts(fh, options)
+        # replace invalid byte sequences with options[:invalid_byte_sequence] (an empty string by default) to avoid errors
+        line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
+        print "processing file line %10d, csv line %10d\r" % [@file_line_count, @csv_line_count] if options[:verbose]
+        next if options[:comment_regexp] && line =~ options[:comment_regexp] # ignore all comment lines if there are any
+        # cater for the quoted csv data containing the row separator carriage return character
+        # in which case the row data will be split across multiple lines (see the sample content in spec/fixtures/carriage_returns_rn.csv)
+        # by detecting the existence of an uneven number of quote characters
+        multiline = line.count(options[:quote_char]).odd? # should handle quote_char nil
+        while line.count(options[:quote_char]).odd? # should handle quote_char nil
+          next_line = fh.readline(options[:row_sep])
+          next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
+          line += next_line
+          @file_line_count += 1
+        end
+        print "\nline contains uneven number of quote chars so including content through file line %d\n" % @file_line_count if options[:verbose] && multiline
+        line.chomp!(options[:row_sep])
+        dataA, _data_size = parse(line, options, header_size)
+        dataA.map!{|x| x.strip} if options[:strip_whitespace]
+        # if all values are blank, then ignore this line
+        next if options[:remove_empty_hashes] && (dataA.empty? || blank?(dataA))
+        hash = Hash.zip(headerA, dataA) # from Facets of Ruby library
+        # make sure we delete any key/value pairs from the hash, which the user wanted to delete:
+        # Note: Ruby < 1.9 doesn't allow empty symbol literals!
+        hash.delete(nil)
+        hash.delete('')
+        eval('hash.delete(:"")') if RUBY_VERSION.to_f > 1.8
+        if options[:remove_empty_values] == true
+          hash.delete_if{|_k, v| has_rails ? v.blank? : blank?(v)}
+        end
+        hash.delete_if{|_k, v| !v.nil? && v =~ /^(\d+|\d+\.\d+)$/ && v.to_f == 0} if options[:remove_zero_values] # values are typically Strings!
+        hash.delete_if{|_k, v| v =~ options[:remove_values_matching]} if options[:remove_values_matching]
+        if options[:convert_values_to_numeric]
+          hash.each do |k, v|
+            # deal with the :only / :except options to :convert_values_to_numeric
+            next if only_or_except_limit_execution(options, :convert_values_to_numeric, k)
+            # convert if it's a numeric value:
+            case v
+            when /^[+-]?\d+\.\d+$/
+              hash[k] = v.to_f
+            when /^[+-]?\d+$/
+              hash[k] = v.to_i
+            end
+          end
+        end
+        if options[:value_converters]
+          hash.each do |k, v|
+            converter = options[:value_converters][k]
+            next unless converter
+            hash[k] = converter.convert(v)
+          end
+        end
+        next if options[:remove_empty_hashes] && hash.empty?
+        hash[:csv_line_number] = @csv_line_count if options[:with_line_numbers]
+        if use_chunks
+          chunk << hash # append temp result to chunk
+          if chunk.size >= chunk_size || fh.eof? # if chunk if full, or EOF reached
+            # do something with the chunk
+            if block_given?
+              yield chunk # do something with the hashes in the chunk in the block
+            else
+              result << chunk # not sure yet, why anybody would want to do this without a block
+            end
+            chunk_count += 1
+            chunk = [] # initialize for next chunk of data
+          else
+            # the last chunk may contain partial data, which also needs to be returned (BUG / ISSUE-18)
+          end
+          # while a chunk is being filled up we don't need to do anything else here
+        else # no chunk handling
+          if block_given?
+            yield [hash] # do something with the hash in the block (better to use chunking here)
+          else
+            result << hash
+          end
+        end
+      end
+      # print new line to retain last processing line message
+      print "\n" if options[:verbose]
+      # last chunk:
+      if !chunk.nil? && chunk.size > 0
+        # do something with the chunk
+        if block_given?
+          yield chunk # do something with the hashes in the chunk in the block
+        else
+          result << chunk # not sure yet, why anybody would want to do this without a block
+        end
+        chunk_count += 1
+        chunk = [] # initialize for next chunk of data
+      end
+    ensure
+      fh.close if fh.respond_to?(:close)
+    end
+    if block_given?
+      return chunk_count # when we do processing through a block we only care how many chunks we processed
+    else
+      return result # returns either an Array of Hashes, or an Array of Arrays of Hashes (if in chunked mode)
+    end
   end
-end
-require 'csv'
-require "smarter_csv/version"
-require "extensions/hash.rb"
-require "smarter_csv/smarter_csv.rb"
+  class << self
+    def has_acceleration?
+      @has_acceleration ||= !!defined?(parse_csv_line_c)
+    end
+    def raw_header
+      @raw_header
+    end
+    def headers
+      @headers
+    end
+    protected
+    # NOTE: this is not called when "parse" methods are tested by themselves
+    def default_options
+      {
+        acceleration: true,
+        auto_row_sep_chars: 500,
+        chunk_size: nil,
+        col_sep: ',',
+        comment_regexp: nil, # was: /\A#/,
+        convert_values_to_numeric: true,
+        downcase_header: true,
+        duplicate_header_suffix: nil,
+        file_encoding: 'utf-8',
+        force_simple_split: false,
+        force_utf8: false,
+        headers_in_file: true,
+        invalid_byte_sequence: '',
+        keep_original_headers: false,
+        key_mapping_hash: nil,
+        quote_char: '"',
+        remove_empty_hashes: true,
+        remove_empty_values: true,
+        remove_unmapped_keys: false,
+        remove_values_matching: nil,
+        remove_zero_values: false,
+        required_headers: nil,
+        row_sep: $/,
+        skip_lines: nil,
+        strings_as_keys: false,
+        strip_chars_from_headers: nil,
+        strip_whitespace: true,
+        user_provided_headers: nil,
+        value_converters: nil,
+        verbose: false,
+        with_line_numbers: false,
+      }
+    end
+    def readline_with_counts(filehandle, options)
+      line = filehandle.readline(options[:row_sep])
+      @file_line_count += 1
+      @csv_line_count += 1
+      line
+    end
+    ###
+    ### Thin wrapper around C-extension
+    ###
+    def parse(line, options, header_size = nil)
+      # puts "SmarterCSV.parse OPTIONS: #{options[:acceleration]}" if options[:verbose]
+      if options[:acceleration] && has_acceleration?
+        # :nocov:
+        has_quotes = line =~ /#{options[:quote_char]}/
+        elements = parse_csv_line_c(line, options[:col_sep], options[:quote_char], header_size)
+        elements.map!{|x| cleanup_quotes(x, options[:quote_char])} if has_quotes
+        return [elements, elements.size]
+        # :nocov:
+      else
+        # puts "WARNING: SmarterCSV is using un-accelerated parsing of lines. Check options[:acceleration]"
+        return parse_csv_line_ruby(line, options, header_size)
+      end
+    end
+    # ------------------------------------------------------------------
+    # Ruby equivalent of the C-extension for parse_line
+    #
+    # parses a single line: either a CSV header line or a body line
+    # - quoting rules compared to RFC-4180 are somewhat relaxed
+    # - we are not assuming that quotes inside a field need to be doubled
+    # - we are not assuming that all fields need to be quoted (0 is even)
+    # - works with multi-char col_sep
+    # - if header_size is given, only up to header_size fields are parsed
+    #
+    # We use header_size for parsing the body lines to make sure we always match the number of headers
+    # in case there are trailing col_sep characters in line
+    #
+    # Our convention is that empty fields are returned as empty strings, not as nil.
+    #
+    #
+    # the purpose of the header_size parameter is to handle a corner case where
+    # CSV lines contain more fields than the header,
+    # in which case the remaining fields in the line are ignored.
+    #
+    def parse_csv_line_ruby(line, options, header_size = nil)
+      return [] if line.nil?
+      line_size = line.size
+      col_sep = options[:col_sep]
+      col_sep_size = col_sep.size
+      quote = options[:quote_char]
+      quote_count = 0
+      elements = []
+      start = 0
+      i = 0
+      while i < line_size
+        if line[i...i+col_sep_size] == col_sep && quote_count.even?
+          break if !header_size.nil? && elements.size >= header_size
+          elements << cleanup_quotes(line[start...i], quote)
+          i += col_sep.size
+          start = i
+        else
+          quote_count += 1 if line[i] == quote
+          i += 1
+        end
+      end
+      elements << cleanup_quotes(line[start..-1], quote) if header_size.nil? || elements.size < header_size
+      [elements, elements.size]
+    end
+    def cleanup_quotes(field, quote)
+      return field if field.nil?
+      # return if field !~ /#{quote}/ # this check can probably be eliminated
+      if field.start_with?(quote) && field.end_with?(quote)
+        field.delete_prefix!(quote)
+        field.delete_suffix!(quote)
+      end
+      field.gsub!("#{quote}#{quote}", quote)
+      field
+    end
+    # SEE: https://github.com/rails/rails/blob/32015b6f369adc839c4f0955f2d9dce50c0b6123/activesupport/lib/active_support/core_ext/object/blank.rb#L121
+    # and in the future we might also include UTF-8 space characters: https://www.compart.com/en/unicode/category/Zs
+    BLANK_RE = /\A\s*\z/.freeze
+    def blank?(value)
+      case value
+      when String
+        value.empty? || BLANK_RE.match?(value)
+      when NilClass
+        true
+      when Array
+        value.empty? || value.inject(true){|result, x| result &&= elem_blank?(x)}
+      when Hash
+        value.empty? || value.values.inject(true){|result, x| result &&= elem_blank?(x)}
+      else
+        false
+      end
+    end
+    def elem_blank?(value)
+      case value
+      when String
+        value.empty? || BLANK_RE.match?(value)
+      when NilClass
+        true
+      else
+        false
+      end
+    end
+    # acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
+    def only_or_except_limit_execution(options, option_name, key)
+      if options[option_name].is_a?(Hash)
+        if options[option_name].has_key?(:except)
+          return true if Array(options[option_name][:except]).include?(key)
+        elsif options[option_name].has_key?(:only)
+          return true unless Array(options[option_name][:only]).include?(key)
+        end
+      end
+      return false
+    end
+    # raise exception if none is found
+    def guess_column_separator(filehandle, options)
+      del = [',', "\t", ';', ':', '|']
+      n = Hash.new(0)
+      5.times do
+        line = filehandle.readline(options[:row_sep])
+        del.each do |d|
+          n[d] += line.scan(d).count
+        end
+      rescue EOFError # short files
+        break
+      end
+      filehandle.rewind
+      raise SmarterCSV::NoColSepDetected if n.values.max == 0
+      col_sep = n.key(n.values.max)
+    end
+    # limitation: this examines up to options[:auto_row_sep_chars] characters outside of quotes (the whole file if that option is nil or 0) before making a decision
+    def guess_line_ending(filehandle, options)
+      counts = {"\n" => 0, "\r" => 0, "\r\n" => 0}
+      quoted_char = false
+      # count how many of the pre-defined line-endings we find
+      # ignoring those contained within quote characters
+      last_char = nil
+      lines = 0
+      filehandle.each_char do |c|
+        quoted_char = !quoted_char if c == options[:quote_char]
+        next if quoted_char
+        if last_char == "\r"
+          if c == "\n"
+            counts["\r\n"] += 1
+          else
+            counts["\r"] += 1 # \r are counted after they appeared
+          end
+        elsif c == "\n"
+          counts["\n"] += 1
+        end
+        last_char = c
+        lines += 1
+        break if options[:auto_row_sep_chars] && options[:auto_row_sep_chars] > 0 && lines >= options[:auto_row_sep_chars]
+      end
+      filehandle.rewind
+      counts["\r"] += 1 if last_char == "\r"
+      # find the most frequent key/value pair:
+      k, _ = counts.max_by{|_, v| v}
+      return k
+    end
+    def process_headers(filehandle, options)
+      @raw_header = nil
+      @headers = nil
+      if options[:headers_in_file] # extract the header line
+        # process the header line in the CSV file..
+        # the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
+        header = readline_with_counts(filehandle, options)
+        @raw_header = header
+        header = header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
+        header = header.sub(options[:comment_regexp], '') if options[:comment_regexp]
+        header = header.chomp(options[:row_sep])
+        header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
+        file_headerA, file_header_size = parse(header, options)
+        file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/, '')}
+        file_headerA.map!{|x| x.strip} if options[:strip_whitespace]
+        unless options[:keep_original_headers]
+          file_headerA.map!{|x| x.gsub(/\s+|-+/, '_')}
+          file_headerA.map!{|x| x.downcase} if options[:downcase_header]
+        end
+      else
+        raise SmarterCSV::IncorrectOption, "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" unless options[:user_provided_headers]
+      end
+      if options[:user_provided_headers] && options[:user_provided_headers].class == Array && !options[:user_provided_headers].empty?
+        # use user-provided headers
+        headerA = options[:user_provided_headers]
+        if defined?(file_header_size) && !file_header_size.nil?
+          if headerA.size != file_header_size
+            raise SmarterCSV::HeaderSizeMismatch, "ERROR: :user_provided_headers defines #{headerA.size} headers !=  CSV-file has #{file_header_size} headers"
+          else
+            # we could print out the mapping of file_headerA to headerA here
+          end
+        end
+      else
+        headerA = file_headerA
+      end
+      # detect duplicate headers and disambiguate
+      headerA = process_duplicate_headers(headerA, options) if options[:duplicate_header_suffix]
+      header_size = headerA.size # used for splitting lines
+      headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
+      unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
+        key_mappingH = options[:key_mapping]
+        # do some key mapping on the keys in the file header
+        #   if you want to completely delete a key, then map it to nil or to ''
+        if !key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
+          # we can't map keys that are not there
+          missing_keys = key_mappingH.keys - headerA
+          puts "WARNING: missing header(s): #{missing_keys.join(",")}" unless missing_keys.empty?
+          headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
+        end
+      end
+      # header_validations
+      duplicate_headers = []
+      headerA.compact.each do |k|
+        duplicate_headers << k if headerA.select{|x| x == k}.size > 1
+      end
+      unless options[:user_provided_headers] || duplicate_headers.empty?
+        raise SmarterCSV::DuplicateHeaders, "ERROR: duplicate headers: #{duplicate_headers.join(',')}"
+      end
+      if options[:required_headers] && options[:required_headers].is_a?(Array)
+        missing_headers = []
+        options[:required_headers].each do |k|
+          missing_headers << k unless headerA.include?(k)
+        end
+        raise SmarterCSV::MissingHeaders, "ERROR: missing headers: #{missing_headers.join(',')}" unless missing_headers.empty?
+      end
+      @headers = headerA
+      [headerA, header_size]
+    end
+    def process_duplicate_headers(headers, options)
+      counts = Hash.new(0)
+      result = []
+      headers.each do |key|
+        counts[key] += 1
+        if counts[key] == 1
+          result << key
+        else
+          result << [key, options[:duplicate_header_suffix], counts[key]].join
+        end
+      end
+      result
+    end
+  end
+end

data/smarter_csv.gemspec CHANGED Viewed

@@ -12,14 +12,29 @@ Gem::Specification.new do |spec|
   spec.homepage      = "https://github.com/tilo/smarter_csv"
   spec.license       = 'MIT'
-  spec.files         = `git ls-files`.split($\)
+  spec.metadata["homepage_uri"] = spec.homepage
+  spec.metadata["source_code_uri"] = spec.homepage
+  spec.metadata["changelog_uri"] = "https://github.com/tilo/smarter_csv/blob/main/CHANGELOG.md"
+  # Specify which files should be added to the gem when it is released.
+  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
+  spec.files = Dir.chdir(__dir__) do
+    `git ls-files -z`.split("\x0").reject do |f|
+      (f == __FILE__) ||
+        f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)}) || f.match(/\.h\z/)
+    end
+  end
+  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
   spec.executables   = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
-  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
-  spec.require_paths = ["lib"]
+  spec.require_paths = ["lib"] # add ext here?
+  spec.extensions = ["ext/smarter_csv/extconf.rb"]
+  spec.add_development_dependency "awesome_print"
+  spec.add_development_dependency "codecov"
+  spec.add_development_dependency "pry"
   spec.add_development_dependency "rspec"
+  spec.add_development_dependency "rubocop"
   spec.add_development_dependency "simplecov"
-  spec.add_development_dependency "awesome_print"
-  #  spec.add_development_dependency "guard-rspec"
-  spec.metadata["homepage_uri"] = spec.homepage
 end