smarter_csv 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/CHANGELOG.md +181 -0
- data/CONTRIBUTORS.md +46 -0
- data/LICENSE.txt +21 -0
- data/README.md +50 -239
- data/Rakefile +8 -15
- data/lib/smarter_csv/smarter_csv.rb +114 -38
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv.rb +8 -0
- data/smarter_csv.gemspec +20 -16
- data/spec/fixtures/additional_separator.csv +6 -0
- data/spec/fixtures/empty_columns_1.csv +2 -0
- data/spec/fixtures/empty_columns_2.csv +2 -0
- data/spec/fixtures/hard_sample.csv +2 -0
- data/spec/fixtures/numeric.csv +1 -1
- data/spec/fixtures/separator_colon.csv +4 -0
- data/spec/fixtures/separator_comma.csv +4 -0
- data/spec/fixtures/separator_pipe.csv +4 -0
- data/spec/fixtures/{separator.csv → separator_semi.csv} +0 -0
- data/spec/fixtures/separator_tab.csv +4 -0
- data/spec/smarter_csv/additional_separator_spec.rb +45 -0
- data/spec/smarter_csv/binary_file2_spec.rb +1 -1
- data/spec/smarter_csv/blank_spec.rb +55 -0
- data/spec/smarter_csv/carriage_return_spec.rb +27 -7
- data/spec/smarter_csv/column_separator_spec.rb +89 -5
- data/spec/smarter_csv/empty_columns_spec.rb +74 -0
- data/spec/smarter_csv/hard_sample_spec.rb +24 -0
- data/spec/smarter_csv/ignore_comments_spec.rb +45 -30
- metadata +50 -13
| @@ -4,48 +4,42 @@ module SmarterCSV | |
| 4 4 | 
             
              class IncorrectOption < SmarterCSVException; end
         | 
| 5 5 | 
             
              class DuplicateHeaders < SmarterCSVException; end
         | 
| 6 6 | 
             
              class MissingHeaders < SmarterCSVException; end
         | 
| 7 | 
            -
             | 
| 7 | 
            +
              class NoColSepDetected < SmarterCSVException; end
         | 
| 8 8 |  | 
| 9 9 | 
             
              def SmarterCSV.process(input, options={}, &block)   # first parameter: filename or input object with readline method
         | 
| 10 | 
            -
                default_options = {:col_sep => ',' , :row_sep => $/ , :quote_char => '"', :force_simple_split => false , :verbose => false ,
         | 
| 11 | 
            -
                  :remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
         | 
| 12 | 
            -
                  :convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
         | 
| 13 | 
            -
                  :comment_regexp => /\A#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8',
         | 
| 14 | 
            -
                  :remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false, :invalid_byte_sequence => '',
         | 
| 15 | 
            -
                  :auto_row_sep_chars => 500, :required_headers => nil
         | 
| 16 | 
            -
                }
         | 
| 17 10 | 
             
                options = default_options.merge(options)
         | 
| 18 11 | 
             
                options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
         | 
| 19 | 
            -
             | 
| 12 | 
            +
             | 
| 20 13 | 
             
                headerA = []
         | 
| 21 14 | 
             
                result = []
         | 
| 22 | 
            -
                old_row_sep = $/
         | 
| 23 15 | 
             
                file_line_count = 0
         | 
| 24 16 | 
             
                csv_line_count = 0
         | 
| 25 17 | 
             
                has_rails = !! defined?(Rails)
         | 
| 26 18 | 
             
                begin
         | 
| 27 19 | 
             
                  f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
         | 
| 28 20 |  | 
| 21 | 
            +
                  # auto-detect the row separator
         | 
| 22 | 
            +
                  options[:row_sep] = SmarterCSV.guess_line_ending(f, options) if options[:row_sep].to_sym == :auto
         | 
| 23 | 
            +
                  # attempt to auto-detect column separator
         | 
| 24 | 
            +
                  options[:col_sep] = guess_column_separator(f, options) if options[:col_sep].to_sym == :auto
         | 
| 25 | 
            +
                  # preserve options, in case we need to call the CSV class
         | 
| 26 | 
            +
                  csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
         | 
| 27 | 
            +
                  csv_options.delete(:row_sep) if [nil, :auto].include?( options[:row_sep].to_sym )
         | 
| 28 | 
            +
                  csv_options.delete(:col_sep) if [nil, :auto].include?( options[:col_sep].to_sym )
         | 
| 29 | 
            +
             | 
| 29 30 | 
             
                  if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && ( f.respond_to?(:external_encoding) && f.external_encoding != Encoding.find('UTF-8') || f.respond_to?(:encoding) && f.encoding != Encoding.find('UTF-8') )
         | 
| 30 31 | 
             
                    puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
         | 
| 31 32 | 
             
                  end
         | 
| 32 33 |  | 
| 33 | 
            -
                  if options[: | 
| 34 | 
            -
                    options[:row_sep] = line_ending = SmarterCSV.guess_line_ending( f, options )
         | 
| 35 | 
            -
                    f.rewind
         | 
| 36 | 
            -
                  end
         | 
| 37 | 
            -
                  $/ = options[:row_sep]
         | 
| 38 | 
            -
             | 
| 39 | 
            -
                  if options[:skip_lines].to_i > 0
         | 
| 40 | 
            -
                    options[:skip_lines].to_i.times{f.readline}
         | 
| 41 | 
            -
                  end
         | 
| 34 | 
            +
                  options[:skip_lines].to_i.times{f.readline(options[:row_sep])} if options[:skip_lines].to_i > 0
         | 
| 42 35 |  | 
| 43 36 | 
             
                  if options[:headers_in_file]        # extract the header line
         | 
| 44 37 | 
             
                    # process the header line in the CSV file..
         | 
| 45 38 | 
             
                    # the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
         | 
| 46 | 
            -
                    header = f.readline
         | 
| 39 | 
            +
                    header = f.readline(options[:row_sep])
         | 
| 47 40 | 
             
                    header = header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
         | 
| 48 | 
            -
                    header = header.sub(options[:comment_regexp],'') | 
| 41 | 
            +
                    header = header.sub(options[:comment_regexp],'') if options[:comment_regexp]
         | 
| 42 | 
            +
                    header = header.chomp(options[:row_sep])
         | 
| 49 43 |  | 
| 50 44 | 
             
                    file_line_count += 1
         | 
| 51 45 | 
             
                    csv_line_count += 1
         | 
| @@ -60,14 +54,14 @@ module SmarterCSV | |
| 60 54 | 
             
                    else
         | 
| 61 55 | 
             
                      file_headerA =  header.split(options[:col_sep])
         | 
| 62 56 | 
             
                    end
         | 
| 57 | 
            +
                    file_header_size = file_headerA.size # before mapping, which could delete keys
         | 
| 58 | 
            +
             | 
| 63 59 | 
             
                    file_headerA.map!{|x| x.gsub(%r/#{options[:quote_char]}/,'') }
         | 
| 64 60 | 
             
                    file_headerA.map!{|x| x.strip}  if options[:strip_whitespace]
         | 
| 65 61 | 
             
                    unless options[:keep_original_headers]
         | 
| 66 62 | 
             
                      file_headerA.map!{|x| x.gsub(/\s+|-+/,'_')}
         | 
| 67 63 | 
             
                      file_headerA.map!{|x| x.downcase }   if options[:downcase_header]
         | 
| 68 64 | 
             
                    end
         | 
| 69 | 
            -
             | 
| 70 | 
            -
                    file_header_size = file_headerA.size
         | 
| 71 65 | 
             
                  else
         | 
| 72 66 | 
             
                    raise SmarterCSV::IncorrectOption , "ERROR: If :headers_in_file is set to false, you have to provide :user_provided_headers" if options[:user_provided_headers].nil?
         | 
| 73 67 | 
             
                  end
         | 
| @@ -84,6 +78,8 @@ module SmarterCSV | |
| 84 78 | 
             
                  else
         | 
| 85 79 | 
             
                    headerA = file_headerA
         | 
| 86 80 | 
             
                  end
         | 
| 81 | 
            +
                  header_size = headerA.size # used for splitting lines
         | 
| 82 | 
            +
             | 
| 87 83 | 
             
                  headerA.map!{|x| x.to_sym } unless options[:strings_as_keys] || options[:keep_original_headers]
         | 
| 88 84 |  | 
| 89 85 | 
             
                  unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
         | 
| @@ -123,7 +119,7 @@ module SmarterCSV | |
| 123 119 |  | 
| 124 120 | 
             
                  # now on to processing all the rest of the lines in the CSV file:
         | 
| 125 121 | 
             
                  while ! f.eof?    # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
         | 
| 126 | 
            -
                    line = f.readline  # read one line | 
| 122 | 
            +
                    line = f.readline(options[:row_sep])  # read one line
         | 
| 127 123 |  | 
| 128 124 | 
             
                    # replace invalid byte sequence in UTF-8 with question mark to avoid errors
         | 
| 129 125 | 
             
                    line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
         | 
| @@ -131,21 +127,22 @@ module SmarterCSV | |
| 131 127 | 
             
                    file_line_count += 1
         | 
| 132 128 | 
             
                    csv_line_count += 1
         | 
| 133 129 | 
             
                    print "processing file line %10d, csv line %10d\r" % [file_line_count, csv_line_count] if options[:verbose]
         | 
| 134 | 
            -
             | 
| 130 | 
            +
             | 
| 131 | 
            +
                    next if options[:comment_regexp] && line =~ options[:comment_regexp] # ignore all comment lines if there are any
         | 
| 135 132 |  | 
| 136 133 | 
             
                    # cater for the quoted csv data containing the row separator carriage return character
         | 
| 137 134 | 
             
                    # in which case the row data will be split across multiple lines (see the sample content in spec/fixtures/carriage_returns_rn.csv)
         | 
| 138 135 | 
             
                    # by detecting the existence of an uneven number of quote characters
         | 
| 139 | 
            -
                    multiline = line.count(options[:quote_char])%2 == 1
         | 
| 140 | 
            -
                    while line.count(options[:quote_char])%2 == 1
         | 
| 141 | 
            -
                      next_line = f.readline
         | 
| 136 | 
            +
                    multiline = line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
         | 
| 137 | 
            +
                    while line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
         | 
| 138 | 
            +
                      next_line = f.readline(options[:row_sep])
         | 
| 142 139 | 
             
                      next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
         | 
| 143 140 | 
             
                      line += next_line
         | 
| 144 141 | 
             
                      file_line_count += 1
         | 
| 145 142 | 
             
                    end
         | 
| 146 143 | 
             
                    print "\nline contains uneven number of quote chars so including content through file line %d\n" % file_line_count if options[:verbose] && multiline
         | 
| 147 144 |  | 
| 148 | 
            -
                    line.chomp! | 
| 145 | 
            +
                    line.chomp!(options[:row_sep])
         | 
| 149 146 |  | 
| 150 147 | 
             
                    if (line =~ %r{#{options[:quote_char]}}) and (! options[:force_simple_split])
         | 
| 151 148 | 
             
                      dataA = begin
         | 
| @@ -154,11 +151,17 @@ module SmarterCSV | |
| 154 151 | 
             
                        raise $!, "#{$!} [SmarterCSV: csv line #{csv_line_count}]", $!.backtrace
         | 
| 155 152 | 
             
                      end
         | 
| 156 153 | 
             
                    else
         | 
| 157 | 
            -
                      dataA = | 
| 154 | 
            +
                      dataA = line.split(options[:col_sep], header_size)
         | 
| 158 155 | 
             
                    end
         | 
| 159 | 
            -
             | 
| 160 | 
            -
                    dataA.map!{|x| x.strip} | 
| 156 | 
            +
                    dataA.map!{|x| x.sub(/(#{options[:col_sep]})+\z/, '')} # remove any unwanted trailing col_sep characters at the end
         | 
| 157 | 
            +
                    dataA.map!{|x| x.strip} if options[:strip_whitespace]
         | 
| 158 | 
            +
             | 
| 159 | 
            +
                    # if all values are blank, then ignore this line
         | 
| 160 | 
            +
                    # SEE: https://github.com/rails/rails/blob/32015b6f369adc839c4f0955f2d9dce50c0b6123/activesupport/lib/active_support/core_ext/object/blank.rb#L121
         | 
| 161 | 
            +
                    next if options[:remove_empty_hashes] && blank?(dataA)
         | 
| 162 | 
            +
             | 
| 161 163 | 
             
                    hash = Hash.zip(headerA,dataA)  # from Facets of Ruby library
         | 
| 164 | 
            +
             | 
| 162 165 | 
             
                    # make sure we delete any key/value pairs from the hash, which the user wanted to delete:
         | 
| 163 166 | 
             
                    # Note: Ruby < 1.9 doesn't allow empty symbol literals!
         | 
| 164 167 | 
             
                    hash.delete(nil); hash.delete('');
         | 
| @@ -166,18 +169,17 @@ module SmarterCSV | |
| 166 169 | 
             
                      eval('hash.delete(:"")')
         | 
| 167 170 | 
             
                    end
         | 
| 168 171 |  | 
| 169 | 
            -
                     | 
| 170 | 
            -
                    # which caters for double \n and \r\n characters such as "1\r\n\r\n2" whereas the original check (v =~ /^\s*$/) does not
         | 
| 171 | 
            -
                    if options[:remove_empty_values]
         | 
| 172 | 
            +
                    if options[:remove_empty_values] == true
         | 
| 172 173 | 
             
                      if has_rails
         | 
| 173 174 | 
             
                        hash.delete_if{|k,v| v.blank?}
         | 
| 174 175 | 
             
                      else
         | 
| 175 | 
            -
                        hash.delete_if{|k,v|  | 
| 176 | 
            +
                        hash.delete_if{|k,v| blank?(v)}
         | 
| 176 177 | 
             
                      end
         | 
| 177 178 | 
             
                    end
         | 
| 178 179 |  | 
| 179 180 | 
             
                    hash.delete_if{|k,v| ! v.nil? && v =~ /^(\d+|\d+\.\d+)$/ && v.to_f == 0} if options[:remove_zero_values]   # values are typically Strings!
         | 
| 180 181 | 
             
                    hash.delete_if{|k,v| v =~ options[:remove_values_matching]} if options[:remove_values_matching]
         | 
| 182 | 
            +
             | 
| 181 183 | 
             
                    if options[:convert_values_to_numeric]
         | 
| 182 184 | 
             
                      hash.each do |k,v|
         | 
| 183 185 | 
             
                        # deal with the :only / :except options to :convert_values_to_numeric
         | 
| @@ -247,7 +249,6 @@ module SmarterCSV | |
| 247 249 | 
             
                    chunk = []  # initialize for next chunk of data
         | 
| 248 250 | 
             
                  end
         | 
| 249 251 | 
             
                ensure
         | 
| 250 | 
            -
                  $/ = old_row_sep   # make sure this stupid global variable is always reset to it's previous value after we're done!
         | 
| 251 252 | 
             
                  f.close if f.respond_to?(:close)
         | 
| 252 253 | 
             
                end
         | 
| 253 254 | 
             
                if block_given?
         | 
| @@ -258,8 +259,63 @@ module SmarterCSV | |
| 258 259 | 
             
              end
         | 
| 259 260 |  | 
| 260 261 | 
             
              private
         | 
| 261 | 
            -
              # acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
         | 
| 262 262 |  | 
| 263 | 
            +
              def self.default_options
         | 
| 264 | 
            +
                {
         | 
| 265 | 
            +
                  auto_row_sep_chars: 500,
         | 
| 266 | 
            +
                  chunk_size: nil ,
         | 
| 267 | 
            +
                  col_sep: ',',
         | 
| 268 | 
            +
                  comment_regexp: nil, # was: /\A#/,
         | 
| 269 | 
            +
                  convert_values_to_numeric: true,
         | 
| 270 | 
            +
                  downcase_header: true,
         | 
| 271 | 
            +
                  file_encoding: 'utf-8',
         | 
| 272 | 
            +
                  force_simple_split: false ,
         | 
| 273 | 
            +
                  force_utf8: false,
         | 
| 274 | 
            +
                  headers_in_file: true,
         | 
| 275 | 
            +
                  invalid_byte_sequence: '',
         | 
| 276 | 
            +
                  keep_original_headers: false,
         | 
| 277 | 
            +
                  key_mapping_hash: nil ,
         | 
| 278 | 
            +
                  quote_char: '"',
         | 
| 279 | 
            +
                  remove_empty_hashes: true ,
         | 
| 280 | 
            +
                  remove_empty_values: true,
         | 
| 281 | 
            +
                  remove_unmapped_keys: false,
         | 
| 282 | 
            +
                  remove_values_matching: nil,
         | 
| 283 | 
            +
                  remove_zero_values: false,
         | 
| 284 | 
            +
                  required_headers: nil,
         | 
| 285 | 
            +
                  row_sep: $INPUT_RECORD_SEPARATOR,
         | 
| 286 | 
            +
                  skip_lines: nil,
         | 
| 287 | 
            +
                  strings_as_keys: false,
         | 
| 288 | 
            +
                  strip_chars_from_headers: nil,
         | 
| 289 | 
            +
                  strip_whitespace: true,
         | 
| 290 | 
            +
                  user_provided_headers: nil,
         | 
| 291 | 
            +
                  value_converters: nil,
         | 
| 292 | 
            +
                  verbose: false,
         | 
| 293 | 
            +
                }
         | 
| 294 | 
            +
              end
         | 
| 295 | 
            +
             | 
| 296 | 
            +
              def self.blank?(value)
         | 
| 297 | 
            +
                case value
         | 
| 298 | 
            +
                when Array
         | 
| 299 | 
            +
                  value.inject(true){|result, x| result &&= elem_blank?(x)}
         | 
| 300 | 
            +
                when Hash
         | 
| 301 | 
            +
                  value.inject(true){|result, x| result &&= elem_blank?(x.last)}
         | 
| 302 | 
            +
                else
         | 
| 303 | 
            +
                  elem_blank?(value)
         | 
| 304 | 
            +
                end
         | 
| 305 | 
            +
              end
         | 
| 306 | 
            +
             | 
| 307 | 
            +
              def self.elem_blank?(value)
         | 
| 308 | 
            +
                case value
         | 
| 309 | 
            +
                when NilClass
         | 
| 310 | 
            +
                  true
         | 
| 311 | 
            +
                when String
         | 
| 312 | 
            +
                  value !~ /\S/
         | 
| 313 | 
            +
                else
         | 
| 314 | 
            +
                  false
         | 
| 315 | 
            +
                end
         | 
| 316 | 
            +
              end
         | 
| 317 | 
            +
             | 
| 318 | 
            +
              # acts as a road-block to limit processing when iterating over all k/v pairs of a CSV-hash:
         | 
| 263 319 | 
             
              def self.only_or_except_limit_execution( options, option_name, key )
         | 
| 264 320 | 
             
                if options[option_name].is_a?(Hash)
         | 
| 265 321 | 
             
                  if options[option_name].has_key?( :except )
         | 
| @@ -271,6 +327,24 @@ module SmarterCSV | |
| 271 327 | 
             
                return false
         | 
| 272 328 | 
             
              end
         | 
| 273 329 |  | 
| 330 | 
            +
              # raise exception if none is found
         | 
| 331 | 
            +
              def self.guess_column_separator(filehandle, options)
         | 
| 332 | 
            +
                del = [',', "\t", ';', ':', '|']
         | 
| 333 | 
            +
                n = Hash.new(0)
         | 
| 334 | 
            +
                5.times do
         | 
| 335 | 
            +
                  line = filehandle.readline(options[:row_sep])
         | 
| 336 | 
            +
                  del.each do |d|
         | 
| 337 | 
            +
                    n[d] += line.scan(d).count
         | 
| 338 | 
            +
                  end
         | 
| 339 | 
            +
                rescue EOFError # short files
         | 
| 340 | 
            +
                  break
         | 
| 341 | 
            +
                end
         | 
| 342 | 
            +
                filehandle.rewind
         | 
| 343 | 
            +
                raise SmarterCSV::NoColSepDetected if n.values.max == 0
         | 
| 344 | 
            +
             | 
| 345 | 
            +
                col_sep = n.key(n.values.max)
         | 
| 346 | 
            +
              end
         | 
| 347 | 
            +
             | 
| 274 348 | 
             
              # limitation: this currently reads the whole file in before making a decision
         | 
| 275 349 | 
             
              def self.guess_line_ending( filehandle, options )
         | 
| 276 350 | 
             
                counts = {"\n" => 0 , "\r" => 0, "\r\n" => 0}
         | 
| @@ -297,6 +371,8 @@ module SmarterCSV | |
| 297 371 | 
             
                  lines += 1
         | 
| 298 372 | 
             
                  break if options[:auto_row_sep_chars] && options[:auto_row_sep_chars] > 0 && lines >= options[:auto_row_sep_chars]
         | 
| 299 373 | 
             
                end
         | 
| 374 | 
            +
                filehandle.rewind
         | 
| 375 | 
            +
             | 
| 300 376 | 
             
                counts["\r"] += 1 if last_char == "\r"
         | 
| 301 377 | 
             
                # find the key/value pair with the largest counter:
         | 
| 302 378 | 
             
                k,_ = counts.max_by{|_,v| v}
         | 
    
        data/lib/smarter_csv/version.rb
    CHANGED
    
    
    
        data/lib/smarter_csv.rb
    CHANGED
    
    
    
        data/smarter_csv.gemspec
    CHANGED
    
    | @@ -1,21 +1,25 @@ | |
| 1 1 | 
             
            # -*- encoding: utf-8 -*-
         | 
| 2 2 | 
             
            require File.expand_path('../lib/smarter_csv/version', __FILE__)
         | 
| 3 3 |  | 
| 4 | 
            -
            Gem::Specification.new do | | 
| 5 | 
            -
               | 
| 6 | 
            -
               | 
| 7 | 
            -
               | 
| 8 | 
            -
               | 
| 9 | 
            -
              gem.homepage      = "https://github.com/tilo/smarter_csv"
         | 
| 4 | 
            +
            Gem::Specification.new do |spec|
         | 
| 5 | 
            +
              spec.name          = "smarter_csv"
         | 
| 6 | 
            +
              spec.version       = SmarterCSV::VERSION
         | 
| 7 | 
            +
              spec.authors       = ["Tilo Sloboda"]
         | 
| 8 | 
            +
              spec.email         = ["tilo.sloboda@gmail.com"]
         | 
| 10 9 |  | 
| 11 | 
            -
               | 
| 12 | 
            -
               | 
| 13 | 
            -
               | 
| 14 | 
            -
               | 
| 15 | 
            -
             | 
| 16 | 
            -
               | 
| 17 | 
            -
               | 
| 18 | 
            -
               | 
| 19 | 
            -
               | 
| 20 | 
            -
             | 
| 10 | 
            +
              spec.summary       = %q{Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. chunked processing for huge CSV files}
         | 
| 11 | 
            +
              spec.description   = %q{Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys}
         | 
| 12 | 
            +
              spec.homepage      = "https://github.com/tilo/smarter_csv"
         | 
| 13 | 
            +
              spec.license       = 'MIT'
         | 
| 14 | 
            +
             | 
| 15 | 
            +
              spec.files         = `git ls-files`.split($\)
         | 
| 16 | 
            +
              spec.executables   = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
         | 
| 17 | 
            +
              spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
         | 
| 18 | 
            +
              spec.require_paths = ["lib"]
         | 
| 19 | 
            +
              spec.requirements  = ['csv'] # for CSV.parse() only needed in case we have quoted fields
         | 
| 20 | 
            +
              spec.add_development_dependency "rspec"
         | 
| 21 | 
            +
              spec.add_development_dependency "simplecov"
         | 
| 22 | 
            +
              #  spec.add_development_dependency "guard-rspec"
         | 
| 23 | 
            +
             | 
| 24 | 
            +
              spec.metadata["homepage_uri"] = spec.homepage
         | 
| 21 25 | 
             
            end
         | 
| @@ -0,0 +1,2 @@ | |
| 1 | 
            +
            Name,Email,Financial Status,Paid at,Fulfillment Status,Fulfilled at,Accepts Marketing,Currency,Subtotal,Shipping,Taxes,Total,Discount Code,Discount Amount,Shipping Method,Created at,Lineitem quantity,Lineitem name,Lineitem price,Lineitem compare at price,Lineitem sku,Lineitem requires shipping,Lineitem taxable,Lineitem fulfillment status,Billing Name,Billing Street,Billing Address1,Billing Address2,Billing Company,Billing City,Billing Zip,Billing Province,Billing Country,Billing Phone,Shipping Name,Shipping Street,Shipping Address1,Shipping Address2,Shipping Company,Shipping City,Shipping Zip,Shipping Province,Shipping Country,Shipping Phone,Notes,Note Attributes,Cancelled at,Payment Method,Payment Reference,Refunded Amount,Vendor,        rece,Tags,Risk Level,Source,Lineitem discount,Tax 1 Name,Tax 1 Value,Tax 2 Name,Tax 2 Value,Tax 3 Name,Tax 3 Value,Tax 4 Name,Tax 4 Value,Tax 5 Name,Tax 5 Value,Phone,Receipt Number,Duties,Billing Province Name,Shipping Province Name,Payment ID,Payment Terms Name,Next Payment Due At
         | 
| 2 | 
            +
            #MR1220817,foo@bar.com,paid,2022-02-08 22:31:28 +0100,unfulfilled,,yes,EUR,144,0,24,144,VIP,119.6,"Livraison Standard GRATUITE, 2-5 jours avec suivi",2022-02-08 22:31:26 +0100,2,Cire Épilation Nacrée,37,,WAX-200-NAC,true,true,pending,French Fry,64 Boulevard Budgié,64 Boulevard Budgié,,,dootdoot’,'49100,,FR,06 12 34 56 78,French Fry,64 Boulevard Budgi,64 Boulevard Budgié,,,dootdoot,'49100,,FR,06 12 34 56 78,,,,Stripe,c23800013619353.2,0,Goober  Rég,4331065802905,902,Low,web,0,FR TVA 20%,24,,,,,,,,,3366012111111,,,,,,,
         | 
    
        data/spec/fixtures/numeric.csv
    CHANGED
    
    
| 
            File without changes
         | 
| @@ -0,0 +1,45 @@ | |
| 1 | 
            +
            require 'spec_helper'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            fixture_path = 'spec/fixtures'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            describe 'handling of additional trailing column separators' do
         | 
| 6 | 
            +
              let(:file) { "#{fixture_path}/additional_separator.csv" }
         | 
| 7 | 
            +
             | 
| 8 | 
            +
              describe '' do
         | 
| 9 | 
            +
                let(:data) { SmarterCSV.process(file) }
         | 
| 10 | 
            +
             | 
| 11 | 
            +
                it 'reads all lines' do
         | 
| 12 | 
            +
                  data.size.should eq 5
         | 
| 13 | 
            +
                end
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                it 'reads regular lines' do
         | 
| 16 | 
            +
                  item = data[0]
         | 
| 17 | 
            +
                  item[:col1].should == 'eins'
         | 
| 18 | 
            +
                  item[:col2].should == 'zwei'
         | 
| 19 | 
            +
                end
         | 
| 20 | 
            +
             | 
| 21 | 
            +
                it 'strips single trailing col_sep character' do
         | 
| 22 | 
            +
                  item = data[1]
         | 
| 23 | 
            +
                  item[:col1].should == 'uno'
         | 
| 24 | 
            +
                  item[:col2].should == 'dos'
         | 
| 25 | 
            +
                end
         | 
| 26 | 
            +
             | 
| 27 | 
            +
                it 'strips multiple trailing col_sep characters' do
         | 
| 28 | 
            +
                  item = data[2]
         | 
| 29 | 
            +
                  item[:col1].should == 'one'
         | 
| 30 | 
            +
                  item[:col2].should == 'two'
         | 
| 31 | 
            +
                end
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                it 'strips multiple trailing col_sep chars' do
         | 
| 34 | 
            +
                  item = data[3]
         | 
| 35 | 
            +
                  item[:col1].should == 'ichi'
         | 
| 36 | 
            +
                  item[:col2].should == nil
         | 
| 37 | 
            +
                end
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                it 'strips multiple trailing col_sep chars' do
         | 
| 40 | 
            +
                  item = data[4]
         | 
| 41 | 
            +
                  item[:col1].should == 'un'
         | 
| 42 | 
            +
                  item[:col2].should == nil
         | 
| 43 | 
            +
                end
         | 
| 44 | 
            +
              end
         | 
| 45 | 
            +
            end
         | 
| @@ -12,7 +12,7 @@ describe 'be_able_to' do | |
| 12 12 | 
             
              it 'loads_binary_file_with_strings_as_keys' do 
         | 
| 13 13 | 
             
                options = {:col_sep => "\cA", :row_sep => "\cB", :comment_regexp => /^#/, :strings_as_keys => true}
         | 
| 14 14 | 
             
                data = SmarterCSV.process("#{fixture_path}/binary.csv", options)
         | 
| 15 | 
            -
                data. | 
| 15 | 
            +
                data.size.should == 8
         | 
| 16 16 | 
             
                data.each do |item|
         | 
| 17 17 | 
             
                  # all keys should be strings
         | 
| 18 18 | 
             
                  item.keys.each{|x| x.class.should be == String}
         | 
| @@ -0,0 +1,55 @@ | |
| 1 | 
            +
            require 'spec_helper'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            describe 'blank?' do
         | 
| 4 | 
            +
              it 'is true for nil' do
         | 
| 5 | 
            +
                SmarterCSV.send(:blank?, nil).should eq true
         | 
| 6 | 
            +
              end
         | 
| 7 | 
            +
             | 
| 8 | 
            +
              it 'is true for empty string' do
         | 
| 9 | 
            +
                SmarterCSV.send(:blank?, '').should eq true
         | 
| 10 | 
            +
              end
         | 
| 11 | 
            +
             | 
| 12 | 
            +
              it 'is true for blank string' do
         | 
| 13 | 
            +
                SmarterCSV.send(:blank?, '   ').should eq true
         | 
| 14 | 
            +
              end
         | 
| 15 | 
            +
             | 
| 16 | 
            +
              it 'is true for tab string' do
         | 
| 17 | 
            +
                SmarterCSV.send(:blank?, " \t ").should eq true
         | 
| 18 | 
            +
              end
         | 
| 19 | 
            +
             | 
| 20 | 
            +
              it 'is false for string with content' do
         | 
| 21 | 
            +
                SmarterCSV.send(:blank?, " 1 ").should eq false
         | 
| 22 | 
            +
              end
         | 
| 23 | 
            +
             | 
| 24 | 
            +
              it 'is false for numeic values' do
         | 
| 25 | 
            +
                SmarterCSV.send(:blank?, 1).should eq false
         | 
| 26 | 
            +
              end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
              describe 'arrays' do
         | 
| 29 | 
            +
                it 'is true for empty arrays' do
         | 
| 30 | 
            +
                  SmarterCSV.send(:blank?, []).should eq true
         | 
| 31 | 
            +
                end
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                it 'is true for blank arrays' do
         | 
| 34 | 
            +
                  SmarterCSV.send(:blank?, [nil, '', '  ', " \t "]).should eq true
         | 
| 35 | 
            +
                end
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                it 'is false for non-blank arrays' do
         | 
| 38 | 
            +
                  SmarterCSV.send(:blank?, [nil, '', '  ', " 1 "]).should eq false
         | 
| 39 | 
            +
                end
         | 
| 40 | 
            +
              end
         | 
| 41 | 
            +
             | 
| 42 | 
            +
              describe 'hashes' do
         | 
| 43 | 
            +
                it 'is true for empty arrays' do
         | 
| 44 | 
            +
                  SmarterCSV.send(:blank?, {}).should eq true
         | 
| 45 | 
            +
                end
         | 
| 46 | 
            +
             | 
| 47 | 
            +
                it 'is true for blank arrays' do
         | 
| 48 | 
            +
                  SmarterCSV.send(:blank?, {a: nil, b: '', c: '  ', d: " \t "}).should eq true
         | 
| 49 | 
            +
                end
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                it 'is false for non-blank arrays' do
         | 
| 52 | 
            +
                  SmarterCSV.send(:blank?, {a: nil, b: '', c: '  ', d: " 1 "}).should eq false
         | 
| 53 | 
            +
                end
         | 
| 54 | 
            +
              end
         | 
| 55 | 
            +
            end
         | 
| @@ -3,7 +3,6 @@ require 'spec_helper' | |
| 3 3 | 
             
            fixture_path = 'spec/fixtures'
         | 
| 4 4 |  | 
| 5 5 | 
             
            describe 'process files with line endings explicitly pre-specified' do
         | 
| 6 | 
            -
             | 
| 7 6 | 
             
              it 'should process a file with \n for line endings and within data fields' do
         | 
| 8 7 | 
             
                sep = "\n"
         | 
| 9 8 | 
             
                options = {:row_sep => sep}
         | 
| @@ -83,14 +82,14 @@ describe 'process files with line endings explicitly pre-specified' do | |
| 83 82 | 
             
                data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
         | 
| 84 83 | 
             
                data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
         | 
| 85 84 | 
             
              end
         | 
| 86 | 
            -
             | 
| 87 85 | 
             
            end
         | 
| 88 86 |  | 
| 89 87 | 
             
            describe 'process files with line endings in automatic mode' do
         | 
| 88 | 
            +
              let(:options) { { row_sep: :auto } }
         | 
| 90 89 |  | 
| 91 90 | 
             
              it 'should process a file with \n for line endings and within data fields' do
         | 
| 92 91 | 
             
                sep = "\n"
         | 
| 93 | 
            -
                data = SmarterCSV.process("#{fixture_path}/carriage_returns_n.csv",  | 
| 92 | 
            +
                data = SmarterCSV.process("#{fixture_path}/carriage_returns_n.csv", options)
         | 
| 94 93 | 
             
                data.flatten.size.should == 8
         | 
| 95 94 | 
             
                data[0][:name].should == "Anfield"
         | 
| 96 95 | 
             
                data[0][:street].should == "Anfield Road"
         | 
| @@ -112,7 +111,29 @@ describe 'process files with line endings in automatic mode' do | |
| 112 111 |  | 
| 113 112 | 
             
              it 'should process a file with \r for line endings and within data fields' do
         | 
| 114 113 | 
             
                sep = "\r"
         | 
| 115 | 
            -
                data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv",  | 
| 114 | 
            +
                data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", options)
         | 
| 115 | 
            +
                data.flatten.size.should == 8
         | 
| 116 | 
            +
                data[0][:name].should == "Anfield"
         | 
| 117 | 
            +
                data[0][:street].should == "Anfield Road"
         | 
| 118 | 
            +
                data[0][:city].should == "Liverpool"
         | 
| 119 | 
            +
                data[1][:name].should == ["Highbury", "Highbury House"].join(sep)
         | 
| 120 | 
            +
                data[2][:street].should == ["Sir Matt ", "Busby Way"].join(sep)
         | 
| 121 | 
            +
                data[3][:city].should == ["Newcastle-upon-tyne ", "Tyne and Wear"].join(sep)
         | 
| 122 | 
            +
                data[4][:name].should == ["White Hart Lane", "(The Lane)"].join(sep)
         | 
| 123 | 
            +
                data[4][:street].should == ["Bill Nicholson Way ", "748 High Rd"].join(sep)
         | 
| 124 | 
            +
                data[4][:city].should == ["Tottenham", "London"].join(sep)
         | 
| 125 | 
            +
                data[5][:name].should == "Stamford Bridge"
         | 
| 126 | 
            +
                data[5][:street].should == ["Fulham Road", "London"].join(sep)
         | 
| 127 | 
            +
                data[5][:city].should be_nil
         | 
| 128 | 
            +
                data[6][:name].should == ["Etihad Stadium", "Rowsley St", "Manchester"].join(sep)
         | 
| 129 | 
            +
                data[7][:name].should == "Goodison"
         | 
| 130 | 
            +
                data[7][:street].should == "Goodison Road"
         | 
| 131 | 
            +
                data[7][:city].should == "Liverpool"
         | 
| 132 | 
            +
              end
         | 
| 133 | 
            +
             | 
| 134 | 
            +
              it 'also works when auto is given a string' do
         | 
| 135 | 
            +
                sep = "\r"
         | 
| 136 | 
            +
                data = SmarterCSV.process("#{fixture_path}/carriage_returns_r.csv", {row_sep: 'auto'})
         | 
| 116 137 | 
             
                data.flatten.size.should == 8
         | 
| 117 138 | 
             
                data[0][:name].should == "Anfield"
         | 
| 118 139 | 
             
                data[0][:street].should == "Anfield Road"
         | 
| @@ -134,7 +155,7 @@ describe 'process files with line endings in automatic mode' do | |
| 134 155 |  | 
| 135 156 | 
             
              it 'should process a file with \r\n for line endings and within data fields' do
         | 
| 136 157 | 
             
                sep = "\r\n"
         | 
| 137 | 
            -
                data = SmarterCSV.process("#{fixture_path}/carriage_returns_rn.csv",  | 
| 158 | 
            +
                data = SmarterCSV.process("#{fixture_path}/carriage_returns_rn.csv", options)
         | 
| 138 159 | 
             
                data.flatten.size.should == 8
         | 
| 139 160 | 
             
                data[0][:name].should == "Anfield"
         | 
| 140 161 | 
             
                data[0][:street].should == "Anfield Road"
         | 
| @@ -157,7 +178,7 @@ describe 'process files with line endings in automatic mode' do | |
| 157 178 | 
             
              it 'should process a file with more quoted text carriage return characters (\r) than line ending characters (\n)' do
         | 
| 158 179 | 
             
                row_sep = "\n"
         | 
| 159 180 | 
             
                text_sep = "\r"
         | 
| 160 | 
            -
                data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv",  | 
| 181 | 
            +
                data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv", options)
         | 
| 161 182 | 
             
                data.flatten.size.should == 2
         | 
| 162 183 | 
             
                data[0][:band].should == "New Order"
         | 
| 163 184 | 
             
                data[0][:members].should == ["Bernard Sumner", "Peter Hook", "Stephen Morris", "Gillian Gilbert"].join(text_sep)
         | 
| @@ -166,5 +187,4 @@ describe 'process files with line endings in automatic mode' do | |
| 166 187 | 
             
                data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
         | 
| 167 188 | 
             
                data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
         | 
| 168 189 | 
             
              end
         | 
| 169 | 
            -
             | 
| 170 190 | 
             
            end
         |