RubyGems - smarter_csv - Versions diffs - 1.1.1 → 1.1.2 - Mend

smarter_csv 1.1.1 → 1.1.2

Files changed (5) hide show

checksums.yaml +4 -4
data/README.md +20 -6
data/lib/smarter_csv/smarter_csv.rb +8 -2
data/lib/smarter_csv/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 34d6c592bebe9d6b1d8f87f9f59ecf4a7d3b3a9d
-  data.tar.gz: fe722f38c4962a312c4db7e14cb72e735426db82
+  metadata.gz: 430c575cd14bb098e7754d0228186f0a29007495
+  data.tar.gz: 903d566d2c3569c954ecabf5ebf310c8f9aa5234
 SHA512:
-  metadata.gz: 75f5c56cfdeeef41be34f17bfbec30ae201c41463ad9aa6c7da7b5031c63fa1da27ef5120c245329eec108551fae722a5d156e09ead62c2e6aa34ce8edfe4cd8
-  data.tar.gz: 2d3b83fa5e7f4eada8d7f03df50890441f594b8db6d34df4d2c07164c73beda7b9963d596ce43d2ad74ffddb83efb4bdea8ca83d780d7f3129c258c6d21bdb70
+  metadata.gz: c846301856cbcf8e76efcf42d1d532a3317e6fdfce9ca06577794bae11b1e739086c6d40df70210fd3f0c75836bb9265ccff4d6d286fa998af735d43ef5c46ae
+  data.tar.gz: 52274d68e87f3e194be610bd69cfee6b084c193e389cf3591399a70e4051f23d1a2206c4d61aadb25c6c63c8675f285ef343e022f9dc70c95aac518f849ed005

data/README.md CHANGED Viewed

@@ -204,7 +204,8 @@ The options and the block are optional.
      |                             |          | Important if the file does not contain headers,                                      |
      |                             |          | otherwise you would lose the first line of data.                                     |
      | :skip_lines                 |   nil    | how many lines to skip before the first line or header line is processed             |
-     | :force_utf8                 |   false  | force UTF-8 encoding of all lines (including headers) in the CSV file                |
+     | :force_utf8                 |   false  | force UTF-8 encoding of all lines (including headers) in the CSV file                |
+     | :invalid_byte_sequence      |   ''     | string to replace invalid byte sequences with                                        |
      ---------------------------------------------------------------------------------------------------------------------------------
      | :value_converters           |   nil    | supply a hash of :header => KlassName; the class needs to implement self.convert(val)|
      | :remove_empty_values        |   true   | remove values which have nil or empty strings as values                              |
@@ -224,10 +225,17 @@ The options and the block are optional.
  * if you have a CSV file which contains unicode characters, you can process it as follows:
-       f = File.open(filename, "r:bom|utf-8");
-       data = SmarterCSV.process(f);
-       f.close
+       File.open(filename, "r:bom|utf-8") do |f|
+         data = SmarterCSV.process(f);
+       end
+* if the CSV file with unicode characters is in a remote location, similarly you need to give the encoding as an option to the `open` call:
+       require 'open-uri'
+       file_location = 'http://your.remote.org/sample.csv'
+       open(file_location, 'r:utf-8') do |f|   # don't forget to specify the UTF-8 encoding!!
+         data = SmarterCSV.process(f)
+       end
 #### NOTES about CSV Headers:
  * as this method parses CSV files, it is assumed that the first line of any file will contain a valid header
@@ -285,14 +293,18 @@ Planned in the next releases:
 ## Changes
-#### 1.1.1 (2016-11-26)
+#### 1.1.2 (2016-12-29)
+ * added option `invalid_byte_sequence` (thanks to polycarpou)
+ * added comments on handling of UTF-8 encoding when opening from File vs. OpenURI (thanks to KevinColemanInc)
+#### 1.1.1 (2016-11-26)
  * added option to `skip_lines` (thanks to wal)
  * added option to `force_utf8` encoding (thanks to jordangraft)
  * bugfix if no headers in input data (thanks to esBeee)
  * ensure input file is closed (thanks to waldyr)
  * improved verbose output (thanks to benmaher)
  * improved documentation
 #### 1.1.0 (2015-07-26)
  * added feature :value_converters, which allows parsing of dates, money, and other things (thanks to Raphaël Bleuse, Lucas Camargo de Almeida, Alejandro)
  * added error if :headers_in_file is set to false, and no :user_provided_headers are given (thanks to innhyu)
@@ -428,6 +440,8 @@ And a special thanks to those who contributed pull requests:
  * [Ben Maher](https://github.com/benmaher)
  * [Wal McConnell](https://github.com/wal)
  * [Jordan Graft](https://github.com/jordangraft)
+ * [Michael](https://github.com/polycarpou)
+ * [Kevin Coleman](https://github.com/KevinColemanInc)
 ## Contributing

data/lib/smarter_csv/smarter_csv.rb CHANGED Viewed

@@ -9,9 +9,10 @@ module SmarterCSV
       :remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
       :convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
       :comment_regexp => /^#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8',
-      :remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false
+      :remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false, :invalid_byte_sequence => ''
     }
     options = default_options.merge(options)
+    options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
     csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
     headerA = []
     result = []
@@ -35,7 +36,8 @@ module SmarterCSV
         # process the header line in the CSV file..
         # the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
         header = f.readline.sub(options[:comment_regexp],'').chomp(options[:row_sep])
-        header = header.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') if options[:force_utf8]
+        header = header.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] == 'utf-8'
         file_line_count += 1
         csv_line_count += 1
         header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
@@ -99,6 +101,10 @@ module SmarterCSV
       while ! f.eof?    # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
         line = f.readline  # read one line.. this uses the input_record_separator $/ which we set previously!
         line = line.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') if options[:force_utf8]
+        # replace invalid byte sequences with options[:invalid_byte_sequence] (default: '') to avoid encoding errors
+        line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] == 'utf-8'
         file_line_count += 1
         csv_line_count += 1
         print "processing file line %10d, csv line %10d\r" % [file_line_count, csv_line_count] if options[:verbose]

data/lib/smarter_csv/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module SmarterCSV
-  VERSION = "1.1.1"
+  VERSION = "1.1.2"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: smarter_csv
 version: !ruby/object:Gem::Version
-  version: 1.1.1
+  version: 1.1.2
 platform: ruby
 authors:
 - |
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-11-26 00:00:00.000000000 Z
+date: 2016-12-30 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec