RubyGems - smarter_csv - Versions diffs - 1.1.1 → 1.1.2 - Mend

smarter_csv 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

checksums.yaml +4 -4
data/README.md +20 -6
data/lib/smarter_csv/smarter_csv.rb +8 -2
data/lib/smarter_csv/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 34d6c592bebe9d6b1d8f87f9f59ecf4a7d3b3a9d
-  data.tar.gz: fe722f38c4962a312c4db7e14cb72e735426db82
+  metadata.gz: 430c575cd14bb098e7754d0228186f0a29007495
+  data.tar.gz: 903d566d2c3569c954ecabf5ebf310c8f9aa5234
 SHA512:
-  metadata.gz: 75f5c56cfdeeef41be34f17bfbec30ae201c41463ad9aa6c7da7b5031c63fa1da27ef5120c245329eec108551fae722a5d156e09ead62c2e6aa34ce8edfe4cd8
-  data.tar.gz: 2d3b83fa5e7f4eada8d7f03df50890441f594b8db6d34df4d2c07164c73beda7b9963d596ce43d2ad74ffddb83efb4bdea8ca83d780d7f3129c258c6d21bdb70
+  metadata.gz: c846301856cbcf8e76efcf42d1d532a3317e6fdfce9ca06577794bae11b1e739086c6d40df70210fd3f0c75836bb9265ccff4d6d286fa998af735d43ef5c46ae
+  data.tar.gz: 52274d68e87f3e194be610bd69cfee6b084c193e389cf3591399a70e4051f23d1a2206c4d61aadb25c6c63c8675f285ef343e022f9dc70c95aac518f849ed005

data/README.md CHANGED Viewed

@@ -204,7 +204,8 @@ The options and the block are optional.
      |                             |          | Important if the file does not contain headers,                                      |
      |                             |          | otherwise you would lose the first line of data.                                     |
      | :skip_lines                 |   nil    | how many lines to skip before the first line or header line is processed             |
-     | :force_utf8                 |   false  | force UTF-8 encoding of all lines (including headers) in the CSV file                |
+     | :force_utf8                 |   false  | force UTF-8 encoding of all lines (including headers) in the CSV file                |
+     | :invalid_byte_sequence      |   ''     | what to replace invalid byte sequences with                                          |
      ---------------------------------------------------------------------------------------------------------------------------------
      | :value_converters           |   nil    | supply a hash of :header => KlassName; the class needs to implement self.convert(val)|
      | :remove_empty_values        |   true   | remove values which have nil or empty strings as values                              |
@@ -224,10 +225,17 @@ The options and the block are optional.
  * if you have a CSV file which contains unicode characters, you can process it as follows:
-       f = File.open(filename, "r:bom|utf-8");
-       data = SmarterCSV.process(f);
-       f.close
+       File.open(filename, "r:bom|utf-8") do |f|
+         data = SmarterCSV.process(f);
+       end
+* if the CSV file with unicode characters is in a remote location, similarly you need to give the encoding as an option to the `open` call:
+       require 'open-uri'
+       file_location = 'http://your.remote.org/sample.csv'
+       open(file_location, 'r:utf-8') do |f|   # don't forget to specify the UTF-8 encoding!!
+         data = SmarterCSV.process(f)
+       end
 #### NOTES about CSV Headers:
  * as this method parses CSV files, it is assumed that the first line of any file will contain a valid header
@@ -285,14 +293,18 @@ Planned in the next releases:
 ## Changes
-#### 1.1.1 (2016-11-26)
+#### 1.1.2 (2016-12-29)
+ * added option `invalid_byte_sequence` (thanks to polycarpou)
+ * added comments on handling of UTF-8 encoding when opening from File vs. OpenURI (thanks to KevinColemanInc)
+#### 1.1.1 (2016-11-26)
  * added option to `skip_lines` (thanks to wal)
  * added option to `force_utf8` encoding (thanks to jordangraft)
  * bugfix if no headers in input data (thanks to esBeee)
  * ensure input file is closed (thanks to waldyr)
  * improved verbose output (thanks to benmaher)
  * improved documentation
 #### 1.1.0 (2015-07-26)
  * added feature :value_converters, which allows parsing of dates, money, and other things (thanks to Raphaël Bleuse, Lucas Camargo de Almeida, Alejandro)
  * added error if :headers_in_file is set to false, and no :user_provided_headers are given (thanks to innhyu)
@@ -428,6 +440,8 @@ And a special thanks to those who contributed pull requests:
  * [Ben Maher](https://github.com/benmaher)
  * [Wal McConnell](https://github.com/wal)
  * [Jordan Graft](https://github.com/jordangraft)
+ * [Michael](https://github.com/polycarpou)
+ * [Kevin Coleman](https://github.com/KevinColemanInc)
 ## Contributing

data/lib/smarter_csv/smarter_csv.rb CHANGED Viewed

@@ -9,9 +9,10 @@ module SmarterCSV
       :remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
       :convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
       :comment_regexp => /^#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8',
-      :remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false
+      :remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false, :invalid_byte_sequence => ''
     }
     options = default_options.merge(options)
+    options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
     csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
     headerA = []
     result = []
@@ -35,7 +36,8 @@ module SmarterCSV
         # process the header line in the CSV file..
         # the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
         header = f.readline.sub(options[:comment_regexp],'').chomp(options[:row_sep])
-        header = header.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') if options[:force_utf8]
+        header = header.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] == 'utf-8'
         file_line_count += 1
         csv_line_count += 1
         header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
@@ -99,6 +101,10 @@ module SmarterCSV
       while ! f.eof?    # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
         line = f.readline  # read one line.. this uses the input_record_separator $/ which we set previously!
         line = line.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') if options[:force_utf8]
+        # replace invalid byte sequences with options[:invalid_byte_sequence] (default: '') to avoid encoding errors
+        line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] == 'utf-8'
         file_line_count += 1
         csv_line_count += 1
         print "processing file line %10d, csv line %10d\r" % [file_line_count, csv_line_count] if options[:verbose]

data/lib/smarter_csv/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module SmarterCSV
-  VERSION = "1.1.1"
+  VERSION = "1.1.2"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: smarter_csv
 version: !ruby/object:Gem::Version
-  version: 1.1.1
+  version: 1.1.2
 platform: ruby
 authors:
 - |
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-11-26 00:00:00.000000000 Z
+date: 2016-12-30 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec