smarter_csv 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 34d6c592bebe9d6b1d8f87f9f59ecf4a7d3b3a9d
4
- data.tar.gz: fe722f38c4962a312c4db7e14cb72e735426db82
3
+ metadata.gz: 430c575cd14bb098e7754d0228186f0a29007495
4
+ data.tar.gz: 903d566d2c3569c954ecabf5ebf310c8f9aa5234
5
5
  SHA512:
6
- metadata.gz: 75f5c56cfdeeef41be34f17bfbec30ae201c41463ad9aa6c7da7b5031c63fa1da27ef5120c245329eec108551fae722a5d156e09ead62c2e6aa34ce8edfe4cd8
7
- data.tar.gz: 2d3b83fa5e7f4eada8d7f03df50890441f594b8db6d34df4d2c07164c73beda7b9963d596ce43d2ad74ffddb83efb4bdea8ca83d780d7f3129c258c6d21bdb70
6
+ metadata.gz: c846301856cbcf8e76efcf42d1d532a3317e6fdfce9ca06577794bae11b1e739086c6d40df70210fd3f0c75836bb9265ccff4d6d286fa998af735d43ef5c46ae
7
+ data.tar.gz: 52274d68e87f3e194be610bd69cfee6b084c193e389cf3591399a70e4051f23d1a2206c4d61aadb25c6c63c8675f285ef343e022f9dc70c95aac518f849ed005
data/README.md CHANGED
@@ -204,7 +204,8 @@ The options and the block are optional.
204
204
  | | | Important if the file does not contain headers, |
205
205
  | | | otherwise you would lose the first line of data. |
206
206
  | :skip_lines | nil | how many lines to skip before the first line or header line is processed |
207
- | :force_utf8 | false | force UTF-8 encoding of all lines (including headers) in the CSV file |
207
+ | :force_utf8 | false | force UTF-8 encoding of all lines (including headers) in the CSV file |
208
+ | :invalid_byte_sequence | '' | the string used to replace invalid byte sequences |
208
209
  ---------------------------------------------------------------------------------------------------------------------------------
209
210
  | :value_converters | nil | supply a hash of :header => KlassName; the class needs to implement self.convert(val)|
210
211
  | :remove_empty_values | true | remove values which have nil or empty strings as values |
@@ -224,10 +225,17 @@ The options and the block are optional.
224
225
  * if you have a CSV file which contains unicode characters, you can process it as follows:
225
226
 
226
227
 
227
- f = File.open(filename, "r:bom|utf-8");
228
- data = SmarterCSV.process(f);
229
- f.close
228
+ File.open(filename, "r:bom|utf-8") do |f|
229
+ data = SmarterCSV.process(f);
230
+ end
231
+
232
+ * if the CSV file with unicode characters is in a remote location, similarly you need to give the encoding as an option to the `open` call:
230
233
 
234
+ require 'open-uri'
235
+ file_location = 'http://your.remote.org/sample.csv'
236
+ open(file_location, 'r:utf-8') do |f| # don't forget to specify the UTF-8 encoding!!
237
+ data = SmarterCSV.process(f)
238
+ end
231
239
 
232
240
  #### NOTES about CSV Headers:
233
241
  * as this method parses CSV files, it is assumed that the first line of any file will contain a valid header
@@ -285,14 +293,18 @@ Planned in the next releases:
285
293
 
286
294
  ## Changes
287
295
 
288
- #### 1.1.1 (2016-11-26)
296
+ #### 1.1.2 (2016-12-29)
297
+ * added option `invalid_byte_sequence` (thanks to polycarpou)
298
+ * added comments on handling of UTF-8 encoding when opening from File vs. OpenURI (thanks to KevinColemanInc)
299
+
300
+ #### 1.1.1 (2016-11-26)
289
301
  * added option to `skip_lines` (thanks to wal)
290
302
  * added option to `force_utf8` encoding (thanks to jordangraft)
291
303
  * bugfix if no headers in input data (thanks to esBeee)
292
304
  * ensure input file is closed (thanks to waldyr)
293
305
  * improved verbose output (thanks to benmaher)
294
306
  * improved documentation
295
-
307
+
296
308
  #### 1.1.0 (2015-07-26)
297
309
  * added feature :value_converters, which allows parsing of dates, money, and other things (thanks to Raphaël Bleuse, Lucas Camargo de Almeida, Alejandro)
298
310
  * added error if :headers_in_file is set to false, and no :user_provided_headers are given (thanks to innhyu)
@@ -428,6 +440,8 @@ And a special thanks to those who contributed pull requests:
428
440
  * [Ben Maher](https://github.com/benmaher)
429
441
  * [Wal McConnell](https://github.com/wal)
430
442
  * [Jordan Graft](https://github.com/jordangraft)
443
+ * [Michael](https://github.com/polycarpou)
444
+ * [Kevin Coleman](https://github.com/KevinColemanInc)
431
445
 
432
446
 
433
447
  ## Contributing
@@ -9,9 +9,10 @@ module SmarterCSV
9
9
  :remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
10
10
  :convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
11
11
  :comment_regexp => /^#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8',
12
- :remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false
12
+ :remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false, :invalid_byte_sequence => ''
13
13
  }
14
14
  options = default_options.merge(options)
15
+ options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
15
16
  csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
16
17
  headerA = []
17
18
  result = []
@@ -35,7 +36,8 @@ module SmarterCSV
35
36
  # process the header line in the CSV file..
36
37
  # the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
37
38
  header = f.readline.sub(options[:comment_regexp],'').chomp(options[:row_sep])
38
- header = header.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') if options[:force_utf8]
39
+ header = header.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] == 'utf-8'
40
+
39
41
  file_line_count += 1
40
42
  csv_line_count += 1
41
43
  header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
@@ -99,6 +101,10 @@ module SmarterCSV
99
101
  while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
100
102
  line = f.readline # read one line.. this uses the input_record_separator $/ which we set previously!
101
103
  line = line.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') if options[:force_utf8]
104
+
105
+ # replace invalid byte sequences in UTF-8 with options[:invalid_byte_sequence] (default: '') to avoid errors
106
+ line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] == 'utf-8'
107
+
102
108
  file_line_count += 1
103
109
  csv_line_count += 1
104
110
  print "processing file line %10d, csv line %10d\r" % [file_line_count, csv_line_count] if options[:verbose]
@@ -1,3 +1,3 @@
1
1
  module SmarterCSV
2
- VERSION = "1.1.1"
2
+ VERSION = "1.1.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: smarter_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - |
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-11-26 00:00:00.000000000 Z
12
+ date: 2016-12-30 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec