smarter_csv 1.1.1 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 34d6c592bebe9d6b1d8f87f9f59ecf4a7d3b3a9d
4
- data.tar.gz: fe722f38c4962a312c4db7e14cb72e735426db82
3
+ metadata.gz: 430c575cd14bb098e7754d0228186f0a29007495
4
+ data.tar.gz: 903d566d2c3569c954ecabf5ebf310c8f9aa5234
5
5
  SHA512:
6
- metadata.gz: 75f5c56cfdeeef41be34f17bfbec30ae201c41463ad9aa6c7da7b5031c63fa1da27ef5120c245329eec108551fae722a5d156e09ead62c2e6aa34ce8edfe4cd8
7
- data.tar.gz: 2d3b83fa5e7f4eada8d7f03df50890441f594b8db6d34df4d2c07164c73beda7b9963d596ce43d2ad74ffddb83efb4bdea8ca83d780d7f3129c258c6d21bdb70
6
+ metadata.gz: c846301856cbcf8e76efcf42d1d532a3317e6fdfce9ca06577794bae11b1e739086c6d40df70210fd3f0c75836bb9265ccff4d6d286fa998af735d43ef5c46ae
7
+ data.tar.gz: 52274d68e87f3e194be610bd69cfee6b084c193e389cf3591399a70e4051f23d1a2206c4d61aadb25c6c63c8675f285ef343e022f9dc70c95aac518f849ed005
data/README.md CHANGED
@@ -204,7 +204,8 @@ The options and the block are optional.
204
204
  | | | Important if the file does not contain headers, |
205
205
  | | | otherwise you would lose the first line of data. |
206
206
  | :skip_lines | nil | how many lines to skip before the first line or header line is processed |
207
- | :force_utf8 | false | force UTF-8 encoding of all lines (including headers) in the CSV file |
207
+ | :force_utf8 | false | force UTF-8 encoding of all lines (including headers) in the CSV file |
208
+ | :invalid_byte_sequence | '' | what to replace invalid byte sequences with |
208
209
  ---------------------------------------------------------------------------------------------------------------------------------
209
210
  | :value_converters | nil | supply a hash of :header => KlassName; the class needs to implement self.convert(val)|
210
211
  | :remove_empty_values | true | remove values which have nil or empty strings as values |
@@ -224,10 +225,17 @@ The options and the block are optional.
224
225
  * if you have a CSV file which contains unicode characters, you can process it as follows:
225
226
 
226
227
 
227
- f = File.open(filename, "r:bom|utf-8");
228
- data = SmarterCSV.process(f);
229
- f.close
228
+ File.open(filename, "r:bom|utf-8") do |f|
229
+ data = SmarterCSV.process(f);
230
+ end
231
+
232
+ * if the CSV file with unicode characters is in a remote location, similarly you need to give the encoding as an option to the `open` call:
230
233
 
234
+ require 'open-uri'
235
+ file_location = 'http://your.remote.org/sample.csv'
236
+ open(file_location, 'r:utf-8') do |f| # don't forget to specify the UTF-8 encoding!!
237
+ data = SmarterCSV.process(f)
238
+ end
231
239
 
232
240
  #### NOTES about CSV Headers:
233
241
  * as this method parses CSV files, it is assumed that the first line of any file will contain a valid header
@@ -285,14 +293,18 @@ Planned in the next releases:
285
293
 
286
294
  ## Changes
287
295
 
288
- #### 1.1.1 (2016-11-26)
296
+ #### 1.1.2 (2016-12-29)
297
+ * added option `invalid_byte_sequence` (thanks to polycarpou)
298
+ * added comments on handling of UTF-8 encoding when opening from File vs. OpenURI (thanks to KevinColemanInc)
299
+
300
+ #### 1.1.1 (2016-11-26)
289
301
  * added option to `skip_lines` (thanks to wal)
290
302
  * added option to `force_utf8` encoding (thanks to jordangraft)
291
303
  * bugfix if no headers in input data (thanks to esBeee)
292
304
  * ensure input file is closed (thanks to waldyr)
293
305
  * improved verbose output (thanks to benmaher)
294
306
  * improved documentation
295
-
307
+
296
308
  #### 1.1.0 (2015-07-26)
297
309
  * added feature :value_converters, which allows parsing of dates, money, and other things (thanks to Raphaël Bleuse, Lucas Camargo de Almeida, Alejandro)
298
310
  * added error if :headers_in_file is set to false, and no :user_provided_headers are given (thanks to innhyu)
@@ -428,6 +440,8 @@ And a special thanks to those who contributed pull requests:
428
440
  * [Ben Maher](https://github.com/benmaher)
429
441
  * [Wal McConnell](https://github.com/wal)
430
442
  * [Jordan Graft](https://github.com/jordangraft)
443
+ * [Michael](https://github.com/polycarpou)
444
+ * [Kevin Coleman](https://github.com/KevinColemanInc)
431
445
 
432
446
 
433
447
  ## Contributing
@@ -9,9 +9,10 @@ module SmarterCSV
9
9
  :remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
10
10
  :convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
11
11
  :comment_regexp => /^#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8',
12
- :remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false
12
+ :remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false, :invalid_byte_sequence => ''
13
13
  }
14
14
  options = default_options.merge(options)
15
+ options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
15
16
  csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
16
17
  headerA = []
17
18
  result = []
@@ -35,7 +36,8 @@ module SmarterCSV
35
36
  # process the header line in the CSV file..
36
37
  # the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
37
38
  header = f.readline.sub(options[:comment_regexp],'').chomp(options[:row_sep])
38
- header = header.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') if options[:force_utf8]
39
+ header = header.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] == 'utf-8'
40
+
39
41
  file_line_count += 1
40
42
  csv_line_count += 1
41
43
  header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
@@ -99,6 +101,10 @@ module SmarterCSV
99
101
  while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
100
102
  line = f.readline # read one line.. this uses the input_record_separator $/ which we set previously!
101
103
  line = line.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') if options[:force_utf8]
104
+
105
+ # replace invalid byte sequences in UTF-8 with options[:invalid_byte_sequence] (default: '') to avoid errors
106
+ line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] == 'utf-8'
107
+
102
108
  file_line_count += 1
103
109
  csv_line_count += 1
104
110
  print "processing file line %10d, csv line %10d\r" % [file_line_count, csv_line_count] if options[:verbose]
@@ -1,3 +1,3 @@
1
1
  module SmarterCSV
2
- VERSION = "1.1.1"
2
+ VERSION = "1.1.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: smarter_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - |
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-11-26 00:00:00.000000000 Z
12
+ date: 2016-12-30 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec