smarter_csv 1.1.1 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +20 -6
- data/lib/smarter_csv/smarter_csv.rb +8 -2
- data/lib/smarter_csv/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 430c575cd14bb098e7754d0228186f0a29007495
|
4
|
+
data.tar.gz: 903d566d2c3569c954ecabf5ebf310c8f9aa5234
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c846301856cbcf8e76efcf42d1d532a3317e6fdfce9ca06577794bae11b1e739086c6d40df70210fd3f0c75836bb9265ccff4d6d286fa998af735d43ef5c46ae
|
7
|
+
data.tar.gz: 52274d68e87f3e194be610bd69cfee6b084c193e389cf3591399a70e4051f23d1a2206c4d61aadb25c6c63c8675f285ef343e022f9dc70c95aac518f849ed005
|
data/README.md
CHANGED
@@ -204,7 +204,8 @@ The options and the block are optional.
|
|
204
204
|
| | | Important if the file does not contain headers, |
|
205
205
|
| | | otherwise you would lose the first line of data. |
|
206
206
|
| :skip_lines | nil | how many lines to skip before the first line or header line is processed |
|
207
|
-
| :force_utf8 | false | force UTF-8 encoding of all lines (including headers) in the CSV file |
|
207
|
+
| :force_utf8 | false | force UTF-8 encoding of all lines (including headers) in the CSV file |
|
208
|
+
| :invalid_byte_sequence | '' | the string with which invalid byte sequences are replaced |
|
208
209
|
---------------------------------------------------------------------------------------------------------------------------------
|
209
210
|
| :value_converters | nil | supply a hash of :header => KlassName; the class needs to implement self.convert(val)|
|
210
211
|
| :remove_empty_values | true | remove values which have nil or empty strings as values |
|
@@ -224,10 +225,17 @@ The options and the block are optional.
|
|
224
225
|
* if you have a CSV file which contains unicode characters, you can process it as follows:
|
225
226
|
|
226
227
|
|
227
|
-
|
228
|
-
|
229
|
-
|
228
|
+
File.open(filename, "r:bom|utf-8") do |f|
|
229
|
+
data = SmarterCSV.process(f);
|
230
|
+
end
|
231
|
+
|
232
|
+
* if the CSV file with unicode characters is in a remote location, similarly you need to give the encoding as an option to the `open` call:
|
230
233
|
|
234
|
+
require 'open-uri'
|
235
|
+
file_location = 'http://your.remote.org/sample.csv'
|
236
|
+
open(file_location, 'r:utf-8') do |f| # don't forget to specify the UTF-8 encoding!!
|
237
|
+
data = SmarterCSV.process(f)
|
238
|
+
end
|
231
239
|
|
232
240
|
#### NOTES about CSV Headers:
|
233
241
|
* as this method parses CSV files, it is assumed that the first line of any file will contain a valid header
|
@@ -285,14 +293,18 @@ Planned in the next releases:
|
|
285
293
|
|
286
294
|
## Changes
|
287
295
|
|
288
|
-
#### 1.1.1 (2016-11-26)
|
296
|
+
#### 1.1.2 (2016-12-29)
|
297
|
+
* added option `invalid_byte_sequence` (thanks to polycarpou)
|
298
|
+
* added comments on handling of UTF-8 encoding when opening from File vs. OpenURI (thanks to KevinColemanInc)
|
299
|
+
|
300
|
+
#### 1.1.1 (2016-11-26)
|
289
301
|
* added option to `skip_lines` (thanks to wal)
|
290
302
|
* added option to `force_utf8` encoding (thanks to jordangraft)
|
291
303
|
* bugfix if no headers in input data (thanks to esBeee)
|
292
304
|
* ensure input file is closed (thanks to waldyr)
|
293
305
|
* improved verbose output (thanks to benmaher)
|
294
306
|
* improved documentation
|
295
|
-
|
307
|
+
|
296
308
|
#### 1.1.0 (2015-07-26)
|
297
309
|
* added feature :value_converters, which allows parsing of dates, money, and other things (thanks to Raphaël Bleuse, Lucas Camargo de Almeida, Alejandro)
|
298
310
|
* added error if :headers_in_file is set to false, and no :user_provided_headers are given (thanks to innhyu)
|
@@ -428,6 +440,8 @@ And a special thanks to those who contributed pull requests:
|
|
428
440
|
* [Ben Maher](https://github.com/benmaher)
|
429
441
|
* [Wal McConnell](https://github.com/wal)
|
430
442
|
* [Jordan Graft](https://github.com/jordangraft)
|
443
|
+
* [Michael](https://github.com/polycarpou)
|
444
|
+
* [Kevin Coleman](https://github.com/KevinColemanInc)
|
431
445
|
|
432
446
|
|
433
447
|
## Contributing
|
@@ -9,9 +9,10 @@ module SmarterCSV
|
|
9
9
|
:remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
|
10
10
|
:convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
|
11
11
|
:comment_regexp => /^#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8',
|
12
|
-
:remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false
|
12
|
+
:remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false, :invalid_byte_sequence => ''
|
13
13
|
}
|
14
14
|
options = default_options.merge(options)
|
15
|
+
options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
|
15
16
|
csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
|
16
17
|
headerA = []
|
17
18
|
result = []
|
@@ -35,7 +36,8 @@ module SmarterCSV
|
|
35
36
|
# process the header line in the CSV file..
|
36
37
|
# the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
|
37
38
|
header = f.readline.sub(options[:comment_regexp],'').chomp(options[:row_sep])
|
38
|
-
header = header.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace:
|
39
|
+
header = header.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] == 'utf-8'
|
40
|
+
|
39
41
|
file_line_count += 1
|
40
42
|
csv_line_count += 1
|
41
43
|
header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
|
@@ -99,6 +101,10 @@ module SmarterCSV
|
|
99
101
|
while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
|
100
102
|
line = f.readline # read one line.. this uses the input_record_separator $/ which we set previously!
|
101
103
|
line = line.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') if options[:force_utf8]
|
104
|
+
|
105
|
+
# replace invalid byte sequences in UTF-8 with options[:invalid_byte_sequence] (default: '') to avoid errors
|
106
|
+
line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] == 'utf-8'
|
107
|
+
|
102
108
|
file_line_count += 1
|
103
109
|
csv_line_count += 1
|
104
110
|
print "processing file line %10d, csv line %10d\r" % [file_line_count, csv_line_count] if options[:verbose]
|
data/lib/smarter_csv/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- |
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-12-30 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|