smarter_csv 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +20 -6
- data/lib/smarter_csv/smarter_csv.rb +8 -2
- data/lib/smarter_csv/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 430c575cd14bb098e7754d0228186f0a29007495
|
4
|
+
data.tar.gz: 903d566d2c3569c954ecabf5ebf310c8f9aa5234
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c846301856cbcf8e76efcf42d1d532a3317e6fdfce9ca06577794bae11b1e739086c6d40df70210fd3f0c75836bb9265ccff4d6d286fa998af735d43ef5c46ae
|
7
|
+
data.tar.gz: 52274d68e87f3e194be610bd69cfee6b084c193e389cf3591399a70e4051f23d1a2206c4d61aadb25c6c63c8675f285ef343e022f9dc70c95aac518f849ed005
|
data/README.md
CHANGED
@@ -204,7 +204,8 @@ The options and the block are optional.
|
|
204
204
|
| | | Important if the file does not contain headers, |
|
205
205
|
| | | otherwise you would lose the first line of data. |
|
206
206
|
| :skip_lines | nil | how many lines to skip before the first line or header line is processed |
|
207
|
-
| :force_utf8 | false | force UTF-8 encoding of all lines (including headers) in the CSV file |
|
207
|
+
| :force_utf8 | false | force UTF-8 encoding of all lines (including headers) in the CSV file |
|
208
|
+
| :invalid_byte_sequence | '' | what to replace invalid byte sequences with |
|
208
209
|
---------------------------------------------------------------------------------------------------------------------------------
|
209
210
|
| :value_converters | nil | supply a hash of :header => KlassName; the class needs to implement self.convert(val)|
|
210
211
|
| :remove_empty_values | true | remove values which have nil or empty strings as values |
|
@@ -224,10 +225,17 @@ The options and the block are optional.
|
|
224
225
|
* if you have a CSV file which contains unicode characters, you can process it as follows:
|
225
226
|
|
226
227
|
|
227
|
-
|
228
|
-
|
229
|
-
|
228
|
+
File.open(filename, "r:bom|utf-8") do |f|
|
229
|
+
data = SmarterCSV.process(f);
|
230
|
+
end
|
231
|
+
|
232
|
+
* if the CSV file with unicode characters is in a remote location, similarly you need to give the encoding as an option to the `open` call:
|
230
233
|
|
234
|
+
require 'open-uri'
|
235
|
+
file_location = 'http://your.remote.org/sample.csv'
|
236
|
+
open(file_location, 'r:utf-8') do |f| # don't forget to specify the UTF-8 encoding!!
|
237
|
+
data = SmarterCSV.process(f)
|
238
|
+
end
|
231
239
|
|
232
240
|
#### NOTES about CSV Headers:
|
233
241
|
* as this method parses CSV files, it is assumed that the first line of any file will contain a valid header
|
@@ -285,14 +293,18 @@ Planned in the next releases:
|
|
285
293
|
|
286
294
|
## Changes
|
287
295
|
|
288
|
-
#### 1.1.
|
296
|
+
#### 1.1.2 (2016-12-29)
|
297
|
+
* added option `invalid_byte_sequence` (thanks to polycarpou)
|
298
|
+
* added comments on handling of UTF-8 encoding when opening from File vs. OpenURI (thanks to KevinColemanInc)
|
299
|
+
|
300
|
+
#### 1.1.1 (2016-11-26)
|
289
301
|
* added option to `skip_lines` (thanks to wal)
|
290
302
|
* added option to `force_utf8` encoding (thanks to jordangraft)
|
291
303
|
* bugfix if no headers in input data (thanks to esBeee)
|
292
304
|
* ensure input file is closed (thanks to waldyr)
|
293
305
|
* improved verbose output (thanks to benmaher)
|
294
306
|
* improved documentation
|
295
|
-
|
307
|
+
|
296
308
|
#### 1.1.0 (2015-07-26)
|
297
309
|
* added feature :value_converters, which allows parsing of dates, money, and other things (thanks to Raphaël Bleuse, Lucas Camargo de Almeida, Alejandro)
|
298
310
|
* added error if :headers_in_file is set to false, and no :user_provided_headers are given (thanks to innhyu)
|
@@ -428,6 +440,8 @@ And a special thanks to those who contributed pull requests:
|
|
428
440
|
* [Ben Maher](https://github.com/benmaher)
|
429
441
|
* [Wal McConnell](https://github.com/wal)
|
430
442
|
* [Jordan Graft](https://github.com/jordangraft)
|
443
|
+
* [Michael](https://github.com/polycarpou)
|
444
|
+
* [Kevin Coleman](https://github.com/KevinColemanInc)
|
431
445
|
|
432
446
|
|
433
447
|
## Contributing
|
@@ -9,9 +9,10 @@ module SmarterCSV
|
|
9
9
|
:remove_empty_values => true, :remove_zero_values => false , :remove_values_matching => nil , :remove_empty_hashes => true , :strip_whitespace => true,
|
10
10
|
:convert_values_to_numeric => true, :strip_chars_from_headers => nil , :user_provided_headers => nil , :headers_in_file => true,
|
11
11
|
:comment_regexp => /^#/, :chunk_size => nil , :key_mapping_hash => nil , :downcase_header => true, :strings_as_keys => false, :file_encoding => 'utf-8',
|
12
|
-
:remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false
|
12
|
+
:remove_unmapped_keys => false, :keep_original_headers => false, :value_converters => nil, :skip_lines => nil, :force_utf8 => false, :invalid_byte_sequence => ''
|
13
13
|
}
|
14
14
|
options = default_options.merge(options)
|
15
|
+
options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
|
15
16
|
csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
|
16
17
|
headerA = []
|
17
18
|
result = []
|
@@ -35,7 +36,8 @@ module SmarterCSV
|
|
35
36
|
# process the header line in the CSV file..
|
36
37
|
# the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
|
37
38
|
header = f.readline.sub(options[:comment_regexp],'').chomp(options[:row_sep])
|
38
|
-
header = header.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace:
|
39
|
+
header = header.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] == 'utf-8'
|
40
|
+
|
39
41
|
file_line_count += 1
|
40
42
|
csv_line_count += 1
|
41
43
|
header = header.gsub(options[:strip_chars_from_headers], '') if options[:strip_chars_from_headers]
|
@@ -99,6 +101,10 @@ module SmarterCSV
|
|
99
101
|
while ! f.eof? # we can't use f.readlines() here, because this would read the whole file into memory at once, and eof => true
|
100
102
|
line = f.readline # read one line.. this uses the input_record_separator $/ which we set previously!
|
101
103
|
line = line.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') if options[:force_utf8]
|
104
|
+
|
105
|
+
# replace invalid byte sequences with options[:invalid_byte_sequence] (default: '') to avoid errors
|
106
|
+
line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] == 'utf-8'
|
107
|
+
|
102
108
|
file_line_count += 1
|
103
109
|
csv_line_count += 1
|
104
110
|
print "processing file line %10d, csv line %10d\r" % [file_line_count, csv_line_count] if options[:verbose]
|
data/lib/smarter_csv/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- |
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-12-30 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|