smarter_csv 1.5.1 → 1.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -1
- data/CONTRIBUTORS.md +1 -0
- data/lib/smarter_csv/smarter_csv.rb +25 -17
- data/lib/smarter_csv/version.rb +1 -1
- data/spec/smarter_csv/duplicate_headers_spec.rb +2 -2
- data/spec/smarter_csv/invalid_headers_spec.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 88b9932c898320fb05d5697e155dc0bd3ade887d2fcfab7b660933e230007364
|
4
|
+
data.tar.gz: f0525d9c917aff44f910d4547b8e918faa3beb50d47adc29182df1fc1ec2be19
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 330ad44b9808150f6fdf96dec65d259c2d9cf5eb25e22dc80f63095f4014b065b8aa97a2ba9b814c6cea6f4c0361e04567be403ab78b54d0518b49dc072f36ac
|
7
|
+
data.tar.gz: 27531bd508b5b455a32947badfb85d7e95489ad282a837ef046864806ba7fa12539148ab2fe4c84174fba3ef085dd3adda5d7c070615d684cd99ed0f90b903a3
|
data/CHANGELOG.md
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
3
3
|
|
4
|
-
## 1.5.
|
4
|
+
## 1.5.2 (2022-04-29)
|
5
|
+
* added missing keys to the SmarterCSV::KeyMappingError exception message #189 (thanks to John Dell)
|
6
|
+
|
7
|
+
## 1.5.1 (2022-04-27)
|
5
8
|
* added raising of `KeyMappingError` if `key_mapping` refers to a non-existent key
|
6
9
|
* added option `duplicate_header_suffix` (thanks to Skye Shaw)
|
7
10
|
When given a non-nil string, it uses the suffix to append numbering 2..n to duplicate headers.
|
data/CONTRIBUTORS.md
CHANGED
data/lib/smarter_csv/smarter_csv.rb
CHANGED
@@ -18,24 +18,28 @@ module SmarterCSV
|
|
18
18
|
@csv_line_count = 0
|
19
19
|
has_rails = !! defined?(Rails)
|
20
20
|
begin
|
21
|
-
|
21
|
+
fh = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
|
22
22
|
|
23
23
|
# auto-detect the row separator
|
24
|
-
options[:row_sep] = SmarterCSV.guess_line_ending(
|
24
|
+
options[:row_sep] = SmarterCSV.guess_line_ending(fh, options) if options[:row_sep].to_sym == :auto
|
25
25
|
# attempt to auto-detect column separator
|
26
|
-
options[:col_sep] = guess_column_separator(
|
26
|
+
options[:col_sep] = guess_column_separator(fh, options) if options[:col_sep].to_sym == :auto
|
27
27
|
# preserve options, in case we need to call the CSV class
|
28
28
|
csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
|
29
29
|
csv_options.delete(:row_sep) if [nil, :auto].include?( options[:row_sep].to_sym )
|
30
30
|
csv_options.delete(:col_sep) if [nil, :auto].include?( options[:col_sep].to_sym )
|
31
31
|
|
32
|
-
if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && (
|
32
|
+
if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && ( fh.respond_to?(:external_encoding) && fh.external_encoding != Encoding.find('UTF-8') || fh.respond_to?(:encoding) && fh.encoding != Encoding.find('UTF-8') )
|
33
33
|
puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
|
34
34
|
end
|
35
35
|
|
36
|
-
|
36
|
+
if options[:skip_lines].to_i > 0
|
37
|
+
options[:skip_lines].to_i.times do
|
38
|
+
readline_with_counts(fh, options)
|
39
|
+
end
|
40
|
+
end
|
37
41
|
|
38
|
-
headerA, header_size = process_headers(
|
42
|
+
headerA, header_size = process_headers(fh, options, csv_options)
|
39
43
|
|
40
44
|
# in case we use chunking.. we'll need to set it up..
|
41
45
|
if ! options[:chunk_size].nil? && options[:chunk_size].to_i > 0
|
@@ -48,10 +52,8 @@ module SmarterCSV
|
|
48
52
|
end
|
49
53
|
|
50
54
|
# now on to processing all the rest of the lines in the CSV file:
|
51
|
-
while !
|
52
|
-
line =
|
53
|
-
@file_line_count += 1
|
54
|
-
@csv_line_count += 1
|
55
|
+
while ! fh.eof? # we can't use fh.readlines() here, because this would read the whole file into memory at once, and eof => true
|
56
|
+
line = readline_with_counts(fh, options)
|
55
57
|
|
56
58
|
# replace invalid byte sequence in UTF-8 with question mark to avoid errors
|
57
59
|
line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
@@ -65,7 +67,7 @@ module SmarterCSV
|
|
65
67
|
# by detecting the existence of an uneven number of quote characters
|
66
68
|
multiline = line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
|
67
69
|
while line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
|
68
|
-
next_line =
|
70
|
+
next_line = fh.readline(options[:row_sep])
|
69
71
|
next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
70
72
|
line += next_line
|
71
73
|
@file_line_count += 1
|
@@ -138,7 +140,7 @@ module SmarterCSV
|
|
138
140
|
if use_chunks
|
139
141
|
chunk << hash # append temp result to chunk
|
140
142
|
|
141
|
-
if chunk.size >= chunk_size ||
|
143
|
+
if chunk.size >= chunk_size || fh.eof? # if chunk is full, or EOF reached
|
142
144
|
# do something with the chunk
|
143
145
|
if block_given?
|
144
146
|
yield chunk # do something with the hashes in the chunk in the block
|
@@ -179,7 +181,7 @@ module SmarterCSV
|
|
179
181
|
chunk = [] # initialize for next chunk of data
|
180
182
|
end
|
181
183
|
ensure
|
182
|
-
|
184
|
+
fh.close if fh.respond_to?(:close)
|
183
185
|
end
|
184
186
|
if block_given?
|
185
187
|
return chunk_count # when we do processing through a block we only care how many chunks we processed
|
@@ -190,6 +192,13 @@ module SmarterCSV
|
|
190
192
|
|
191
193
|
private
|
192
194
|
|
195
|
+
def self.readline_with_counts(filehandle, options)
|
196
|
+
line = filehandle.readline(options[:row_sep])
|
197
|
+
@file_line_count += 1
|
198
|
+
@csv_line_count += 1
|
199
|
+
line
|
200
|
+
end
|
201
|
+
|
193
202
|
def self.default_options
|
194
203
|
{
|
195
204
|
auto_row_sep_chars: 500,
|
@@ -314,9 +323,7 @@ module SmarterCSV
|
|
314
323
|
if options[:headers_in_file] # extract the header line
|
315
324
|
# process the header line in the CSV file..
|
316
325
|
# the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
|
317
|
-
header = filehandle
|
318
|
-
@file_line_count += 1
|
319
|
-
@csv_line_count += 1
|
326
|
+
header = readline_with_counts(filehandle, options)
|
320
327
|
|
321
328
|
header = header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
322
329
|
header = header.sub(options[:comment_regexp],'') if options[:comment_regexp]
|
@@ -371,7 +378,8 @@ module SmarterCSV
|
|
371
378
|
# if you want to completely delete a key, then map it to nil or to ''
|
372
379
|
if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
|
373
380
|
# we can't map keys that are not there
|
374
|
-
|
381
|
+
missing_keys = key_mappingH.keys - headerA
|
382
|
+
raise(SmarterCSV::KeyMappingError, "missing header(s): #{missing_keys.join(",")}") unless missing_keys.empty?
|
375
383
|
|
376
384
|
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
|
377
385
|
end
|
data/lib/smarter_csv/version.rb
CHANGED
data/spec/smarter_csv/duplicate_headers_spec.rb
CHANGED
@@ -17,12 +17,12 @@ describe 'duplicate headers' do
|
|
17
17
|
}.to raise_exception(SmarterCSV::DuplicateHeaders)
|
18
18
|
end
|
19
19
|
|
20
|
-
it 'raises error on missing mapped headers' do
|
20
|
+
it 'raises error on missing mapped headers and includes missing headers in message' do
|
21
21
|
expect {
|
22
22
|
# the mapping is right, but the underlying csv file is bad
|
23
23
|
options = {:key_mapping => {:email => :a, :firstname => :b, :lastname => :c, :manager_email => :d, :age => :e} }
|
24
24
|
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
25
|
-
}.to raise_exception(SmarterCSV::KeyMappingError)
|
25
|
+
}.to raise_exception(SmarterCSV::KeyMappingError, "missing header(s): manager_email")
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
data/spec/smarter_csv/invalid_headers_spec.rb
CHANGED
@@ -28,11 +28,11 @@ describe 'test exceptions for invalid headers' do
|
|
28
28
|
}.to raise_exception(SmarterCSV::MissingHeaders)
|
29
29
|
end
|
30
30
|
|
31
|
-
it 'raises error on missing mapped headers' do
|
31
|
+
it 'raises error on missing mapped headers and includes missing headers in message' do
|
32
32
|
expect {
|
33
33
|
# :age does not exist in the CSV header
|
34
34
|
options = {:key_mapping => {:email => :a, :firstname => :b, :lastname => :c, :manager_email => :d, :age => :e} }
|
35
35
|
SmarterCSV.process("#{fixture_path}/user_import.csv", options)
|
36
|
-
}.to raise_exception(SmarterCSV::KeyMappingError)
|
36
|
+
}.to raise_exception(SmarterCSV::KeyMappingError, "missing header(s): age")
|
37
37
|
end
|
38
38
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.
|
4
|
+
version: 1.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tilo Sloboda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-04-
|
11
|
+
date: 2022-04-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|