smarter_csv 1.5.1 → 1.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -1
- data/CONTRIBUTORS.md +1 -0
- data/lib/smarter_csv/smarter_csv.rb +25 -17
- data/lib/smarter_csv/version.rb +1 -1
- data/spec/smarter_csv/duplicate_headers_spec.rb +2 -2
- data/spec/smarter_csv/invalid_headers_spec.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 88b9932c898320fb05d5697e155dc0bd3ade887d2fcfab7b660933e230007364
|
4
|
+
data.tar.gz: f0525d9c917aff44f910d4547b8e918faa3beb50d47adc29182df1fc1ec2be19
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 330ad44b9808150f6fdf96dec65d259c2d9cf5eb25e22dc80f63095f4014b065b8aa97a2ba9b814c6cea6f4c0361e04567be403ab78b54d0518b49dc072f36ac
|
7
|
+
data.tar.gz: 27531bd508b5b455a32947badfb85d7e95489ad282a837ef046864806ba7fa12539148ab2fe4c84174fba3ef085dd3adda5d7c070615d684cd99ed0f90b903a3
|
data/CHANGELOG.md
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
3
3
|
|
4
|
-
## 1.5.1 (2022-04-27)
|
4
|
+
## 1.5.2 (2022-04-29)
|
5
|
+
* added missing keys to the SmarterCSV::KeyMappingError exception message #189 (thanks to John Dell)
|
6
|
+
|
7
|
+
## 1.5.1 (2022-04-27)
|
5
8
|
* added raising of `KeyMappingError` if `key_mapping` refers to a non-existent key
|
6
9
|
* added option `duplicate_header_suffix` (thanks to Skye Shaw)
|
7
10
|
When given a non-nil string, it uses the suffix to append numbering 2..n to duplicate headers.
|
data/CONTRIBUTORS.md
CHANGED
data/lib/smarter_csv/smarter_csv.rb
CHANGED
@@ -18,24 +18,28 @@ module SmarterCSV
|
|
18
18
|
@csv_line_count = 0
|
19
19
|
has_rails = !! defined?(Rails)
|
20
20
|
begin
|
21
|
-
|
21
|
+
fh = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
|
22
22
|
|
23
23
|
# auto-detect the row separator
|
24
|
-
options[:row_sep] = SmarterCSV.guess_line_ending(
|
24
|
+
options[:row_sep] = SmarterCSV.guess_line_ending(fh, options) if options[:row_sep].to_sym == :auto
|
25
25
|
# attempt to auto-detect column separator
|
26
|
-
options[:col_sep] = guess_column_separator(
|
26
|
+
options[:col_sep] = guess_column_separator(fh, options) if options[:col_sep].to_sym == :auto
|
27
27
|
# preserve options, in case we need to call the CSV class
|
28
28
|
csv_options = options.select{|k,v| [:col_sep, :row_sep, :quote_char].include?(k)} # options.slice(:col_sep, :row_sep, :quote_char)
|
29
29
|
csv_options.delete(:row_sep) if [nil, :auto].include?( options[:row_sep].to_sym )
|
30
30
|
csv_options.delete(:col_sep) if [nil, :auto].include?( options[:col_sep].to_sym )
|
31
31
|
|
32
|
-
if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && (
|
32
|
+
if (options[:force_utf8] || options[:file_encoding] =~ /utf-8/i) && ( fh.respond_to?(:external_encoding) && fh.external_encoding != Encoding.find('UTF-8') || fh.respond_to?(:encoding) && fh.encoding != Encoding.find('UTF-8') )
|
33
33
|
puts 'WARNING: you are trying to process UTF-8 input, but did not open the input with "b:utf-8" option. See README file "NOTES about File Encodings".'
|
34
34
|
end
|
35
35
|
|
36
|
-
|
36
|
+
if options[:skip_lines].to_i > 0
|
37
|
+
options[:skip_lines].to_i.times do
|
38
|
+
readline_with_counts(fh, options)
|
39
|
+
end
|
40
|
+
end
|
37
41
|
|
38
|
-
headerA, header_size = process_headers(
|
42
|
+
headerA, header_size = process_headers(fh, options, csv_options)
|
39
43
|
|
40
44
|
# in case we use chunking.. we'll need to set it up..
|
41
45
|
if ! options[:chunk_size].nil? && options[:chunk_size].to_i > 0
|
@@ -48,10 +52,8 @@ module SmarterCSV
|
|
48
52
|
end
|
49
53
|
|
50
54
|
# now on to processing all the rest of the lines in the CSV file:
|
51
|
-
while !
|
52
|
-
line =
|
53
|
-
@file_line_count += 1
|
54
|
-
@csv_line_count += 1
|
55
|
+
while ! fh.eof? # we can't use fh.readlines() here, because this would read the whole file into memory at once, and eof => true
|
56
|
+
line = readline_with_counts(fh, options)
|
55
57
|
|
56
58
|
# replace invalid byte sequence in UTF-8 with question mark to avoid errors
|
57
59
|
line = line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
@@ -65,7 +67,7 @@ module SmarterCSV
|
|
65
67
|
# by detecting the existence of an uneven number of quote characters
|
66
68
|
multiline = line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
|
67
69
|
while line.count(options[:quote_char])%2 == 1 # should handle quote_char nil
|
68
|
-
next_line =
|
70
|
+
next_line = fh.readline(options[:row_sep])
|
69
71
|
next_line = next_line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
70
72
|
line += next_line
|
71
73
|
@file_line_count += 1
|
@@ -138,7 +140,7 @@ module SmarterCSV
|
|
138
140
|
if use_chunks
|
139
141
|
chunk << hash # append temp result to chunk
|
140
142
|
|
141
|
-
if chunk.size >= chunk_size ||
|
143
|
+
if chunk.size >= chunk_size || fh.eof? # if chunk if full, or EOF reached
|
142
144
|
# do something with the chunk
|
143
145
|
if block_given?
|
144
146
|
yield chunk # do something with the hashes in the chunk in the block
|
@@ -179,7 +181,7 @@ module SmarterCSV
|
|
179
181
|
chunk = [] # initialize for next chunk of data
|
180
182
|
end
|
181
183
|
ensure
|
182
|
-
|
184
|
+
fh.close if fh.respond_to?(:close)
|
183
185
|
end
|
184
186
|
if block_given?
|
185
187
|
return chunk_count # when we do processing through a block we only care how many chunks we processed
|
@@ -190,6 +192,13 @@ module SmarterCSV
|
|
190
192
|
|
191
193
|
private
|
192
194
|
|
195
|
+
def self.readline_with_counts(filehandle, options)
|
196
|
+
line = filehandle.readline(options[:row_sep])
|
197
|
+
@file_line_count += 1
|
198
|
+
@csv_line_count += 1
|
199
|
+
line
|
200
|
+
end
|
201
|
+
|
193
202
|
def self.default_options
|
194
203
|
{
|
195
204
|
auto_row_sep_chars: 500,
|
@@ -314,9 +323,7 @@ module SmarterCSV
|
|
314
323
|
if options[:headers_in_file] # extract the header line
|
315
324
|
# process the header line in the CSV file..
|
316
325
|
# the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
|
317
|
-
header = filehandle
|
318
|
-
@file_line_count += 1
|
319
|
-
@csv_line_count += 1
|
326
|
+
header = readline_with_counts(filehandle, options)
|
320
327
|
|
321
328
|
header = header.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence]) if options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
|
322
329
|
header = header.sub(options[:comment_regexp],'') if options[:comment_regexp]
|
@@ -371,7 +378,8 @@ module SmarterCSV
|
|
371
378
|
# if you want to completely delete a key, then map it to nil or to ''
|
372
379
|
if ! key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
|
373
380
|
# we can't map keys that are not there
|
374
|
-
|
381
|
+
missing_keys = key_mappingH.keys - headerA
|
382
|
+
raise(SmarterCSV::KeyMappingError, "missing header(s): #{missing_keys.join(",")}") unless missing_keys.empty?
|
375
383
|
|
376
384
|
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
|
377
385
|
end
|
data/lib/smarter_csv/version.rb
CHANGED
data/spec/smarter_csv/duplicate_headers_spec.rb
CHANGED
@@ -17,12 +17,12 @@ describe 'duplicate headers' do
|
|
17
17
|
}.to raise_exception(SmarterCSV::DuplicateHeaders)
|
18
18
|
end
|
19
19
|
|
20
|
-
it 'raises error on missing mapped headers' do
|
20
|
+
it 'raises error on missing mapped headers and includes missing headers in message' do
|
21
21
|
expect {
|
22
22
|
# the mapping is right, but the underlying csv file is bad
|
23
23
|
options = {:key_mapping => {:email => :a, :firstname => :b, :lastname => :c, :manager_email => :d, :age => :e} }
|
24
24
|
SmarterCSV.process("#{fixture_path}/duplicate_headers.csv", options)
|
25
|
-
}.to raise_exception(SmarterCSV::KeyMappingError)
|
25
|
+
}.to raise_exception(SmarterCSV::KeyMappingError, "missing header(s): manager_email")
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
data/spec/smarter_csv/invalid_headers_spec.rb
CHANGED
@@ -28,11 +28,11 @@ describe 'test exceptions for invalid headers' do
|
|
28
28
|
}.to raise_exception(SmarterCSV::MissingHeaders)
|
29
29
|
end
|
30
30
|
|
31
|
-
it 'raises error on missing mapped headers' do
|
31
|
+
it 'raises error on missing mapped headers and includes missing headers in message' do
|
32
32
|
expect {
|
33
33
|
# :age does not exist in the CSV header
|
34
34
|
options = {:key_mapping => {:email => :a, :firstname => :b, :lastname => :c, :manager_email => :d, :age => :e} }
|
35
35
|
SmarterCSV.process("#{fixture_path}/user_import.csv", options)
|
36
|
-
}.to raise_exception(SmarterCSV::KeyMappingError)
|
36
|
+
}.to raise_exception(SmarterCSV::KeyMappingError, "missing header(s): age")
|
37
37
|
end
|
38
38
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.1
|
4
|
+
version: 1.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tilo Sloboda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-04-27 00:00:00.000000000 Z
|
11
|
+
date: 2022-04-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|