smarter_csv 1.10.3 → 1.11.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +12 -0
- data/CHANGELOG.md +35 -6
- data/CONTRIBUTORS.md +1 -2
- data/README.md +10 -4
- data/lib/smarter_csv/hash_transformations.rb +120 -40
- data/lib/smarter_csv/header_transformations.rb +103 -1
- data/lib/smarter_csv/header_validations.rb +109 -7
- data/lib/smarter_csv/options_processing.rb +117 -14
- data/lib/smarter_csv/smarter_csv.rb +5 -0
- data/lib/smarter_csv/variables.rb +1 -0
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv.rb +2 -0
- metadata +4 -5
- data/.rspec +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f4f709c1e38fd4e755802a17103231bd17ba58f54ce2d3fd9f8ad57222704114
|
4
|
+
data.tar.gz: 3ac00625a1478793b9cf9272862e448de4ca80784db71ebce3060d0ae34623ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dec4c77b2b4019788ff576e0d58c913e4eb5d284f7c06bc95554dc3a83985a6a5ce38fbd3fbe851d7a5944ea53621bab7dc70d270b0fdf786624d7b32a631395
|
7
|
+
data.tar.gz: 3b9cdea9f632698d78c749196c225726ff37bc807a0f10ca6b9f1df5c9e4cd5ad15415c81042277461bd1a2449f1f07e24ecebb1b2e1dcff63759c26cc92f67f
|
data/.rubocop.yml
CHANGED
@@ -88,12 +88,18 @@ Style/IfInsideElse:
|
|
88
88
|
Style/IfUnlessModifier:
|
89
89
|
Enabled: false
|
90
90
|
|
91
|
+
Style/InverseMethods:
|
92
|
+
Enabled: false
|
93
|
+
|
91
94
|
Style/NestedTernaryOperator:
|
92
95
|
Enabled: false
|
93
96
|
|
94
97
|
Style/PreferredHashMethods:
|
95
98
|
Enabled: false
|
96
99
|
|
100
|
+
Style/Proc:
|
101
|
+
Enabled: false
|
102
|
+
|
97
103
|
Style/NumericPredicate:
|
98
104
|
Enabled: false
|
99
105
|
|
@@ -129,6 +135,9 @@ Style/SymbolProc: # old Ruby versions can't do this
|
|
129
135
|
Style/TrailingCommaInHashLiteral:
|
130
136
|
Enabled: false
|
131
137
|
|
138
|
+
Style/TrailingCommaInArrayLiteral:
|
139
|
+
Enabled: false
|
140
|
+
|
132
141
|
Style/TrailingUnderscoreVariable:
|
133
142
|
Enabled: false
|
134
143
|
|
@@ -138,6 +147,9 @@ Style/TrivialAccessors:
|
|
138
147
|
# Style/UnlessModifier:
|
139
148
|
# Enabled: false
|
140
149
|
|
150
|
+
Style/WordArray:
|
151
|
+
Enabled: false
|
152
|
+
|
141
153
|
Style/ZeroLengthPredicate:
|
142
154
|
Enabled: false
|
143
155
|
|
data/CHANGELOG.md
CHANGED
@@ -1,13 +1,42 @@
|
|
1
1
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
3
3
|
|
4
|
-
##
|
5
|
-
* fixed issue when frozen options are handed in (thanks to Daniel Pepper)
|
6
|
-
* cleaned-up rspec tests (thanks to Daniel Pepper)
|
7
|
-
* fixed link in README (issue #251)
|
4
|
+
## T.B.D.
|
8
5
|
|
9
|
-
|
10
|
-
|
6
|
+
* code refactor
|
7
|
+
|
8
|
+
* NEW BEHAVIOR:
|
9
|
+
- hidden `:v2_mode` options (incomplete!)
|
10
|
+
- pre-processing for v2 options
|
11
|
+
- implemented v2 `:header_transformations` (DO NOT USE YET!)
|
12
|
+
+ -> check if all v1 transformations are correctly done
|
13
|
+
How are we going to
|
14
|
+
* disambiguate headers?
|
15
|
+
|
16
|
+
|
17
|
+
* do key_mapping? -> seems to work
|
18
|
+
- remove_unmapped_keys ?
|
19
|
+
- silence missing keys ... a missing mapped key should raise an exception, except when silenced
|
20
|
+
- required_keys needs to be a header-validation
|
21
|
+
|
22
|
+
|
23
|
+
* keep original headers? -> :none
|
24
|
+
* do strings_as_* ? -> either :keys_as_symbols, :keys_as_strings
|
25
|
+
* remove quote_chars? -> included in keys_as_*
|
26
|
+
* strip whitespace? -> included in keys_as_*
|
27
|
+
|
28
|
+
TODO:
|
29
|
+
|
30
|
+
- add tests for header_validations
|
31
|
+
|
32
|
+
- modify options to handle v1 and v2 options
|
33
|
+
- add v1 defaults in v2 processing
|
34
|
+
- add tests for all options processing
|
35
|
+
- 100% backwards compatibility when working in v1 mode
|
36
|
+
|
37
|
+
|
38
|
+
## 1.10.1 (2024-01-07)
|
39
|
+
* fix incorrect warning about UTF-8 (issue #268, thanks hirowatari)
|
11
40
|
|
12
41
|
## 1.10.1 (2024-01-07)
|
13
42
|
* fix incorrect warning about UTF-8 (issue #268, thanks hirowatari)
|
data/CONTRIBUTORS.md
CHANGED
@@ -51,5 +51,4 @@ A Big Thank you to everyone who filed issues, sent comments, and who contributed
|
|
51
51
|
* [Rahul Chaudhary](https://github.com/rahulch95)
|
52
52
|
* [Alessandro Fazzi](https://github.com/pioneerskies)
|
53
53
|
* [JP Camara](https://github.com/jpcamara)
|
54
|
-
* [
|
55
|
-
* [Daniel Pepper](https://github.com/dpep)
|
54
|
+
* [Hiro Watari](https://github.com/hirowatari)
|
data/README.md
CHANGED
@@ -23,8 +23,13 @@
|
|
23
23
|
|
24
24
|
* default branch is `main` for 1.x development
|
25
25
|
|
26
|
-
* 2.x development is
|
27
|
-
-
|
26
|
+
* 2.x development is on `2.0-develop` (check this branch for 2.0 documentation)
|
27
|
+
- This is an EXPERIMENTAL branch - DO NOT USE in production
|
28
|
+
|
29
|
+
#### Work towards Future Version 2.x
|
30
|
+
|
31
|
+
* Work towards SmarterCSV 2.x is still ongoing, with improved features, and more streamlined options, but consider it as experimental at this time.
|
32
|
+
Please check the [2.0-develop branch](https://github.com/tilo/smarter_csv/tree/2.0-develop), open any issues and pull requests with mention of tag v2.0.
|
28
33
|
|
29
34
|
---------------
|
30
35
|
|
@@ -389,9 +394,10 @@ And header and data validations will also be supported in 2.x
|
|
389
394
|
* some CSV files use un-escaped quotation characters inside fields. This can cause the import to break. To get around this, use the `:force_simple_split => true` option in combination with `:strip_chars_from_headers => /[\-"]/` . This will also significantly speed up the import.
|
390
395
|
If you would force a different :quote_char instead (setting it to a non-used character), then the import would be up to 5-times slower than using `:force_simple_split`.
|
391
396
|
|
392
|
-
##
|
397
|
+
## See also:
|
398
|
+
|
399
|
+
http://www.unixgods.org/~tilo/Ruby/process_csv_as_hashes.html
|
393
400
|
|
394
|
-
http://www.unixgods.org/Ruby/process_csv_as_hashes.html
|
395
401
|
|
396
402
|
|
397
403
|
## Installation
|
@@ -2,7 +2,16 @@
|
|
2
2
|
|
3
3
|
module SmarterCSV
|
4
4
|
class << self
|
5
|
+
# this is processing the hash that was built for one row of the input file
|
5
6
|
def hash_transformations(hash, options)
  # Dispatcher: a truthy options[:v2_mode] selects the v2 pipeline,
  # anything else falls through to the v1 implementation.
  return hash_transformations_v2(hash, options) if options[:v2_mode]

  hash_transformations_v1(hash, options)
end
|
13
|
+
|
14
|
+
def hash_transformations_v1(hash, options)
|
6
15
|
# there may be unmapped keys, or keys purposely mapped to nil or an empty key..
|
7
16
|
# make sure we delete any key/value pairs from the hash, which the user wanted to delete:
|
8
17
|
remove_empty_values = options[:remove_empty_values] == true
|
@@ -33,46 +42,117 @@ module SmarterCSV
|
|
33
42
|
end
|
34
43
|
end
|
35
44
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
#
|
66
|
-
#
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
45
|
+
def hash_transformations_v2(hash, options)
  # Runs every entry of options[:hash_transformations] over the row hash, in order.
  # Each step receives the result of the previous step; the final result is returned.
  pipeline = options[:hash_transformations]
  return hash if pipeline.nil? || pipeline.empty?

  pipeline.each do |step|
    hash =
      if step.respond_to?(:call)
        # user-provided callable (Proc / lambda): called with (hash, options)
        step.call(hash, options)
      elsif step.is_a?(Symbol)
        # name of a pre-defined transformation method on this module
        public_send(step, hash, options)
      elsif step.is_a?(Hash)
        # { name => args }: only the first pair of the hash is used
        name, args = step.first
        apply_transformation(name, hash, args, options)
      elsif step.is_a?(Array)
        # [name, arg1, arg2, ...]: the remaining elements are passed on as an array
        name, *args = step
        apply_transformation(name, hash, args, options)
      else
        raise SmarterCSV::IncorrectOption, "Invalid transformation type: #{step.class}"
      end
  end

  hash
end
|
72
|
+
|
73
|
+
#
|
74
|
+
# To handle v1-backward-compatible behavior, it is faster to roll all behavior into one method
|
75
|
+
#
|
76
|
+
def v1_backwards_compatibility(hash, options)
  # Single pass that builds a new hash: drops nil/empty keys, drops blank values,
  # and converts numeric-looking values unless the :only / :except settings of
  # :convert_values_to_numeric exclude the key.
  result = {}

  hash.each do |key, value|
    # remove_empty_keys
    next if key.nil? || key == '' || key == :""

    # remove_empty_values
    value_is_blank = has_rails ? value.blank? : blank?(value)
    next if value_is_blank

    # convert_values_to_numeric, honoring the :only / :except options
    skip_conversion = limit_execution_for_only_or_except(options, :convert_values_to_numeric, key)
    unless skip_conversion
      value = value.to_f if value =~ /^[+-]?\d+\.\d+$/
      # a converted Float is no longer a String, so it is not re-tested below
      value = value.to_i if value.is_a?(String) && value =~ /^[+-]?\d+$/
    end

    result[key] = value
  end

  result
end
|
94
|
+
|
95
|
+
#
|
96
|
+
# Building Blocks in case you want to build your own flow:
|
97
|
+
#
|
98
|
+
|
99
|
+
def value_converters(hash, _options)
  #
  # TO BE IMPLEMENTED
  #
  # Until then this is a no-op that hands the hash back unchanged. The v2 pipeline
  # assigns each step's return value as the new hash, so an empty body (nil) would
  # wipe out the row for all following steps.
  hash
end
|
104
|
+
|
105
|
+
def strip_spaces(hash, _options)
  # Strips leading/trailing whitespace from every String value, in place on the hash,
  # and returns the hash.
  #
  # Only Strings are touched: nil and already-converted values (e.g. Integers) have no
  # strip!, and a non-destructive strip also works when the value is a frozen String.
  hash.each do |key, value|
    hash[key] = value.strip if value.is_a?(String)
  end
end
|
108
|
+
|
109
|
+
def remove_blank_values(hash, _options)
  # Deletes, in place, every pair whose value is nil or a String without any
  # non-whitespace character; returns the hash.
  hash.delete_if do |_key, value|
    value.nil? || (value.is_a?(String) && value !~ /[^[:space:]]/)
  end
end
|
112
|
+
|
113
|
+
def remove_zero_values(hash, _options)
  # Deletes, in place, every pair whose value is a Numeric equal to zero; returns the hash.
  # Strings such as '0' are left alone.
  hash.delete_if { |_key, value| value.is_a?(Numeric) && value.zero? }
end
|
116
|
+
|
117
|
+
def remove_empty_keys(hash, _options)
  # Deletes, in place, every pair whose key is nil or empty ('' or :"") and returns the hash.
  #
  # delete_if is used instead of reject! because reject! returns nil when nothing was
  # removed, and the v2 pipeline uses the return value as the new hash.
  # Keys that do not respond to empty? (e.g. Integers) are kept.
  hash.delete_if do |key, _value|
    key.nil? || (key.respond_to?(:empty?) && key.empty?)
  end
end
|
120
|
+
|
121
|
+
def convert_values_to_numeric(hash, _options)
  # Converts, in place, values that are entirely an optionally signed integer or decimal
  # number into Integer / Float; every other value is left untouched. Returns the hash.
  #
  # \A and \z anchor the whole string. With ^ and $ (line anchors) a multi-line value
  # such as "12\nfoo" would match on its first line and be truncated to 12.
  hash.each_key do |k|
    case hash[k]
    when /\A[+-]?\d+\.\d+\z/
      hash[k] = hash[k].to_f
    when /\A[+-]?\d+\z/
      hash[k] = hash[k].to_i
    end
  end
end
|
131
|
+
|
132
|
+
def convert_values_to_numeric_unless_leading_zeroes(hash, _options)
  # Like convert_values_to_numeric, but only converts values whose first digit is 1-9,
  # so strings such as '01234' keep their leading zero. Returns the hash.
  # NOTE(review): '0' and '0.5' also start with a zero and therefore stay Strings -
  # confirm that this is intended.
  #
  # \A and \z anchor the whole string. With ^ and $ (line anchors) a multi-line value
  # such as "12\nfoo" would match on its first line and be truncated to 12.
  hash.each_key do |k|
    case hash[k]
    when /\A[+-]?[1-9]\d*\.\d+\z/
      hash[k] = hash[k].to_f
    when /\A[+-]?[1-9]\d*\z/
      hash[k] = hash[k].to_i
    end
  end
end
|
142
|
+
|
143
|
+
# IMPORTANT NOTE:
|
144
|
+
# this can lead to cases where a nil or empty value gets converted into 0 or 0.0,
|
145
|
+
# and can then not be properly removed!
|
146
|
+
#
|
147
|
+
# you should first try to use convert_values_to_numeric or convert_values_to_numeric_unless_leading_zeroes
|
148
|
+
#
|
149
|
+
def convert_to_integer(hash, _options)
  # Replaces every value with value.to_i, in place, and returns the hash.
  # nil and non-numeric strings become 0.
  hash.each do |key, value|
    hash[key] = value.to_i
  end
end
|
152
|
+
|
153
|
+
def convert_to_float(hash, _options)
  # Replaces every value with value.to_f, in place, and returns the hash.
  # nil and non-numeric strings become 0.0.
  hash.each do |key, value|
    hash[key] = value.to_f
  end
end
|
76
156
|
|
77
157
|
protected
|
78
158
|
|
@@ -2,8 +2,18 @@
|
|
2
2
|
|
3
3
|
module SmarterCSV
|
4
4
|
class << self
|
5
|
-
#
|
5
|
+
# this is processing the headers from the input file
|
6
6
|
def header_transformations(header_array, options)
  # Dispatcher: a truthy options[:v2_mode] selects the v2 pipeline,
  # anything else falls through to the v1 implementation.
  return header_transformations_v2(header_array, options) if options[:v2_mode]

  header_transformations_v1(header_array, options)
end
|
13
|
+
|
14
|
+
# ---- V1.x Version: transform the headers that were in the file: ------------------------------------------
|
15
|
+
#
|
16
|
+
def header_transformations_v1(header_array, options)
|
7
17
|
header_array.map!{|x| x.gsub(%r/#{options[:quote_char]}/, '')}
|
8
18
|
header_array.map!{|x| x.strip} if options[:strip_whitespace]
|
9
19
|
|
@@ -57,7 +67,99 @@ module SmarterCSV
|
|
57
67
|
header
|
58
68
|
end
|
59
69
|
end
|
70
|
+
|
60
71
|
headers
|
61
72
|
end
|
73
|
+
|
74
|
+
# ---- V2.x Version: transform the headers that were in the file: ------------------------------------------
|
75
|
+
#
|
76
|
+
def header_transformations_v2(header_array, options)
  # Runs every entry of options[:header_transformations] over the headers, in order.
  # Each step receives the result of the previous step; the final result is returned.
  steps = options[:header_transformations]
  return header_array if steps.nil? || steps.empty?

  steps.each do |step|
    header_array =
      if step.respond_to?(:call)
        # user-provided callable (Proc / lambda): called with (headers, options)
        step.call(header_array, options)
      elsif step.is_a?(Symbol)
        # name of a pre-defined transformation method on this module
        public_send(step, header_array, options)
      elsif step.is_a?(Hash)
        # { name => args }: only the first pair of the hash is used
        name, args = step.first
        apply_transformation(name, header_array, args, options)
      elsif step.is_a?(Array)
        # [name, arg1, arg2, ...]: the remaining elements are passed on as an array
        name, *args = step
        apply_transformation(name, header_array, args, options)
      else
        raise SmarterCSV::IncorrectOption, "Invalid transformation type: #{step.class}"
      end
  end

  header_array
end
|
103
|
+
|
104
|
+
def apply_transformation(transformation, header_array, args, options)
  # Applies one transformation that carries extra arguments.
  # A callable (e.g. a Proc) is called directly; anything else is treated as the
  # name of a method on this module. Both receive (header_array, args, options).
  return transformation.call(header_array, args, options) if transformation.respond_to?(:call)

  public_send(transformation, header_array, args, options)
end
|
113
|
+
|
114
|
+
# pre-defined v2 header transformations:
|
115
|
+
|
116
|
+
# these are some pre-defined header transformations which can be used
|
117
|
+
# all these take the headers array as the input
|
118
|
+
#
|
119
|
+
# the computed options can be accessed via @options
|
120
|
+
|
121
|
+
def keys_as_symbols(headers, options)
  # Normalizes every header into a Symbol: removes options[:quote_char], strips
  # surrounding whitespace, downcases, and collapses each run of whitespace / dashes
  # into a single underscore. Returns a new array.
  #
  # The quote character is removed as a literal String (not interpolated into a Regexp,
  # where a character like '.' would match everything) and before stripping, so padding
  # inside the quotes does not turn into leading / trailing underscores.
  quote_char = options[:quote_char].to_s

  headers.map do |header|
    header.gsub(quote_char, '').strip.downcase.gsub(/(\s|-)+/, '_').to_sym
  end
end
|
126
|
+
|
127
|
+
def keys_as_strings(headers, options)
  # Normalizes every header into a String: removes options[:quote_char], strips
  # surrounding whitespace, downcases, and collapses each run of whitespace / dashes
  # into a single underscore. Returns a new array.
  #
  # The quote character is removed as a literal String (not interpolated into a Regexp,
  # where a character like '.' would match everything) and before stripping, so padding
  # inside the quotes does not turn into leading / trailing underscores.
  quote_char = options[:quote_char].to_s

  headers.map do |header|
    header.gsub(quote_char, '').strip.downcase.gsub(/(\s|-)+/, '_')
  end
end
|
132
|
+
|
133
|
+
def downcase_headers(headers, _options)
  # Returns a new array with every header stripped and downcased.
  #
  # The non-destructive downcase is required here: downcase! returns nil when the
  # string contains nothing to downcase, which would turn such headers into nil.
  headers.map do |header|
    header.strip.downcase
  end
end
|
138
|
+
|
139
|
+
def key_mapping(headers, mapping = {}, options)
  # Renames headers according to mapping (from -> to) and returns a new array.
  #
  # Raises SmarterCSV::IncorrectOption unless mapping is a non-empty Hash, and
  # SmarterCSV::KeyMappingError when mapping names headers that are not present,
  # unless they are silenced via options[:silence_missing_keys]:
  #   true       - never raise for missing keys
  #   Array      - do not raise for the listed keys only
  #   false/nil  - raise for any missing key
  #
  # The type check comes first so that nil (or any non-Hash) raises IncorrectOption
  # instead of a NoMethodError from calling empty? on it.
  unless mapping.is_a?(Hash) && !mapping.empty?
    raise(SmarterCSV::IncorrectOption, "ERROR: incorrect format for key_mapping! Expecting hash with from -> to mappings")
  end

  silence = options[:silence_missing_keys]
  unless silence == true
    silenced_keys = silence.is_a?(Array) ? silence : []
    missing_keys = mapping.keys - headers - silenced_keys
    raise SmarterCSV::KeyMappingError, "ERROR: cannot map headers: #{missing_keys.join(', ')}" unless missing_keys.empty?
  end

  # Apply key mapping, retaining nils for explicitly mapped headers
  headers.map do |header|
    if mapping.key?(header)
      # Maps the key according to the mapping, including nil mapping
      mapping[header]
    elsif options[:remove_unmapped_keys]
      # Remove headers not specified in the mapping
      nil
    else
      # Keep the original header if not specified in the mapping
      header
    end
  end
end
|
62
164
|
end
|
63
165
|
end
|
@@ -3,11 +3,21 @@
|
|
3
3
|
module SmarterCSV
|
4
4
|
class << self
|
5
5
|
def header_validations(headers, options)
|
6
|
-
|
7
|
-
|
6
|
+
if options[:v2_mode]
|
7
|
+
header_validations_v2(headers, options)
|
8
|
+
else
|
9
|
+
header_validations_v1(headers, options)
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
# ---- V1.x Version: validate the headers -----------------------------------------------------------------
|
14
|
+
|
15
|
+
def header_validations_v1(headers, options)
|
16
|
+
check_duplicate_headers_v1(headers, options)
|
17
|
+
check_required_headers_v1(headers, options)
|
8
18
|
end
|
9
19
|
|
10
|
-
def
|
20
|
+
def check_duplicate_headers_v1(headers, _options)
|
11
21
|
header_counts = Hash.new(0)
|
12
22
|
headers.each { |header| header_counts[header] += 1 unless header.nil? }
|
13
23
|
|
@@ -18,17 +28,109 @@ module SmarterCSV
|
|
18
28
|
end
|
19
29
|
end
|
20
30
|
|
21
|
-
|
22
|
-
|
23
|
-
def check_required_headers(headers, options)
|
31
|
+
def check_required_headers_v1(headers, options)
|
24
32
|
if options[:required_keys] && options[:required_keys].is_a?(Array)
|
25
33
|
headers_set = headers.to_set
|
26
34
|
missing_keys = options[:required_keys].select { |k| !headers_set.include?(k) }
|
27
35
|
|
28
36
|
unless missing_keys.empty?
|
29
|
-
raise SmarterCSV::MissingKeys, "ERROR: missing attributes: #{missing_keys.join(',')}
|
37
|
+
raise SmarterCSV::MissingKeys, "ERROR: missing attributes: #{missing_keys.join(',')}"
|
30
38
|
end
|
31
39
|
end
|
32
40
|
end
|
41
|
+
|
42
|
+
# ---- V2.x Version: validate the headers -----------------------------------------------------------------
|
43
|
+
|
44
|
+
# def header_validations_v2(headers, options)
|
45
|
+
# return unless options[:header_validations]
|
46
|
+
|
47
|
+
# options[:header_validations].each do |validation|
|
48
|
+
# if validation.respond_to?(:call)
|
49
|
+
# # Directly call if it's a Proc or lambda
|
50
|
+
# validation.call(headers)
|
51
|
+
# else
|
52
|
+
# binding.pry
|
53
|
+
# # Handle Symbol, Hash, or Array
|
54
|
+
# method_name, args = validation.is_a?(Symbol) ? [validation, []] : validation
|
55
|
+
# public_send(method_name, headers, *Array(args))
|
56
|
+
# end
|
57
|
+
# end
|
58
|
+
# end
|
59
|
+
|
60
|
+
def header_validations_v2(headers, options)
  # Runs every entry of options[:header_validations] against the headers, in order.
  # Validations report problems by raising; their return values are ignored.
  requested = options[:header_validations]
  return unless requested

  requested.each do |check|
    if check.respond_to?(:call)
      # Proc or lambda: called with the headers only
      check.call(headers)
    elsif check.is_a?(Symbol)
      public_send(check, headers)
    elsif check.is_a?(Hash)
      # { name => args }: only the first pair of the hash is used
      name, args = check.first
      public_send(name, headers, args)
    elsif check.is_a?(Array)
      # [name, arg1, arg2, ...]: the remaining elements are passed on as one array
      name, *args = check
      public_send(name, headers, args)
    else
      raise SmarterCSV::IncorrectOption, "Invalid validation type: #{check.class}"
    end
  end
end
|
86
|
+
|
87
|
+
# def header_validations_v2_orig(headers, options)
|
88
|
+
# # do the header validations the user requested:
|
89
|
+
# # Header validations typically raise errors directly
|
90
|
+
# #
|
91
|
+
# if options[:header_validations]
|
92
|
+
# options[:header_validations].each do |validation|
|
93
|
+
# case validation
|
94
|
+
# when Symbol
|
95
|
+
# public_send(validation, headers)
|
96
|
+
# when Hash
|
97
|
+
# val, args = validation.first
|
98
|
+
# public_send(val, headers, args)
|
99
|
+
# when Array
|
100
|
+
# val, args = validation
|
101
|
+
# public_send(val, headers, args)
|
102
|
+
# else
|
103
|
+
# validation.call(headers) unless validation.nil?
|
104
|
+
# end
|
105
|
+
# end
|
106
|
+
# end
|
107
|
+
# end
|
108
|
+
|
109
|
+
# these are some pre-defined header validations which can be used
|
110
|
+
# all these take the headers array as the input
|
111
|
+
#
|
112
|
+
# the computed options can be accessed via @options
|
113
|
+
|
114
|
+
def unique_headers(headers)
  # Raises SmarterCSV::DuplicateHeaders when a non-nil header occurs more than once;
  # the message lists each duplicated header with its count. Returns nil otherwise.
  occurrences = headers.each_with_object(Hash.new(0)) do |name, counts|
    counts[name] += 1 unless name.nil?
  end
  repeated = occurrences.select { |_, count| count > 1 }
  return if repeated.empty?

  raise(SmarterCSV::DuplicateHeaders, "Duplicate Headers in CSV: #{repeated.inspect}")
end
|
124
|
+
|
125
|
+
def required_headers(headers, required = [])
  # Raises SmarterCSV::MissingKeys when any entry of required is absent from headers,
  # and SmarterCSV::IncorrectOption when required is not an Array. Returns nil otherwise.
  unless required.is_a?(Array)
    raise(SmarterCSV::IncorrectOption, "ERROR: required_headers validation needs an array argument")
  end

  present = headers.to_set
  absent = required.reject { |name| present.include?(name) }
  return if absent.empty?

  raise(SmarterCSV::MissingKeys, "Missing Headers in CSV: #{absent.inspect}")
end
|
33
135
|
end
|
34
136
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module SmarterCSV
|
4
|
-
|
4
|
+
COMMON_OPTIONS = {
|
5
5
|
acceleration: true,
|
6
6
|
auto_row_sep_chars: 500,
|
7
7
|
chunk_size: nil,
|
@@ -15,39 +15,66 @@ module SmarterCSV
|
|
15
15
|
force_utf8: false,
|
16
16
|
headers_in_file: true,
|
17
17
|
invalid_byte_sequence: '',
|
18
|
+
quote_char: '"',
|
19
|
+
remove_unmapped_keys: false,
|
20
|
+
row_sep: :auto, # was: $/,
|
21
|
+
silence_deprecations: false, # new in 1.11
|
22
|
+
silence_missing_keys: false,
|
23
|
+
skip_lines: nil,
|
24
|
+
user_provided_headers: nil,
|
25
|
+
verbose: false,
|
26
|
+
with_line_numbers: false,
|
27
|
+
v2_mode: false,
|
28
|
+
}.freeze
|
29
|
+
|
30
|
+
V1_DEFAULT_OPTIONS = {
|
18
31
|
keep_original_headers: false,
|
19
32
|
key_mapping: nil,
|
20
|
-
quote_char: '"',
|
21
33
|
remove_empty_hashes: true,
|
22
34
|
remove_empty_values: true,
|
23
|
-
remove_unmapped_keys: false,
|
24
35
|
remove_values_matching: nil,
|
25
36
|
remove_zero_values: false,
|
26
37
|
required_headers: nil,
|
27
38
|
required_keys: nil,
|
28
|
-
row_sep: :auto, # was: $/,
|
29
|
-
silence_missing_keys: false,
|
30
|
-
skip_lines: nil,
|
31
39
|
strings_as_keys: false,
|
32
40
|
strip_chars_from_headers: nil,
|
33
41
|
strip_whitespace: true,
|
34
|
-
user_provided_headers: nil,
|
35
42
|
value_converters: nil,
|
36
|
-
|
37
|
-
with_line_numbers: false,
|
43
|
+
v2_mode: false,
|
38
44
|
}.freeze
|
39
45
|
|
46
|
+
# Option names that trigger the deprecation handling in handle_deprecations.
# The names must match the option keys users pass in exactly, otherwise the
# intersection with the given option keys never finds them.
DEPRECATED_OPTIONS = [
  :convert_values_to_numeric,
  :downcase_headers,
  :keep_original_headers,
  :key_mapping,
  :remove_empty_hashes,
  :remove_empty_values,
  :remove_values_matching,
  :remove_zero_values,
  :required_headers,
  :required_keys,
  :strings_as_keys,
  :strip_chars_from_headers,
  :strip_whitespace,
  :value_converters,
].freeze
|
62
|
+
|
40
63
|
class << self
|
41
64
|
# NOTE: this is not called when "parse" methods are tested by themselves
|
42
65
|
def process_options(given_options = {})
|
43
66
|
puts "User provided options:\n#{pp(given_options)}\n" if given_options[:verbose]
|
44
67
|
|
45
|
-
@options = DEFAULT_OPTIONS.dup.merge!(given_options)
|
46
|
-
|
47
68
|
# fix invalid input
|
48
|
-
|
69
|
+
given_options[:invalid_byte_sequence] = '' if given_options[:invalid_byte_sequence].nil?
|
70
|
+
|
71
|
+
# warn about deprecated options / raises error for v2_mode
|
72
|
+
handle_deprecations(given_options)
|
73
|
+
|
74
|
+
given_options = preprocess_v2_options(given_options) if given_options[:v2_mode]
|
49
75
|
|
50
|
-
|
76
|
+
@options = compute_default_options(given_options).merge!(given_options)
|
77
|
+
puts "Computed options:\n#{pp(@options)}\n" if given_options[:verbose]
|
51
78
|
|
52
79
|
validate_options!(@options)
|
53
80
|
@options
|
@@ -57,11 +84,35 @@ module SmarterCSV
|
|
57
84
|
#
|
58
85
|
# ONLY FOR BACKWARDS-COMPATIBILITY
|
59
86
|
def default_options
|
60
|
-
|
87
|
+
COMMON_OPTIONS.merge(V1_DEFAULT_OPTIONS)
|
61
88
|
end
|
62
89
|
|
63
90
|
private
|
64
91
|
|
92
|
+
def compute_default_options(options = {})
  # Builds the defaults that the user-given options are merged into.
  #
  #   v1 mode (options[:v2_mode] falsy): COMMON_OPTIONS + V1_DEFAULT_OPTIONS
  #   v2 mode, defaults 'none':          no defaults at all (empty hash)
  #   v2 mode, defaults 'v1':            COMMON_OPTIONS + V2_DEFAULT_OPTIONS + V1_TRANSFORMATIONS
  #   v2 mode, anything else:            COMMON_OPTIONS + V2_DEFAULT_OPTIONS + V2_TRANSFORMATIONS
  #
  # Always returns a new, unfrozen Hash - the caller merges into it destructively.
  return COMMON_OPTIONS.merge(V1_DEFAULT_OPTIONS) unless options[:v2_mode]

  defaults_mode = options[:defaults].to_s
  # explicit return value: falling out of the conditional here would yield nil
  return {} if defaults_mode == 'none'

  transformations = defaults_mode == 'v1' ? V1_TRANSFORMATIONS : V2_TRANSFORMATIONS
  COMMON_OPTIONS.merge(V2_DEFAULT_OPTIONS).merge(transformations)
end
|
105
|
+
|
106
|
+
def handle_deprecations(options)
  # Reports use of deprecated options: raises SmarterCSV::DeprecatedOptions in v2 mode,
  # prints a warning otherwise. Does nothing when no deprecated option is present or
  # when options[:silence_deprecations] is set.
  deprecated_in_use = DEPRECATED_OPTIONS & options.keys
  return if deprecated_in_use.empty? || options[:silence_deprecations]

  message = "SmarterCSV #{VERSION} DEPRECATED OPTIONS: #{pp(deprecated_in_use)}"
  raise(SmarterCSV::DeprecatedOptions, "ERROR: #{message}") if options[:v2_mode]

  puts "DEPRECATION WARNING: #{message}"
end
|
115
|
+
|
65
116
|
def validate_options!(options)
|
66
117
|
# deprecate required_headers
|
67
118
|
unless options[:required_headers].nil?
|
@@ -90,5 +141,57 @@ module SmarterCSV
|
|
90
141
|
def pp(value)
  # Returns a printable String for value: AwesomePrint's rendering when that
  # constant is defined, the plain inspect output otherwise.
  return value.awesome_inspect(index: nil) if defined?(AwesomePrint)

  value.inspect
end
|
144
|
+
|
145
|
+
# ---- V2 code ----------------------------------------------------------------------------------------
|
146
|
+
|
147
|
+
V2_DEFAULT_OPTIONS = {
|
148
|
+
# These need to go to the COMMON_OPTIONS:
|
149
|
+
remove_empty_hashes: true, # this might need a transformation or move to common options
|
150
|
+
# ------------
|
151
|
+
header_transformations: [:keys_as_symbols],
|
152
|
+
header_validations: [:unique_headers],
|
153
|
+
# data_transformations: [:replace_blank_with_nil],
|
154
|
+
# data_validations: [],
|
155
|
+
hash_transformations: [:strip_spaces, :remove_blank_values],
|
156
|
+
hash_validations: [],
|
157
|
+
v2_mode: true,
|
158
|
+
}.freeze
|
159
|
+
|
160
|
+
V2_TRANSFORMATIONS = {
|
161
|
+
header_transformations: [:keys_as_symbols],
|
162
|
+
header_validations: [:unique_headers],
|
163
|
+
# data_transformations: [:replace_blank_with_nil],
|
164
|
+
# data_validations: [],
|
165
|
+
hash_transformations: [:v1_backwards_compatibility],
|
166
|
+
# hash_transformations: [:remove_empty_keys, :strip_spaces, :remove_blank_values, :convert_values_to_numeric], # ??? :convert_values_to_numeric]
|
167
|
+
hash_validations: [],
|
168
|
+
}.freeze
|
169
|
+
|
170
|
+
V1_TRANSFORMATIONS = {
|
171
|
+
header_transformations: [:keys_as_symbols],
|
172
|
+
header_validations: [:unique_headers],
|
173
|
+
# data_transformations: [:replace_blank_with_nil],
|
174
|
+
# data_validations: [],
|
175
|
+
hash_transformations: [:strip_spaces, :remove_blank_values, :convert_values_to_numeric],
|
176
|
+
hash_validations: [],
|
177
|
+
}.freeze
|
178
|
+
|
179
|
+
def preprocess_v2_options(options)
  # Normalizes options[:header_transformations] for v2 processing and returns the
  # resulting options hash.
  #
  # We want to provide safe defaults for easy processing, that is why we have a special
  # keyword :none to not do any header transformations. It is removed here, so the
  # result is always an Array (possibly empty) of real transformations:
  #   nil / :none / 'none'   -> []
  #   Array                  -> the array without any 'none' entries
  #   single transformation  -> wrapped into a one-element array
  #
  # The given hash is not modified (it may be frozen); a merged copy is returned.
  return options unless options[:v2_mode] || options[:header_transformations]

  requested = options[:header_transformations]
  transformations =
    if requested.nil? || requested.to_s == 'none'
      []
    else
      # a single Symbol / Proc / Hash is treated as a list with one entry
      list = requested.is_a?(Array) ? requested : [requested]
      list.reject { |x| x.to_s == 'none' }
    end

  options.merge(header_transformations: transformations)
end
|
93
196
|
end
|
94
197
|
end
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
module SmarterCSV
|
4
4
|
class SmarterCSVException < StandardError; end
|
5
|
+
class DeprecatedOptions < SmarterCSVException; end
|
5
6
|
class HeaderSizeMismatch < SmarterCSVException; end
|
6
7
|
class IncorrectOption < SmarterCSVException; end
|
7
8
|
class ValidationError < SmarterCSVException; end
|
@@ -108,6 +109,10 @@ module SmarterCSV
|
|
108
109
|
|
109
110
|
next if options[:remove_empty_hashes] && hash.empty?
|
110
111
|
|
112
|
+
#
|
113
|
+
# should HASH VALIDATIONS go here instead?
|
114
|
+
#
|
115
|
+
|
111
116
|
puts "CSV Line #{@file_line_count}: #{pp(hash)}" if @verbose == '2' # very verbose setting
|
112
117
|
# optional adding of csv_line_number to the hash to help debugging
|
113
118
|
hash[:csv_line_number] = @csv_line_count if options[:with_line_numbers]
|
data/lib/smarter_csv/version.rb
CHANGED
data/lib/smarter_csv.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.11.0.pre1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tilo Sloboda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-01-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: awesome_print
|
@@ -104,7 +104,6 @@ extensions:
|
|
104
104
|
- ext/smarter_csv/extconf.rb
|
105
105
|
extra_rdoc_files: []
|
106
106
|
files:
|
107
|
-
- ".rspec"
|
108
107
|
- ".rubocop.yml"
|
109
108
|
- ".rvmrc"
|
110
109
|
- CHANGELOG.md
|
@@ -148,9 +147,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
148
147
|
version: 2.5.0
|
149
148
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
150
149
|
requirements:
|
151
|
-
- - "
|
150
|
+
- - ">"
|
152
151
|
- !ruby/object:Gem::Version
|
153
|
-
version:
|
152
|
+
version: 1.3.1
|
154
153
|
requirements: []
|
155
154
|
rubygems_version: 3.2.3
|
156
155
|
signing_key:
|
data/.rspec
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
--require spec_helper
|