smarter_csv 1.10.1 → 1.11.0.pre2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6b214e402e999d37eb8fff613e0d87afe9084298ea0813447ca81aec33d7503a
4
- data.tar.gz: 5344f4221d56ce53864bcd825c35d128cd998b1a54a2f60bed6f7e9d4d7c802f
3
+ metadata.gz: 9027a37c4b29e68fcbc559a6a8285e5076684883612e98b13f116526dadc6e4b
4
+ data.tar.gz: 43cfa0254ac2caa8ca02a8863fc790f1c56beb0b16e175a16fd947f92eda8c08
5
5
  SHA512:
6
- metadata.gz: f05993e5a591b7b720dc2833d525ee2443d6fe00e6d0acdda2d237406296e16fddbe3959ff8df57f6d3bf64f95401f3d8b3b83d4a8a92ea8e9a7a8ba82cd57fe
7
- data.tar.gz: 906a7b5ef793ed46d875a77d55471d568f6ac5adebcf2c121bf67ecdbbf150059eb0eeddec6a63a2d10cc6617ed2ba7359eeb6f627b25891a38ea4bdbdf37b83
6
+ metadata.gz: d5b2eac35e33bdeb9ec632578207c47f34a08a4595c1ebf04929a7fe302efc3fc08565c0d6fc6454fd538b99cc1c4a5662599a88693e3aa1b80c5c0c7fc1b05e
7
+ data.tar.gz: 36f622f12d5412ef8919c30a267def315985aef4b3f33c209341a046442795bb66df6c8ebb97399f35e24fd565df658a0e4b7a94d3563831d7ce32facbeab33f
data/.rubocop.yml CHANGED
@@ -88,12 +88,18 @@ Style/IfInsideElse:
88
88
  Style/IfUnlessModifier:
89
89
  Enabled: false
90
90
 
91
+ Style/InverseMethods:
92
+ Enabled: false
93
+
91
94
  Style/NestedTernaryOperator:
92
95
  Enabled: false
93
96
 
94
97
  Style/PreferredHashMethods:
95
98
  Enabled: false
96
99
 
100
+ Style/Proc:
101
+ Enabled: false
102
+
97
103
  Style/NumericPredicate:
98
104
  Enabled: false
99
105
 
@@ -129,6 +135,9 @@ Style/SymbolProc: # old Ruby versions can't do this
129
135
  Style/TrailingCommaInHashLiteral:
130
136
  Enabled: false
131
137
 
138
+ Style/TrailingCommaInArrayLiteral:
139
+ Enabled: false
140
+
132
141
  Style/TrailingUnderscoreVariable:
133
142
  Enabled: false
134
143
 
@@ -138,6 +147,9 @@ Style/TrivialAccessors:
138
147
  # Style/UnlessModifier:
139
148
  # Enabled: false
140
149
 
150
+ Style/WordArray:
151
+ Enabled: false
152
+
141
153
  Style/ZeroLengthPredicate:
142
154
  Enabled: false
143
155
 
data/CHANGELOG.md CHANGED
@@ -1,6 +1,43 @@
1
1
 
2
2
  # SmarterCSV 1.x Change Log
3
3
 
4
+ ## T.B.D.
5
+
6
+ * code refactor
7
+
8
+ * NEW BEHAVIOR:
9
+ - hidden `:v2_mode` options (incomplete!)
10
+ - pre-processing for v2 options
11
+ - implemented v2 `:header_transformations` (DO NOT USE YET!)
12
+ + -> check if all v1 transformations are correctly done
13
+ How are we going to
14
+ * disambiguate headers?
15
+
16
+
17
+ * do key_mapping? -> seems to work
18
+ - remove_unmapped_keys ?
19
+ - silence missing keys ... a missing mapped key should raise an exception, except when silenced
20
+ - required_keys needs to be a header-validation
21
+
22
+
23
+ * keep original headers? -> :none
24
+ * do strings_as_* ? -> either :keys_as_symbols, :keys_as_strings
25
+ * remove quote_chars? -> included in keys_as_*
26
+ * strip whitespace? -> included in keys_as_*
27
+
28
+ TODO:
29
+
30
+ - add tests for header_validations
31
+
32
+ - modify options to handle v1 and v2 options
33
+ - add v1 defaults in v2 processing
34
+ - add tests for all options processing
35
+ - 100% backwards compatibility when working in v1 mode
36
+
37
+
38
+ ## 1.10.1 (2024-01-07)
39
+ * fix incorrect warning about UTF-8 (issue #268, thanks hirowatari)
40
+
4
41
  ## 1.10.1 (2024-01-07)
5
42
  * fix incorrect warning about UTF-8 (issue #268, thanks hirowatari)
6
43
 
@@ -2,7 +2,16 @@
2
2
 
3
3
  module SmarterCSV
4
4
  class << self
5
+ # this is processing the headers from the input file
5
6
  def hash_transformations(hash, options)
7
+ if options[:v2_mode]
8
+ hash_transformations_v2(hash, options)
9
+ else
10
+ hash_transformations_v1(hash, options)
11
+ end
12
+ end
13
+
14
+ def hash_transformations_v1(hash, options)
6
15
  # there may be unmapped keys, or keys purposely mapped to nil or an empty key..
7
16
  # make sure we delete any key/value pairs from the hash, which the user wanted to delete:
8
17
  remove_empty_values = options[:remove_empty_values] == true
@@ -33,46 +42,117 @@ module SmarterCSV
33
42
  end
34
43
  end
35
44
 
36
- # def hash_transformations(hash, options)
37
- # # there may be unmapped keys, or keys purposedly mapped to nil or an empty key..
38
- # # make sure we delete any key/value pairs from the hash, which the user wanted to delete:
39
- # hash.delete(nil)
40
- # hash.delete('')
41
- # hash.delete(:"")
42
-
43
- # if options[:remove_empty_values] == true
44
- # hash.delete_if{|_k, v| has_rails ? v.blank? : blank?(v)}
45
- # end
46
-
47
- # hash.delete_if{|_k, v| !v.nil? && v =~ /^(0+|0+\.0+)$/} if options[:remove_zero_values] # values are Strings
48
- # hash.delete_if{|_k, v| v =~ options[:remove_values_matching]} if options[:remove_values_matching]
49
-
50
- # if options[:convert_values_to_numeric]
51
- # hash.each do |k, v|
52
- # # deal with the :only / :except options to :convert_values_to_numeric
53
- # next if limit_execution_for_only_or_except(options, :convert_values_to_numeric, k)
54
-
55
- # # convert if it's a numeric value:
56
- # case v
57
- # when /^[+-]?\d+\.\d+$/
58
- # hash[k] = v.to_f
59
- # when /^[+-]?\d+$/
60
- # hash[k] = v.to_i
61
- # end
62
- # end
63
- # end
64
-
65
- # if options[:value_converters]
66
- # hash.each do |k, v|
67
- # converter = options[:value_converters][k]
68
- # next unless converter
69
-
70
- # hash[k] = converter.convert(v)
71
- # end
72
- # end
73
-
74
- # hash
75
- # end
45
# ---- V2.x Version: run the user-requested transformations on one row hash ----
#
# Applies every entry of options[:hash_transformations] in order; each
# step receives the result of the previous one.
#
# An entry can be:
#   * a callable (Proc / lambda)  -> called with (hash, options)
#   * a Symbol                    -> public method of this module, called with (hash, options)
#   * a Hash  { name => args }    -> dispatched through apply_transformation
#   * an Array [name, *args]      -> dispatched through apply_transformation
#
# Returns the transformed hash; the input is returned untouched when no
# transformations are configured.
# Raises SmarterCSV::IncorrectOption for any other entry type.
def hash_transformations_v2(hash, options)
  transformations = options[:hash_transformations]
  return hash if transformations.nil? || transformations.empty?

  transformations.each do |transformation|
    hash =
      if transformation.respond_to?(:call) # user-provided Proc
        transformation.call(hash, options)
      else
        case transformation
        when Symbol # pre-defined transformation defined in the SmarterCSV module
          public_send(transformation, hash, options)
        when Hash # only the first key/value pair is used: name => args
          trans, args = transformation.first
          apply_transformation(trans, hash, args, options)
        when Array # additional arguments in array form (e.g. into a Proc)
          trans, *args = transformation
          apply_transformation(trans, hash, args, options)
        else
          raise SmarterCSV::IncorrectOption, "Invalid transformation type: #{transformation.class}"
        end
      end
  end

  hash
end
72
+
73
+ #
74
+ # To handle v1-backward-compatible behavior, it is faster to roll all behavior into one method
75
+ #
76
+ def v1_backwards_compatibility(hash, options)
77
+ hash.each_with_object({}) do |(k, v), new_hash|
78
+ next if k.nil? || k == '' || k == :"" # remove_empty_keys
79
+ next if has_rails ? v.blank? : blank?(v) # remove_empty_values
80
+
81
+ # convert_values_to_numeric:
82
+ # deal with the :only / :except options to :convert_values_to_numeric
83
+ unless limit_execution_for_only_or_except(options, :convert_values_to_numeric, k)
84
+ if v =~ /^[+-]?\d+\.\d+$/
85
+ v = v.to_f
86
+ elsif v =~ /^[+-]?\d+$/
87
+ v = v.to_i
88
+ end
89
+ end
90
+
91
+ new_hash[k] = v
92
+ end
93
+ end
94
+
95
+ #
96
+ # Building Blocks in case you want to build your own flow:
97
+ #
98
+
99
# Placeholder for the v2 value-converter transformation.
#
# The conversion itself is not implemented yet. The hash is returned
# unchanged because the transformation pipeline assigns each step's return
# value back to the running hash; returning nil would wipe out the row.
def value_converters(hash, _options)
  #
  # TO BE IMPLEMENTED
  #
  hash
end
104
+
105
# Strips leading and trailing whitespace from every String value.
#
# Non-String values (nil, numbers produced by an earlier conversion step, ...)
# are left untouched. A stripped copy is stored instead of calling strip!,
# so frozen strings do not raise.
#
# Returns the same hash instance.
def strip_spaces(hash, _options)
  hash.each do |key, value|
    hash[key] = value.strip if value.is_a?(String)
  end
end
108
+
109
# Removes every key whose value is nil or a String consisting only of
# whitespace (including the empty string).
#
# Uses delete_if so the hash is not modified from inside a separate
# iteration, and so the hash itself is always returned.
def remove_blank_values(hash, _options)
  hash.delete_if do |_key, value|
    value.nil? || (value.is_a?(String) && value !~ /[^[:space:]]/)
  end
end
112
+
113
# Removes every key whose value is zero.
#
# Handles both Numeric zeros (after a numeric conversion step) and the
# String forms the v1 code matched ("0", "000", "0.0", "00.000"), so the
# result does not depend on whether this runs before or after conversion.
#
# Returns the same hash instance.
def remove_zero_values(hash, _options)
  hash.delete_if do |_key, value|
    case value
    when Numeric then value.zero?
    when String then value.match?(/\A(0+|0+\.0+)\z/)
    else false
    end
  end
end
116
+
117
# Removes entries whose key is nil or empty ('' or :"").
#
# delete_if is used instead of reject! because reject! returns nil when
# nothing was removed, and the transformation pipeline assigns the return
# value back to the running hash.
#
# Returns the same hash instance.
def remove_empty_keys(hash, _options)
  hash.delete_if do |key, _v|
    key.nil? || (key.respond_to?(:empty?) && key.empty?)
  end
end
120
+
121
# Converts String values that look like numbers into Float or Integer.
#
#   "12"    -> 12
#   "-1.5"  -> -1.5
#
# The patterns are anchored with \A and \z so the WHOLE value must be
# numeric. With ^ and $ a multi-line field such as "12\nabc" matched on its
# first line and was truncated to 12.
#
# Values that do not match are left unchanged. Returns the same hash instance.
def convert_values_to_numeric(hash, _options)
  hash.each do |key, value|
    case value
    when /\A[+-]?\d+\.\d+\z/
      hash[key] = value.to_f
    when /\A[+-]?\d+\z/
      hash[key] = value.to_i
    end
  end
end
131
+
132
# Like convert_values_to_numeric, but leaves values with leading zeroes
# (e.g. ZIP codes such as "00123") as Strings.
#
# A lone zero in the integer part is not a leading zero, so "0", "0.5" and
# "-0.25" are converted; "007" and "00.5" are not.
#
# The patterns are anchored with \A and \z so the whole value must match,
# not just one line of a multi-line field.
#
# Returns the same hash instance.
def convert_values_to_numeric_unless_leading_zeroes(hash, _options)
  hash.each do |key, value|
    case value
    when /\A[+-]?(?:0|[1-9]\d*)\.\d+\z/
      hash[key] = value.to_f
    when /\A[+-]?(?:0|[1-9]\d*)\z/
      hash[key] = value.to_i
    end
  end
end
142
+
143
+ # IMPORTANT NOTE:
144
+ # this can lead to cases where a nil or empty value gets converted into 0 or 0.0,
145
+ # and can then not be properly removed!
146
+ #
147
+ # you should first try to use convert_values_to_numeric or convert_values_to_numeric_unless_leading_zeroes
148
+ #
149
+ def convert_to_integer(hash, _options)
150
+ hash.each_key {|key| hash[key] = hash[key].to_i }
151
+ end
152
+
153
+ def convert_to_float(hash, _options)
154
+ hash.each_key {|key| hash[key] = hash[key].to_f }
155
+ end
76
156
 
77
157
  protected
78
158
 
@@ -2,8 +2,18 @@
2
2
 
3
3
  module SmarterCSV
4
4
  class << self
5
- # transform the headers that were in the file:
5
+ # this is processing the headers from the input file
6
6
  def header_transformations(header_array, options)
7
+ if options[:v2_mode]
8
+ header_transformations_v2(header_array, options)
9
+ else
10
+ header_transformations_v1(header_array, options)
11
+ end
12
+ end
13
+
14
+ # ---- V1.x Version: transform the headers that were in the file: ------------------------------------------
15
+ #
16
+ def header_transformations_v1(header_array, options)
7
17
  header_array.map!{|x| x.gsub(%r/#{options[:quote_char]}/, '')}
8
18
  header_array.map!{|x| x.strip} if options[:strip_whitespace]
9
19
 
@@ -57,7 +67,99 @@ module SmarterCSV
57
67
  header
58
68
  end
59
69
  end
70
+
60
71
  headers
61
72
  end
73
+
74
+ # ---- V2.x Version: transform the headers that were in the file: ------------------------------------------
75
+ #
76
# Applies every entry of options[:header_transformations] in order; each
# step receives the header array produced by the previous one.
#
# An entry can be:
#   * a callable (Proc / lambda)  -> called with (header_array, options)
#   * a Symbol                    -> public method of this module, called with (header_array, options)
#   * a Hash  { name => args }    -> dispatched through apply_transformation
#   * an Array [name, *args]      -> dispatched through apply_transformation
#
# Returns the transformed header array; the input is returned untouched
# when no transformations are configured.
# Raises SmarterCSV::IncorrectOption for any other entry type.
def header_transformations_v2(header_array, options)
  transformations = options[:header_transformations]
  return header_array if transformations.nil? || transformations.empty?

  transformations.each do |transformation|
    header_array =
      if transformation.respond_to?(:call) # user-provided Proc
        transformation.call(header_array, options)
      else
        case transformation
        when Symbol # pre-defined transformation defined in the SmarterCSV module
          public_send(transformation, header_array, options)
        when Hash # only the first key/value pair is used: name => args
          trans, args = transformation.first
          apply_transformation(trans, header_array, args, options)
        when Array # additional arguments in array form (e.g. into a Proc)
          trans, *args = transformation
          apply_transformation(trans, header_array, args, options)
        else
          raise SmarterCSV::IncorrectOption, "Invalid transformation type: #{transformation.class}"
        end
      end
  end

  header_array
end
103
+
104
+ def apply_transformation(transformation, header_array, args, options)
105
+ if transformation.respond_to?(:call)
106
+ # If transformation is a callable object (like a Proc)
107
+ transformation.call(header_array, args, options)
108
+ else
109
+ # If transformation is a symbol (method name)
110
+ public_send(transformation, header_array, args, options)
111
+ end
112
+ end
113
+
114
+ # pre-defined v2 header transformations:
115
+
116
+ # these are some pre-defined header transformations which can be used
117
+ # all these take the headers array as the input
118
+ #
119
+ # the computed options can be accessed via @options
120
+
121
# Normalizes raw headers into Symbol keys.
#
# For each header: removes the quote character, strips surrounding
# whitespace, downcases, replaces runs of whitespace / dashes with a single
# underscore, and converts the result to a Symbol.
#
# The quote character is removed as a literal string, so characters that are
# special in a regular expression (e.g. '|') work. Quotes are removed before
# stripping, matching the order of the v1 header code, so whitespace just
# inside the quotes is stripped too.
def keys_as_symbols(headers, options)
  quote_char = options[:quote_char].to_s
  headers.map do |header|
    header.gsub(quote_char, '').strip.downcase.gsub(/(\s|-)+/, '_').to_sym
  end
end
126
+
127
# Normalizes raw headers into String keys.
#
# For each header: removes the quote character, strips surrounding
# whitespace, downcases, and replaces runs of whitespace / dashes with a
# single underscore.
#
# The quote character is removed as a literal string, so characters that are
# special in a regular expression (e.g. '|') work. Quotes are removed before
# stripping, matching the order of the v1 header code, so whitespace just
# inside the quotes is stripped too.
def keys_as_strings(headers, options)
  quote_char = options[:quote_char].to_s
  headers.map do |header|
    header.gsub(quote_char, '').strip.downcase.gsub(/(\s|-)+/, '_')
  end
end
132
+
133
# Strips surrounding whitespace from each header and downcases it.
#
# Uses the non-destructive String#downcase: downcase! returns nil when the
# string is already lowercase, which turned such headers into nil.
#
# Returns a new array; the input array and its strings are not modified.
def downcase_headers(headers, _options)
  headers.map { |header| header.strip.downcase }
end
138
+
139
# Renames headers according to a from -> to mapping.
#
# headers - Array of headers as they are at this point of the pipeline
# mapping - Hash of { header => new_key }; a nil target is kept as nil
# options - uses :silence_missing_keys and :remove_unmapped_keys
#
# :silence_missing_keys may be
#   * true      -> never raise for mapping keys that are absent from the headers
#   * an Array  -> only the listed keys may be absent
#   * otherwise -> every mapping key must be present
#
# Returns a new Array with one entry per header: the mapped key, nil when the
# header is unmapped and :remove_unmapped_keys is set, or the original header.
#
# Raises SmarterCSV::IncorrectOption when mapping is not a non-empty Hash.
# Raises SmarterCSV::KeyMappingError when non-silenced mapping keys are
# missing from the headers.
def key_mapping(headers, mapping = {}, options)
  # check the type first, so that nil / non-Hash input raises the intended error
  unless mapping.is_a?(Hash) && !mapping.empty?
    raise(SmarterCSV::IncorrectOption, "ERROR: incorrect format for key_mapping! Expecting hash with from -> to mappings")
  end

  silenced = options[:silence_missing_keys]
  unless silenced == true
    missing_keys = mapping.keys - headers
    missing_keys -= silenced if silenced.is_a?(Array) # only these keys are optional
    raise SmarterCSV::KeyMappingError, "ERROR: cannot map headers: #{missing_keys.join(', ')}" unless missing_keys.empty?
  end

  # Apply key mapping, retaining nils for explicitly mapped headers
  headers.map do |header|
    if mapping.key?(header)
      # Maps the key according to the mapping, including nil mapping
      mapping[header]
    elsif options[:remove_unmapped_keys]
      # Remove headers not specified in the mapping
      nil
    else
      # Keep the original header if not specified in the mapping
      header
    end
  end
end
62
164
  end
63
165
  end
@@ -3,11 +3,21 @@
3
3
  module SmarterCSV
4
4
  class << self
5
5
  def header_validations(headers, options)
6
- check_duplicate_headers(headers, options)
7
- check_required_headers(headers, options)
6
+ if options[:v2_mode]
7
+ header_validations_v2(headers, options)
8
+ else
9
+ header_validations_v1(headers, options)
10
+ end
11
+ end
12
+
13
+ # ---- V1.x Version: validate the headers -----------------------------------------------------------------
14
+
15
+ def header_validations_v1(headers, options)
16
+ check_duplicate_headers_v1(headers, options)
17
+ check_required_headers_v1(headers, options)
8
18
  end
9
19
 
10
- def check_duplicate_headers(headers, _options)
20
+ def check_duplicate_headers_v1(headers, _options)
11
21
  header_counts = Hash.new(0)
12
22
  headers.each { |header| header_counts[header] += 1 unless header.nil? }
13
23
 
@@ -18,9 +28,7 @@ module SmarterCSV
18
28
  end
19
29
  end
20
30
 
21
- require 'set'
22
-
23
- def check_required_headers(headers, options)
31
+ def check_required_headers_v1(headers, options)
24
32
  if options[:required_keys] && options[:required_keys].is_a?(Array)
25
33
  headers_set = headers.to_set
26
34
  missing_keys = options[:required_keys].select { |k| !headers_set.include?(k) }
@@ -30,5 +38,99 @@ module SmarterCSV
30
38
  end
31
39
  end
32
40
  end
41
+
42
+ # ---- V2.x Version: validate the headers -----------------------------------------------------------------
43
+
44
+ # def header_validations_v2(headers, options)
45
+ # return unless options[:header_validations]
46
+
47
+ # options[:header_validations].each do |validation|
48
+ # if validation.respond_to?(:call)
49
+ # # Directly call if it's a Proc or lambda
50
+ # validation.call(headers)
51
+ # else
52
+ # binding.pry
53
+ # # Handle Symbol, Hash, or Array
54
+ # method_name, args = validation.is_a?(Symbol) ? [validation, []] : validation
55
+ # public_send(method_name, headers, *Array(args))
56
+ # end
57
+ # end
58
+ # end
59
+
60
+ def header_validations_v2(headers, options)
61
+ return unless options[:header_validations]
62
+
63
+ # do the header validations the user requested:
64
+ # Header validations typically raise errors directly
65
+ #
66
+ options[:header_validations].each do |validation|
67
+ if validation.respond_to?(:call)
68
+ # Directly call if it's a Proc or lambda
69
+ validation.call(headers)
70
+ else
71
+ case validation
72
+ when Symbol
73
+ public_send(validation, headers)
74
+ when Hash
75
+ val, args = validation.first
76
+ public_send(val, headers, args)
77
+ when Array
78
+ val, *args = validation
79
+ public_send(val, headers, args)
80
+ else
81
+ raise SmarterCSV::IncorrectOption, "Invalid validation type: #{validation.class}"
82
+ end
83
+ end
84
+ end
85
+ end
86
+
87
+ # def header_validations_v2_orig(headers, options)
88
+ # # do the header validations the user requested:
89
+ # # Header validations typically raise errors directly
90
+ # #
91
+ # if options[:header_validations]
92
+ # options[:header_validations].each do |validation|
93
+ # case validation
94
+ # when Symbol
95
+ # public_send(validation, headers)
96
+ # when Hash
97
+ # val, args = validation.first
98
+ # public_send(val, headers, args)
99
+ # when Array
100
+ # val, args = validation
101
+ # public_send(val, headers, args)
102
+ # else
103
+ # validation.call(headers) unless validation.nil?
104
+ # end
105
+ # end
106
+ # end
107
+ # end
108
+
109
+ # these are some pre-defined header validations which can be used
110
+ # all these take the headers array as the input
111
+ #
112
+ # the computed options can be accessed via @options
113
+
114
+ def unique_headers(headers)
115
+ header_counts = Hash.new(0)
116
+ headers.each { |header| header_counts[header] += 1 unless header.nil? }
117
+
118
+ duplicates = header_counts.select { |_, count| count > 1 }
119
+
120
+ unless duplicates.empty?
121
+ raise(SmarterCSV::DuplicateHeaders, "Duplicate Headers in CSV: #{duplicates.inspect}")
122
+ end
123
+ end
124
+
125
+ def required_headers(headers, required = [])
126
+ raise(SmarterCSV::IncorrectOption, "ERROR: required_headers validation needs an array argument") unless required.is_a?(Array)
127
+
128
+ headers_set = headers.to_set
129
+ missing = required.select { |r| !headers_set.include?(r) }
130
+
131
+ unless missing.empty?
132
+ raise(SmarterCSV::MissingKeys, "Missing Headers in CSV: #{missing.inspect}")
133
+ end
134
+ end
33
135
  end
34
136
  end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SmarterCSV
4
- DEFAULT_OPTIONS = {
4
+ COMMON_OPTIONS = {
5
5
  acceleration: true,
6
6
  auto_row_sep_chars: 500,
7
7
  chunk_size: nil,
@@ -15,28 +15,51 @@ module SmarterCSV
15
15
  force_utf8: false,
16
16
  headers_in_file: true,
17
17
  invalid_byte_sequence: '',
18
+ quote_char: '"',
19
+ remove_unmapped_keys: false,
20
+ row_sep: :auto, # was: $/,
21
+ silence_deprecations: false, # new in 1.11
22
+ silence_missing_keys: false,
23
+ skip_lines: nil,
24
+ user_provided_headers: nil,
25
+ verbose: false,
26
+ with_line_numbers: false,
27
+ v2_mode: false,
28
+ }.freeze
29
+
30
+ V1_DEFAULT_OPTIONS = {
18
31
  keep_original_headers: false,
19
32
  key_mapping: nil,
20
- quote_char: '"',
21
33
  remove_empty_hashes: true,
22
34
  remove_empty_values: true,
23
- remove_unmapped_keys: false,
24
35
  remove_values_matching: nil,
25
36
  remove_zero_values: false,
26
37
  required_headers: nil,
27
38
  required_keys: nil,
28
- row_sep: :auto, # was: $/,
29
- silence_missing_keys: false,
30
- skip_lines: nil,
31
39
  strings_as_keys: false,
32
40
  strip_chars_from_headers: nil,
33
41
  strip_whitespace: true,
34
- user_provided_headers: nil,
35
42
  value_converters: nil,
36
- verbose: false,
37
- with_line_numbers: false,
43
+ v2_mode: false,
38
44
  }.freeze
39
45
 
46
# Option keys that are deprecated in favour of the v2 options.
# handle_deprecations intersects this list with the keys of the given
# options, so every entry must be spelled exactly like the real option key.
DEPRECATED_OPTIONS = [
  :convert_values_to_numeric,
  :downcase_headers, # NOTE(review): confirm against the actual option name used by the v1 code
  :keep_original_headers,
  :key_mapping,
  :remove_empty_hashes,
  :remove_empty_values,
  :remove_values_matching,
  :remove_zero_values,
  :required_headers,
  :required_keys,
  :strings_as_keys,
  :strip_chars_from_headers,
  :strip_whitespace,
  :value_converters,
].freeze
62
+
40
63
  class << self
41
64
  # NOTE: this is not called when "parse" methods are tested by themselves
42
65
  def process_options(given_options = {})
@@ -45,7 +68,12 @@ module SmarterCSV
45
68
  # fix invalid input
46
69
  given_options[:invalid_byte_sequence] = '' if given_options[:invalid_byte_sequence].nil?
47
70
 
48
- @options = DEFAULT_OPTIONS.dup.merge!(given_options)
71
+ # warn about deprecated options / raises error for v2_mode
72
+ handle_deprecations(given_options)
73
+
74
+ given_options = preprocess_v2_options(given_options) if given_options[:v2_mode]
75
+
76
+ @options = compute_default_options(given_options).merge!(given_options)
49
77
  puts "Computed options:\n#{pp(@options)}\n" if given_options[:verbose]
50
78
 
51
79
  validate_options!(@options)
@@ -56,11 +84,35 @@ module SmarterCSV
56
84
  #
57
85
  # ONLY FOR BACKWARDS-COMPATIBILITY
58
86
  def default_options
59
- DEFAULT_OPTIONS
87
+ COMMON_OPTIONS.merge(V1_DEFAULT_OPTIONS)
60
88
  end
61
89
 
62
90
  private
63
91
 
92
# Builds the default options that the given options are merged into.
#
#   * v1 mode (no :v2_mode)       -> COMMON_OPTIONS + V1_DEFAULT_OPTIONS
#   * v2 mode, defaults: 'none'   -> empty Hash (no defaults at all)
#   * v2 mode, defaults: 'v1'     -> COMMON_OPTIONS + V2_DEFAULT_OPTIONS + V1_TRANSFORMATIONS
#   * v2 mode, anything else      -> COMMON_OPTIONS + V2_DEFAULT_OPTIONS + V2_TRANSFORMATIONS
#
# Always returns a new, unfrozen Hash, because the caller merges the given
# options into the result with merge!. The 'none' case used to return nil.
def compute_default_options(options = {})
  return COMMON_OPTIONS.merge(V1_DEFAULT_OPTIONS) unless options[:v2_mode]

  requested_defaults = options[:defaults].to_s
  return {} if requested_defaults == 'none'

  transformations = requested_defaults == 'v1' ? V1_TRANSFORMATIONS : V2_TRANSFORMATIONS
  COMMON_OPTIONS.merge(V2_DEFAULT_OPTIONS).merge(transformations)
end
105
+
106
+ def handle_deprecations(options)
107
+ used_deprecated_options = DEPRECATED_OPTIONS & options.keys
108
+ message = "SmarterCSV #{VERSION} DEPRECATED OPTIONS: #{pp(used_deprecated_options)}"
109
+ if options[:v2_mode]
110
+ raise(SmarterCSV::DeprecatedOptions, "ERROR: #{message}") unless used_deprecated_options.empty? || options[:silence_deprecations]
111
+ else
112
+ puts "DEPRECATION WARNING: #{message}" unless used_deprecated_options.empty? || options[:silence_deprecations]
113
+ end
114
+ end
115
+
64
116
  def validate_options!(options)
65
117
  # deprecate required_headers
66
118
  unless options[:required_headers].nil?
@@ -89,5 +141,57 @@ module SmarterCSV
89
141
  def pp(value)
90
142
  defined?(AwesomePrint) ? value.awesome_inspect(index: nil) : value.inspect
91
143
  end
144
+
145
+ # ---- V2 code ----------------------------------------------------------------------------------------
146
+
147
+ V2_DEFAULT_OPTIONS = {
148
+ # These need to go to the COMMON_OPTIONS:
149
+ remove_empty_hashes: true, # this might need a transformation or move to common options
150
+ # ------------
151
+ header_transformations: [:keys_as_symbols],
152
+ header_validations: [:unique_headers],
153
+ # data_transformations: [:replace_blank_with_nil],
154
+ # data_validations: [],
155
+ hash_transformations: [:strip_spaces, :remove_blank_values],
156
+ hash_validations: [],
157
+ v2_mode: true,
158
+ }.freeze
159
+
160
+ V2_TRANSFORMATIONS = {
161
+ header_transformations: [:keys_as_symbols],
162
+ header_validations: [:unique_headers],
163
+ # data_transformations: [:replace_blank_with_nil],
164
+ # data_validations: [],
165
+ hash_transformations: [:v1_backwards_compatibility],
166
+ # hash_transformations: [:remove_empty_keys, :strip_spaces, :remove_blank_values, :convert_values_to_numeric], # ??? :convert_values_to_numeric]
167
+ hash_validations: [],
168
+ }.freeze
169
+
170
+ V1_TRANSFORMATIONS = {
171
+ header_transformations: [:keys_as_symbols],
172
+ header_validations: [:unique_headers],
173
+ # data_transformations: [:replace_blank_with_nil],
174
+ # data_validations: [],
175
+ hash_transformations: [:strip_spaces, :remove_blank_values, :convert_values_to_numeric],
176
+ hash_validations: [],
177
+ }.freeze
178
+
179
# Normalizes the user-supplied :header_transformations before the defaults
# are merged in.
#
# We want to provide safe defaults for easy processing, which is why the
# special keyword :none / 'none' exists to request no header transformations.
# The keyword is removed here so it never reaches the transformation pipeline.
#
#   * not given (nil)              -> key left untouched, so the defaults apply
#   * :none / 'none' / [:none]     -> []
#   * a single transformation      -> wrapped into a one-element Array
#   * an Array                     -> same Array without any 'none' entries
#
# Modifies and returns the given options Hash.
def preprocess_v2_options(options)
  return options unless options[:v2_mode] || options[:header_transformations]

  requested = options[:header_transformations]
  # nothing requested: do not store [], it would override the default transformations
  return options if requested.nil?

  # a lone Symbol / Proc / Hash does not respond to the Array API used below
  requested = [requested] unless requested.is_a?(Array)
  options[:header_transformations] = requested.reject { |x| x.to_s == 'none' }
  options
end
+ end
92
196
  end
93
197
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  module SmarterCSV
4
4
  class SmarterCSVException < StandardError; end
5
+ class DeprecatedOptions < SmarterCSVException; end
5
6
  class HeaderSizeMismatch < SmarterCSVException; end
6
7
  class IncorrectOption < SmarterCSVException; end
7
8
  class ValidationError < SmarterCSVException; end
@@ -108,6 +109,10 @@ module SmarterCSV
108
109
 
109
110
  next if options[:remove_empty_hashes] && hash.empty?
110
111
 
112
+ #
113
+ # should HASH VALIDATIONS go here instead?
114
+ #
115
+
111
116
  puts "CSV Line #{@file_line_count}: #{pp(hash)}" if @verbose == '2' # very verbose setting
112
117
  # optional adding of csv_line_number to the hash to help debugging
113
118
  hash[:csv_line_number] = @csv_line_count if options[:with_line_numbers]
@@ -165,22 +170,19 @@ module SmarterCSV
165
170
  end
166
171
 
167
172
  class << self
173
+ # Counts the number of quote characters in a line, excluding escaped quotes.
174
+ # FYI: using Ruby built-in regex processing to determine the number of quotes
168
175
  def count_quote_chars(line, quote_char)
169
176
  return 0 if line.nil? || quote_char.nil? || quote_char.empty?
170
177
 
171
- count = 0
172
- escaped = false
178
+ # Escaped quote character (e.g., if quote_char is ", then escaped is \")
179
+ escaped_quote = Regexp.escape(quote_char)
173
180
 
174
- line.each_char do |char|
175
- if char == '\\' && !escaped
176
- escaped = true
177
- else
178
- count += 1 if char == quote_char && !escaped
179
- escaped = false
180
- end
181
- end
181
+ # Pattern to match a quote character not preceded by a backslash
182
+ pattern = /(?<!\\)(?:\\\\)*#{escaped_quote}/
182
183
 
183
- count
184
+ # Count occurrences
185
+ line.scan(pattern).count
184
186
  end
185
187
 
186
188
  def has_acceleration?
@@ -15,6 +15,7 @@ module SmarterCSV
15
15
  @raw_header = nil # header as it appears in the file
16
16
  @result = []
17
17
  @warnings = {}
18
+ @v2_mode = false
18
19
  @enforce_utf8 = false # only set to true if needed (after options parsing)
19
20
  end
20
21
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SmarterCSV
4
- VERSION = "1.10.1"
4
+ VERSION = "1.11.0.pre2"
5
5
  end
data/lib/smarter_csv.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'set'
4
+
3
5
  require "smarter_csv/version"
4
6
  require "smarter_csv/file_io"
5
7
  require "smarter_csv/options_processing"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: smarter_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.10.1
4
+ version: 1.11.0.pre2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tilo Sloboda
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-07 00:00:00.000000000 Z
11
+ date: 2024-01-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: awesome_print
@@ -147,9 +147,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
147
147
  version: 2.5.0
148
148
  required_rubygems_version: !ruby/object:Gem::Requirement
149
149
  requirements:
150
- - - ">="
150
+ - - ">"
151
151
  - !ruby/object:Gem::Version
152
- version: '0'
152
+ version: 1.3.1
153
153
  requirements: []
154
154
  rubygems_version: 3.2.3
155
155
  signing_key: