smarter_csv 1.16.1 → 1.16.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,229 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
module SmarterCSV
  # Returns the default option set.
  #
  # ONLY FOR BACKWARDS-COMPATIBILITY: this simply exposes Options::DEFAULT_OPTIONS.
  # NOTE: this is not called when "parse" methods are tested by themselves.
  #
  # @return [Hash] the frozen Options::DEFAULT_OPTIONS hash
  def self.default_options
    Options::DEFAULT_OPTIONS
  end

  # Option handling: default values, normalization of deprecated / alternative
  # option spellings, and validation of the final option hash.
  module Options
    DEFAULT_OPTIONS = {
      acceleration: true, # if user wants to use acceleration or not
      auto_row_sep_chars: 500,
      bad_row_limit: nil,
      chunk_size: nil,
      col_sep: :auto, # was: ',',
      collect_raw_lines: true,
      comment_regexp: nil, # was: /\A#/,
      convert_values_to_numeric: true,
      downcase_header: true,
      duplicate_header_suffix: '', # was: nil,
      field_size_limit: nil, # Integer (bytes) or nil for no limit. Raises FieldSizeLimitExceeded if any
      #                        extracted field exceeds this size. Prevents DoS from runaway quoted
      #                        fields (unbounded multiline stitching) or huge inline payloads.
      file_encoding: 'utf-8',
      force_utf8: false,
      headers_in_file: true,
      invalid_byte_sequence: '',
      keep_original_headers: false,
      key_mapping: nil,
      strict: false, # DEPRECATED -> use missing_headers
      missing_headers: :auto, # :auto (auto-generate names for extra cols) or :raise (raise HeaderSizeMismatch)
      missing_header_prefix: 'column_',
      nil_values_matching: nil, # regex: set matching values to nil (key kept); pairs with remove_empty_values
      on_bad_row: :raise,
      on_chunk: nil, # callable: fired after each chunk is parsed, before yielding to the block
      on_complete: nil, # callable: fired once after the entire file is processed
      on_start: nil, # callable: fired once before the first row is parsed
      quote_boundary: :standard, # :standard (only at field boundary 👍) or :legacy (any quote toggles state 👎)
      quote_char: '"',
      quote_escaping: :auto,
      remove_empty_hashes: true,
      remove_empty_values: true,
      remove_unmapped_keys: false,
      remove_values_matching: nil, # DEPRECATED: use nil_values_matching instead
      remove_zero_values: false,
      required_headers: nil,
      required_keys: nil,
      row_sep: :auto, # was: $/,
      silence_missing_keys: false,
      skip_lines: nil,
      strings_as_keys: false,
      strip_chars_from_headers: nil,
      strip_whitespace: true,
      user_provided_headers: nil,
      value_converters: nil,
      verbose: :normal, # nil/:normal (default), :quiet (suppress warnings), :debug (print diagnostics); true/false are deprecated
      with_line_numbers: false,
    }.freeze

    # Merges the user's options over DEFAULT_OPTIONS, normalizes deprecated and
    # alternative spellings, validates the result, stores it in @options and returns it.
    #
    # The hash passed in is never modified; all adjustments happen on a copy.
    #
    # NOTE: this is not called when "parse" methods are tested by themselves
    #
    # @param given_options [Hash] options supplied by the caller
    # @return [Hash] the fully merged and normalized options (also stored in @options)
    # @raise [SmarterCSV::ValidationError] if a header filter contains an element that is
    #   neither String nor Symbol, or if validate_options! rejects the final options
    def process_options(given_options = {})
      # Shallow copy: the `headers_in_file` precaution below must not leak into the
      # caller's hash, and callers are allowed to pass a frozen hash.
      given_options = given_options.dup

      # Debug output before merge — check raw verbose value (true or :debug)
      $stderr.puts "User provided options:\n#{pp(given_options)}\n" if [true, :debug].include?(given_options[:verbose])

      # Special case for :user_provided_headers:
      #
      # If we would use the default `headers_in_file: true`, and `:user_provided_headers` are given,
      # we could lose the first data row
      #
      # We now err on the side of treating an actual header as data, rather than losing a data row.
      #
      if given_options[:user_provided_headers] && !given_options.key?(:headers_in_file)
        given_options[:headers_in_file] = false
        warn "WARNING: setting `headers_in_file: false` as a precaution to not lose the first row. Set explicitly to `true` if you have headers." unless given_options[:verbose] == :quiet
      end

      # Hash#merge returns a new, unfrozen hash, so the frozen defaults stay untouched.
      @options = DEFAULT_OPTIONS.merge(given_options)

      # Normalize verbose to a symbol — done once here, stored back into @options.
      # All subsequent checks are plain symbol comparisons; no re-evaluation needed.
      #   :quiet  — suppress all warnings and notices (good for production)
      #   :normal — show behavioral warnings (default; helpful for new users)
      #   :debug  — :normal + print computed options and per-row diagnostics
      # nil is silently normalized to :normal; true/false are deprecated.
      case @options[:verbose]
      when :quiet, :normal, :debug
        # keep as is
      when nil
        @options[:verbose] = :normal
      when false
        warn "DEPRECATION WARNING: verbose: false is deprecated. Use verbose: :normal instead (or omit — it is the default)."
        @options[:verbose] = :normal
      when true
        warn "DEPRECATION WARNING: verbose: true is deprecated. Use verbose: :debug instead."
        @options[:verbose] = :debug
      else
        warn "WARNING: unknown verbose value #{@options[:verbose].inspect}, defaulting to :normal. Valid values: :quiet, :normal, :debug."
        @options[:verbose] = :normal
      end

      quiet = @options[:verbose] == :quiet

      # fix invalid input
      @options[:invalid_byte_sequence] ||= ''

      # Normalize headers: { only: [...] } / { except: [...] } to internal option names.
      # The public API is headers: { only: } or headers: { except: }.
      # Internally we use only_headers: / except_headers: (what the C extension reads).
      header_filter = @options.delete(:headers)
      if header_filter.is_a?(Hash)
        @options[:only_headers] = header_filter[:only] if header_filter.key?(:only)
        @options[:except_headers] = header_filter[:except] if header_filter.key?(:except)
      end

      # internal option name => selector name used in the public `headers:` API
      header_filters = { only_headers: 'only', except_headers: 'except' }

      # Deprecation: direct use of only_headers: / except_headers: (use headers: { only: } instead)
      unless given_options.key?(:headers) || quiet
        header_filters.each do |internal_key, selector|
          next unless given_options.key?(internal_key)

          warn "DEPRECATION WARNING: '#{internal_key}:' is deprecated. Use 'headers: { #{selector}: [...] }' instead."
        end
      end

      # Normalize only_headers/except_headers to arrays of symbols (internal names, read by C extension)
      header_filters.each do |internal_key, selector|
        next unless @options[internal_key]

        values = Array(@options[internal_key])
        bad = values.reject { |v| v.is_a?(Symbol) || v.is_a?(String) }
        # `bad.empty?` rather than `bad.any?`: Array#any? ignores nil/false elements,
        # which would let e.g. [nil] slip through and crash on nil.to_sym below.
        unless bad.empty?
          raise SmarterCSV::ValidationError, "headers: { #{selector}: } elements must be String or Symbol, got: #{bad.map(&:class).uniq.inspect}"
        end

        @options[internal_key] = values.map(&:to_sym)
      end

      # Deprecation: remove_values_matching → nil_values_matching
      #   Old behavior: removes the key-value pair entirely.
      #   New behavior: nil_values_matching sets the value to nil (key kept);
      #   combined with the default remove_empty_values: true the net effect is identical.
      #   With remove_empty_values: false, the key is retained with a nil value.
      if given_options.key?(:remove_values_matching)
        unless quiet
          warn "DEPRECATION WARNING: 'remove_values_matching' is deprecated. " \
               "Use 'nil_values_matching' instead. With the default 'remove_empty_values: true' " \
               "the net behavior is identical. With 'remove_empty_values: false', matching values " \
               "are set to nil but the key is retained in the result hash."
        end
        @options[:nil_values_matching] ||= @options[:remove_values_matching]
        @options[:remove_values_matching] = nil # clear to prevent double-processing
      end

      # Translate deprecated :strict option to :missing_headers
      if given_options.key?(:strict)
        unless quiet
          warn "DEPRECATION WARNING: 'strict' option is deprecated and will be removed in a future version. " \
               "Use 'missing_headers: :raise' instead of 'strict: true', or 'missing_headers: :auto' instead of 'strict: false'."
        end
        # An explicitly given :missing_headers always wins over the deprecated :strict.
        unless given_options.key?(:missing_headers)
          @options[:missing_headers] = @options[:strict] ? :raise : :auto
        end
      end

      # Keep :strict synchronized with :missing_headers (C extension reads :strict directly)
      @options[:strict] = (@options[:missing_headers] == :raise)

      $stderr.puts "Computed options:\n#{pp(@options)}\n" if @options[:verbose] == :debug

      validate_options!(@options)
      @options
    end

    private

    # Validates the merged option hash and collects every problem before raising.
    #
    # Side effect: when the deprecated :required_headers is set and :required_keys is not,
    # the value is moved over to :required_keys and :required_headers is cleared.
    #
    # @param options [Hash] merged options (modified in place for :required_headers)
    # @return [nil] when all checks pass
    # @raise [SmarterCSV::ValidationError] with the inspected list of all error messages
    def validate_options!(options)
      # deprecate required_headers
      unless options[:required_headers].nil?
        warn "DEPRECATION WARNING: please use 'required_keys' instead of 'required_headers'" unless options[:verbose] == :quiet
        if options[:required_keys].nil?
          options[:required_keys] = options[:required_headers]
          options[:required_headers] = nil
        end
      end

      errors = []

      # Separators and quote char are only checked when the key is present at all.
      errors << "invalid row_sep" if options.key?(:row_sep) && !option_valid?(options[:row_sep])
      errors << "invalid col_sep" if options.key?(:col_sep) && !option_valid?(options[:col_sep])
      errors << "invalid quote_char" if options.key?(:quote_char) && !option_valid?(options[:quote_char])
      if options.key?(:quote_char) && options[:quote_char].is_a?(String) && options[:quote_char].bytesize > 1
        errors << "invalid quote_char: must be a single byte (got #{options[:quote_char].inspect})"
      end

      unless %i[double_quotes backslash auto].include?(options[:quote_escaping])
        errors << "invalid quote_escaping: must be :double_quotes, :backslash, or :auto"
      end
      unless %i[legacy standard].include?(options[:quote_boundary])
        errors << "invalid quote_boundary: must be :legacy or :standard"
      end

      fsl = options[:field_size_limit]
      unless fsl.nil? || (fsl.is_a?(Integer) && fsl > 0)
        errors << "invalid field_size_limit: must be nil or a positive Integer (got #{fsl.inspect})"
      end

      obr = options[:on_bad_row]
      unless %i[raise skip collect].include?(obr) || obr.respond_to?(:call)
        errors << "invalid on_bad_row: must be :raise, :skip, :collect, or a callable"
      end

      %i[on_start on_chunk on_complete].each do |hook|
        val = options[hook]
        errors << "invalid #{hook}: must be nil or a callable" if !val.nil? && !val.respond_to?(:call)
      end

      unless %i[auto raise].include?(options[:missing_headers])
        errors << "invalid missing_headers: must be :auto or :raise"
      end
      if options[:only_headers] && options[:except_headers]
        errors << "cannot use both 'headers: { only: }' and 'headers: { except: }' at the same time"
      end

      raise SmarterCSV::ValidationError, errors.inspect if errors.any?
    end

    # Checks a separator / quote-char style option value.
    #
    # @param str [Object] the option value to check
    # @return [Boolean] true for the symbol :auto or any non-empty String, false otherwise
    def option_valid?(str)
      return true if str.is_a?(Symbol) && str == :auto
      return true if str.is_a?(String) && !str.empty?

      false
    end

    # Formats a value for the debug output of process_options.
    # NOTE: within this module this takes precedence over Kernel#pp; unlike Kernel#pp
    # it prints nothing and returns a String.
    #
    # @param value [Object] the value to format
    # @return [String] `awesome_inspect` output when AwesomePrint is loaded, otherwise `inspect`
    def pp(value)
      defined?(AwesomePrint) ? value.awesome_inspect(index: nil) : value.inspect
    end
  end
end