smarter_csv 1.16.1 → 1.16.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -1
- data/CONTRIBUTORS.md +2 -1
- data/README.md +1 -1
- data/docs/basic_write_api.md +48 -0
- data/docs/options.md +2 -1
- data/docs/releases/1.16.0/changes.md +0 -2
- data/docs/ruby_csv_pitfalls.md +228 -197
- data/ext/smarter_csv/Makefile +14 -17
- data/lib/smarter_csv/hash_transformations.rb +1 -1
- data/lib/smarter_csv/header_transformations.rb +11 -9
- data/lib/smarter_csv/reader.rb +7 -3
- data/lib/smarter_csv/reader_options.rb +223 -0
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv/writer.rb +36 -26
- data/lib/smarter_csv/writer_options.rb +26 -0
- data/lib/smarter_csv.rb +2 -1
- metadata +4 -8
- data/ext/smarter_csv/smarter_csv.bundle +0 -0
- data/ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Info.plist +0 -20
- data/ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/DWARF/smarter_csv.bundle +0 -0
- data/ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/Relocations/aarch64/smarter_csv.bundle.yml +0 -5
- data/ext/smarter_csv/smarter_csv.o +0 -0
- data/lib/smarter_csv/options.rb +0 -229
data/lib/smarter_csv/options.rb
DELETED
|
@@ -1,229 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module SmarterCSV
  #
  # Returns the frozen hash of default reader options.
  #
  # NOTE: this is not called when "parse" methods are tested by themselves
  #
  # ONLY FOR BACKWARDS-COMPATIBILITY
  def self.default_options
    Options::DEFAULT_OPTIONS
  end

  # Mixin that merges user-supplied options with DEFAULT_OPTIONS, translates
  # deprecated option names to their current equivalents, and validates the result.
  # The including object receives the computed hash in @options.
  module Options
    DEFAULT_OPTIONS = {
      acceleration: true, # if user wants to use acceleration or not
      auto_row_sep_chars: 500,
      bad_row_limit: nil,
      chunk_size: nil,
      col_sep: :auto, # was: ',',
      collect_raw_lines: true,
      comment_regexp: nil, # was: /\A#/,
      convert_values_to_numeric: true,
      downcase_header: true,
      duplicate_header_suffix: '', # was: nil,
      field_size_limit: nil, # Integer (bytes) or nil for no limit. Raises FieldSizeLimitExceeded if any
      # extracted field exceeds this size. Prevents DoS from runaway quoted
      # fields (unbounded multiline stitching) or huge inline payloads.
      file_encoding: 'utf-8',
      force_utf8: false,
      headers_in_file: true,
      invalid_byte_sequence: '',
      keep_original_headers: false,
      key_mapping: nil,
      strict: false, # DEPRECATED -> use missing_headers
      missing_headers: :auto, # :auto (auto-generate names for extra cols) or :raise (raise HeaderSizeMismatch)
      missing_header_prefix: 'column_',
      nil_values_matching: nil, # regex: set matching values to nil (key kept); pairs with remove_empty_values
      on_bad_row: :raise,
      on_chunk: nil, # callable: fired after each chunk is parsed, before yielding to the block
      on_complete: nil, # callable: fired once after the entire file is processed
      on_start: nil, # callable: fired once before the first row is parsed
      quote_boundary: :standard, # :standard (only at field boundary 👍) or :legacy (any quote toggles state 👎)
      quote_char: '"',
      quote_escaping: :auto,
      remove_empty_hashes: true,
      remove_empty_values: true,
      remove_unmapped_keys: false,
      remove_values_matching: nil, # DEPRECATED: use nil_values_matching instead
      remove_zero_values: false,
      required_headers: nil,
      required_keys: nil,
      row_sep: :auto, # was: $/,
      silence_missing_keys: false,
      skip_lines: nil,
      strings_as_keys: false,
      strip_chars_from_headers: nil,
      strip_whitespace: true,
      user_provided_headers: nil,
      value_converters: nil,
      verbose: :normal, # nil/:normal (default), :quiet (suppress warnings), :debug (print diagnostics); true/false are deprecated
      with_line_numbers: false,
    }.freeze

    # Merges the given options over DEFAULT_OPTIONS, normalizes and validates them,
    # stores the result in @options and returns it.
    #
    # The caller's hash is never modified: all adjustments are made on a shallow
    # copy, so the same (possibly frozen) hash can safely be reused between calls.
    #
    # NOTE: this is not called when "parse" methods are tested by themselves
    #
    # @param given_options [Hash, nil] user-supplied options; nil is treated as {}
    # @return [Hash] the computed options (also stored in @options)
    # @raise [SmarterCSV::ValidationError] if any option value is invalid
    def process_options(given_options = {})
      # Shallow copy: the :headers_in_file precaution below must not leak into the caller's hash.
      given_options = (given_options || {}).dup

      # Debug output before merge — check raw verbose value (true or :debug)
      $stderr.puts "User provided options:\n#{pp(given_options)}\n" if [true, :debug].include?(given_options[:verbose])

      # Special case for :user_provided_headers:
      #
      # If we would use the default `headers_in_file: true`, and `:user_provided_headers` are given,
      # we could lose the first data row
      #
      # We now err on the side of treating an actual header as data, rather than losing a data row.
      #
      if given_options[:user_provided_headers] && !given_options.key?(:headers_in_file)
        given_options[:headers_in_file] = false
        warn "WARNING: setting `headers_in_file: false` as a precaution to not lose the first row. Set explicitly to `true` if you have headers." unless given_options[:verbose] == :quiet
      end

      @options = DEFAULT_OPTIONS.merge(given_options)

      normalize_verbose_option!

      # fix invalid input
      @options[:invalid_byte_sequence] ||= ''

      # Normalize headers: { only: [...] } / { except: [...] } to internal option names.
      # The public API is headers: { only: } or headers: { except: }.
      # Internally we use only_headers: / except_headers: (what the C extension reads).
      if (hdr = @options.delete(:headers)).is_a?(Hash)
        @options[:only_headers] = hdr[:only] if hdr.key?(:only)
        @options[:except_headers] = hdr[:except] if hdr.key?(:except)
      end

      # Deprecation: direct use of only_headers: / except_headers: (use headers: { only: } instead)
      if given_options.key?(:only_headers) && !given_options.key?(:headers)
        warn "DEPRECATION WARNING: 'only_headers:' is deprecated. Use 'headers: { only: [...] }' instead." unless @options[:verbose] == :quiet
      end
      if given_options.key?(:except_headers) && !given_options.key?(:headers)
        warn "DEPRECATION WARNING: 'except_headers:' is deprecated. Use 'headers: { except: [...] }' instead." unless @options[:verbose] == :quiet
      end

      # Normalize only_headers/except_headers to arrays of symbols (internal names, read by C extension)
      normalize_header_filter!(:only_headers, 'only')
      normalize_header_filter!(:except_headers, 'except')

      # Deprecation: remove_values_matching → nil_values_matching
      # Old behavior: removes the key-value pair entirely.
      # New behavior: nil_values_matching sets the value to nil (key kept);
      # combined with the default remove_empty_values: true the net effect is identical.
      # With remove_empty_values: false, the key is retained with a nil value.
      if given_options.key?(:remove_values_matching)
        unless @options[:verbose] == :quiet
          warn "DEPRECATION WARNING: 'remove_values_matching' is deprecated. " \
               "Use 'nil_values_matching' instead. With the default 'remove_empty_values: true' " \
               "the net behavior is identical. With 'remove_empty_values: false', matching values " \
               "are set to nil but the key is retained in the result hash."
        end
        # An explicitly given nil_values_matching wins over the deprecated option.
        @options[:nil_values_matching] ||= @options[:remove_values_matching]
        @options[:remove_values_matching] = nil # clear to prevent double-processing
      end

      # Translate deprecated :strict option to :missing_headers
      if given_options.key?(:strict)
        unless @options[:verbose] == :quiet
          warn "DEPRECATION WARNING: 'strict' option is deprecated and will be removed in a future version. " \
               "Use 'missing_headers: :raise' instead of 'strict: true', or 'missing_headers: :auto' instead of 'strict: false'."
        end
        # An explicitly given :missing_headers takes precedence over the deprecated :strict.
        @options[:missing_headers] = (@options[:strict] ? :raise : :auto) unless given_options.key?(:missing_headers)
      end

      # Keep :strict synchronized with :missing_headers (C extension reads :strict directly)
      @options[:strict] = (@options[:missing_headers] == :raise)

      $stderr.puts "Computed options:\n#{pp(@options)}\n" if @options[:verbose] == :debug

      validate_options!(@options)
      @options
    end

    private

    # Normalizes @options[:verbose] to one of :quiet, :normal or :debug.
    #
    # Done once here and stored back into @options, so that all subsequent checks
    # are plain symbol comparisons.
    #   :quiet  — suppress all warnings and notices (good for production)
    #   :normal — show behavioral warnings (default; helpful for new users)
    #   :debug  — :normal + print computed options and per-row diagnostics
    # nil is silently normalized to :normal; true/false are deprecated.
    def normalize_verbose_option!
      case @options[:verbose]
      when :quiet, :normal, :debug
        # keep as is
      when nil
        @options[:verbose] = :normal
      when false
        warn "DEPRECATION WARNING: verbose: false is deprecated. Use verbose: :normal instead (or omit — it is the default)."
        @options[:verbose] = :normal
      when true
        warn "DEPRECATION WARNING: verbose: true is deprecated. Use verbose: :debug instead."
        @options[:verbose] = :debug
      else
        warn "WARNING: unknown verbose value #{@options[:verbose].inspect}, defaulting to :normal. Valid values: :quiet, :normal, :debug."
        @options[:verbose] = :normal
      end
    end

    # Converts @options[option_key] (a single value or a list) into an Array of Symbols.
    # Does nothing when the option is nil or false.
    #
    # @param option_key [Symbol] internal option name (:only_headers or :except_headers)
    # @param public_name [String] key of the public `headers:` hash, used in the error message
    # @raise [SmarterCSV::ValidationError] if any element is neither a String nor a Symbol
    def normalize_header_filter!(option_key, public_name)
      return unless @options[option_key]

      values = Array(@options[option_key])
      bad = values.reject { |v| v.is_a?(Symbol) || v.is_a?(String) }
      raise SmarterCSV::ValidationError, "headers: { #{public_name}: } elements must be String or Symbol, got: #{bad.map(&:class).uniq.inspect}" if bad.any?

      @options[option_key] = values.map(&:to_sym)
    end

    # Checks the computed options and raises a single error listing every problem found.
    # Also moves the deprecated :required_headers value to :required_keys when the
    # latter is not set.
    #
    # @param options [Hash] computed options; may be modified (required_headers → required_keys)
    # @raise [SmarterCSV::ValidationError] with the inspected list of error messages
    def validate_options!(options)
      # deprecate required_headers
      unless options[:required_headers].nil?
        warn "DEPRECATION WARNING: please use 'required_keys' instead of 'required_headers'" unless options[:verbose] == :quiet
        if options[:required_keys].nil?
          options[:required_keys] = options[:required_headers]
          options[:required_headers] = nil
        end
      end

      errors = []
      errors << "invalid row_sep" if options.key?(:row_sep) && !option_valid?(options[:row_sep])
      errors << "invalid col_sep" if options.key?(:col_sep) && !option_valid?(options[:col_sep])
      errors << "invalid quote_char" if options.key?(:quote_char) && !option_valid?(options[:quote_char])
      if options.key?(:quote_char) && options[:quote_char].is_a?(String) && options[:quote_char].bytesize > 1
        errors << "invalid quote_char: must be a single byte (got #{options[:quote_char].inspect})"
      end
      unless %i[double_quotes backslash auto].include?(options[:quote_escaping])
        errors << "invalid quote_escaping: must be :double_quotes, :backslash, or :auto"
      end
      unless %i[legacy standard].include?(options[:quote_boundary])
        errors << "invalid quote_boundary: must be :legacy or :standard"
      end
      fsl = options[:field_size_limit]
      unless fsl.nil? || (fsl.is_a?(Integer) && fsl > 0)
        errors << "invalid field_size_limit: must be nil or a positive Integer (got #{fsl.inspect})"
      end
      obr = options[:on_bad_row]
      unless %i[raise skip collect].include?(obr) || obr.respond_to?(:call)
        errors << "invalid on_bad_row: must be :raise, :skip, :collect, or a callable"
      end
      %i[on_start on_chunk on_complete].each do |hook|
        val = options[hook]
        errors << "invalid #{hook}: must be nil or a callable" if !val.nil? && !val.respond_to?(:call)
      end
      unless %i[auto raise].include?(options[:missing_headers])
        errors << "invalid missing_headers: must be :auto or :raise"
      end
      if options[:only_headers] && options[:except_headers]
        errors << "cannot use both 'headers: { only: }' and 'headers: { except: }' at the same time"
      end
      raise SmarterCSV::ValidationError, errors.inspect if errors.any?
    end

    # A separator / quote option is valid when it is the symbol :auto or a non-empty String.
    #
    # @param str [Object] the option value to check
    # @return [Boolean]
    def option_valid?(str)
      return true if str.is_a?(Symbol) && str == :auto
      return true if str.is_a?(String) && !str.empty?

      false
    end

    # Formats a value for the debug output: uses AwesomePrint when that constant
    # is defined, plain #inspect otherwise.
    # NOTE(review): inside including classes this private method takes precedence over Kernel#pp.
    #
    # @param value [Object]
    # @return [String]
    def pp(value)
      defined?(AwesomePrint) ? value.awesome_inspect(index: nil) : value.inspect
    end
  end
end
|