smarter_csv 1.16.1 → 1.16.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -1
- data/CONTRIBUTORS.md +2 -1
- data/README.md +1 -1
- data/docs/basic_write_api.md +48 -0
- data/docs/options.md +2 -1
- data/docs/releases/1.16.0/changes.md +0 -2
- data/docs/ruby_csv_pitfalls.md +228 -197
- data/ext/smarter_csv/Makefile +14 -17
- data/lib/smarter_csv/hash_transformations.rb +1 -1
- data/lib/smarter_csv/header_transformations.rb +11 -9
- data/lib/smarter_csv/reader.rb +7 -3
- data/lib/smarter_csv/reader_options.rb +223 -0
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv/writer.rb +36 -26
- data/lib/smarter_csv/writer_options.rb +26 -0
- data/lib/smarter_csv.rb +2 -1
- metadata +4 -8
- data/ext/smarter_csv/smarter_csv.bundle +0 -0
- data/ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Info.plist +0 -20
- data/ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/DWARF/smarter_csv.bundle +0 -0
- data/ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/Relocations/aarch64/smarter_csv.bundle.yml +0 -5
- data/ext/smarter_csv/smarter_csv.o +0 -0
- data/lib/smarter_csv/options.rb +0 -229
data/ext/smarter_csv/Makefile
CHANGED
|
@@ -13,12 +13,12 @@ NULLCMD = :
|
|
|
13
13
|
#### Start of system configuration section. ####
|
|
14
14
|
|
|
15
15
|
srcdir = .
|
|
16
|
-
topdir = /Users/tilo/.rvm/rubies/ruby-3.
|
|
16
|
+
topdir = /Users/tilo/.rvm/rubies/ruby-3.2.2/include/ruby-3.2.0
|
|
17
17
|
hdrdir = $(topdir)
|
|
18
|
-
arch_hdrdir = /Users/tilo/.rvm/rubies/ruby-3.
|
|
18
|
+
arch_hdrdir = /Users/tilo/.rvm/rubies/ruby-3.2.2/include/ruby-3.2.0/arm64-darwin23
|
|
19
19
|
PATH_SEPARATOR = :
|
|
20
20
|
VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
|
|
21
|
-
prefix = $(DESTDIR)/Users/tilo/.rvm/rubies/ruby-3.
|
|
21
|
+
prefix = $(DESTDIR)/Users/tilo/.rvm/rubies/ruby-3.2.2
|
|
22
22
|
rubysitearchprefix = $(rubylibprefix)/$(sitearch)
|
|
23
23
|
rubyarchprefix = $(rubylibprefix)/$(arch)
|
|
24
24
|
rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
|
|
@@ -42,7 +42,6 @@ archincludedir = $(includedir)/$(arch)
|
|
|
42
42
|
sitearchlibdir = $(libdir)/$(sitearch)
|
|
43
43
|
archlibdir = $(libdir)/$(arch)
|
|
44
44
|
ridir = $(datarootdir)/$(RI_BASE_NAME)
|
|
45
|
-
modular_gc_dir = $(DESTDIR)
|
|
46
45
|
mandir = $(datarootdir)/man
|
|
47
46
|
localedir = $(datarootdir)/locale
|
|
48
47
|
libdir = $(exec_prefix)/lib
|
|
@@ -79,11 +78,11 @@ COUTFLAG = -o $(empty)
|
|
|
79
78
|
CSRCFLAG = $(empty)
|
|
80
79
|
|
|
81
80
|
RUBY_EXTCONF_H =
|
|
82
|
-
cflags =
|
|
81
|
+
cflags = -fdeclspec $(optflags) $(debugflags) $(warnflags)
|
|
83
82
|
cxxflags =
|
|
84
|
-
optflags = -O3
|
|
85
|
-
debugflags =
|
|
86
|
-
warnflags = -Wall -Wextra -Wextra-tokens -Wdeprecated-declarations -Wdivision-by-zero -Wdiv-by-zero -Wimplicit-function-declaration -Wimplicit-int -Wpointer-arith -Wshorten-64-to-32 -Wwrite-strings -Wold-style-definition -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wunused-variable -
|
|
83
|
+
optflags = -O3
|
|
84
|
+
debugflags = -ggdb3
|
|
85
|
+
warnflags = -Wall -Wextra -Wextra-tokens -Wdeprecated-declarations -Wdivision-by-zero -Wdiv-by-zero -Wimplicit-function-declaration -Wimplicit-int -Wmisleading-indentation -Wpointer-arith -Wshorten-64-to-32 -Wwrite-strings -Wold-style-definition -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wunused-variable -Wundef
|
|
87
86
|
cppflags =
|
|
88
87
|
CCDLFLAGS = -fno-common
|
|
89
88
|
CFLAGS = $(CCDLFLAGS) -O3 -I/opt/homebrew/opt/libyaml/include -I/opt/homebrew/opt/libksba/include -I/opt/homebrew/opt/readline/include -I/opt/homebrew/opt/zlib/include -I/opt/homebrew/opt/openssl@1.1/include $(cflags) -fno-common -pipe $(ARCH_FLAG)
|
|
@@ -92,26 +91,24 @@ DEFS =
|
|
|
92
91
|
CPPFLAGS = -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -D_DARWIN_UNLIMITED_SELECT -D_REENTRANT $(DEFS) $(cppflags)
|
|
93
92
|
CXXFLAGS = $(CCDLFLAGS) -fdeclspec $(ARCH_FLAG)
|
|
94
93
|
ldflags = -L. -L/opt/homebrew/opt/libyaml/lib -L/opt/homebrew/opt/libksba/lib -L/opt/homebrew/opt/readline/lib -L/opt/homebrew/opt/zlib/lib -L/opt/homebrew/opt/openssl@1.1/lib -fstack-protector-strong
|
|
95
|
-
dldflags = -L/opt/homebrew/opt/libyaml/lib -L/opt/homebrew/opt/libksba/lib -L/opt/homebrew/opt/readline/lib -L/opt/homebrew/opt/zlib/lib -L/opt/homebrew/opt/openssl@1.1/lib -Wl,-undefined,dynamic_lookup
|
|
96
|
-
ARCH_FLAG =
|
|
94
|
+
dldflags = -L/opt/homebrew/opt/libyaml/lib -L/opt/homebrew/opt/libksba/lib -L/opt/homebrew/opt/readline/lib -L/opt/homebrew/opt/zlib/lib -L/opt/homebrew/opt/openssl@1.1/lib -Wl,-undefined,dynamic_lookup $(LIBRUBYARG_SHARED)
|
|
95
|
+
ARCH_FLAG =
|
|
97
96
|
DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
|
|
98
97
|
LDSHARED = $(CC) -dynamic -bundle
|
|
99
98
|
LDSHAREDXX = $(CXX) -dynamic -bundle
|
|
100
|
-
POSTLINK = dsymutil $@ 2>/dev/null; { test -z '$(RUBY_CODESIGN)' || codesign -s '$(RUBY_CODESIGN)' $@; }
|
|
101
99
|
AR = ar
|
|
102
|
-
LD = ld
|
|
103
100
|
EXEEXT =
|
|
104
101
|
|
|
105
102
|
RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)
|
|
106
|
-
RUBY_SO_NAME = ruby.3.
|
|
103
|
+
RUBY_SO_NAME = ruby.3.2
|
|
107
104
|
RUBYW_INSTALL_NAME =
|
|
108
105
|
RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version)
|
|
109
106
|
RUBYW_BASE_NAME = rubyw
|
|
110
107
|
RUBY_BASE_NAME = ruby
|
|
111
108
|
|
|
112
|
-
arch = arm64-
|
|
109
|
+
arch = arm64-darwin23
|
|
113
110
|
sitearch = $(arch)
|
|
114
|
-
ruby_version = 3.
|
|
111
|
+
ruby_version = 3.2.0
|
|
115
112
|
ruby = $(bindir)/$(RUBY_BASE_NAME)
|
|
116
113
|
RUBY = $(ruby)
|
|
117
114
|
BUILTRUBY = $(bindir)/$(RUBY_BASE_NAME)
|
|
@@ -131,7 +128,7 @@ TOUCH = exit >
|
|
|
131
128
|
|
|
132
129
|
preload =
|
|
133
130
|
libpath = . $(libdir)
|
|
134
|
-
LIBPATH =
|
|
131
|
+
LIBPATH = -L. -L$(libdir)
|
|
135
132
|
DEFFILE =
|
|
136
133
|
|
|
137
134
|
CLEANFILES = mkmf.log
|
|
@@ -164,7 +161,7 @@ HDRDIR = $(sitehdrdir)$(target_prefix)
|
|
|
164
161
|
ARCHHDRDIR = $(sitearchhdrdir)$(target_prefix)
|
|
165
162
|
TARGET_SO_DIR =
|
|
166
163
|
TARGET_SO = $(TARGET_SO_DIR)$(DLLIB)
|
|
167
|
-
CLEANLIBS = $(TARGET_SO) $(TARGET_SO
|
|
164
|
+
CLEANLIBS = $(TARGET_SO) $(TARGET_SO).dSYM
|
|
168
165
|
CLEANOBJS = $(OBJS) *.bak
|
|
169
166
|
TARGET_SO_DIR_TIMESTAMP = $(TIMESTAMP_DIR)/.sitearchdir.-.smarter_csv.time
|
|
170
167
|
|
|
@@ -62,7 +62,7 @@ module SmarterCSV
|
|
|
62
62
|
# Apply value converters
|
|
63
63
|
if value_converters
|
|
64
64
|
converter = value_converters[k]
|
|
65
|
-
hash[k] = converter.convert(hash[k]) if converter
|
|
65
|
+
hash[k] = converter.respond_to?(:convert) ? converter.convert(hash[k]) : converter.call(hash[k]) if converter
|
|
66
66
|
end
|
|
67
67
|
end
|
|
68
68
|
|
|
@@ -27,19 +27,21 @@ module SmarterCSV
|
|
|
27
27
|
|
|
28
28
|
def disambiguate_headers(headers, options)
|
|
29
29
|
counts = Hash.new(0)
|
|
30
|
-
empty_count = 0
|
|
31
30
|
prefix = options[:missing_header_prefix] || 'column_'
|
|
32
31
|
# Pre-collect non-blank header names so auto-generated names can avoid collisions.
|
|
33
32
|
used = headers.reject { |h| blank?(h) }
|
|
34
|
-
headers.map do |header|
|
|
33
|
+
headers.each_with_index.map do |header, idx|
|
|
35
34
|
if blank?(header)
|
|
36
|
-
#
|
|
37
|
-
#
|
|
38
|
-
#
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
35
|
+
# Use absolute 1-based column position, consistent with how extra data columns
|
|
36
|
+
# beyond the header count are named. If the positional name collides with an
|
|
37
|
+
# existing header, append underscores until a free name is found — this avoids
|
|
38
|
+
# stealing the positional name from any subsequent blank header.
|
|
39
|
+
candidate = "#{prefix}#{idx + 1}"
|
|
40
|
+
suffix = ''
|
|
41
|
+
while used.include?(candidate)
|
|
42
|
+
suffix += '_'
|
|
43
|
+
candidate = "#{prefix}#{idx + 1}#{suffix}"
|
|
44
|
+
end
|
|
43
45
|
used << candidate
|
|
44
46
|
candidate
|
|
45
47
|
else
|
data/lib/smarter_csv/reader.rb
CHANGED
|
@@ -10,7 +10,7 @@ module SmarterCSV
|
|
|
10
10
|
# A warning is emitted to STDERR so users know to configure it explicitly.
|
|
11
11
|
DEFAULT_CHUNK_SIZE = 100
|
|
12
12
|
|
|
13
|
-
include ::SmarterCSV::Options
|
|
13
|
+
include ::SmarterCSV::Reader::Options
|
|
14
14
|
include ::SmarterCSV::FileIO
|
|
15
15
|
include ::SmarterCSV::AutoDetection
|
|
16
16
|
include ::SmarterCSV::Headers
|
|
@@ -24,6 +24,10 @@ module SmarterCSV
|
|
|
24
24
|
attr_reader :enforce_utf8, :has_rails, :has_acceleration
|
|
25
25
|
attr_reader :errors, :warnings, :headers, :raw_header, :result
|
|
26
26
|
|
|
27
|
+
def self.default_options
|
|
28
|
+
Options::DEFAULT_OPTIONS
|
|
29
|
+
end
|
|
30
|
+
|
|
27
31
|
# rubocop:disable Naming/MethodName
|
|
28
32
|
def headerA
|
|
29
33
|
warn "Deprecarion Warning: 'headerA' will be removed in future versions. Use 'headders'"
|
|
@@ -357,7 +361,7 @@ module SmarterCSV
|
|
|
357
361
|
|
|
358
362
|
if options[:value_converters]
|
|
359
363
|
options[:value_converters].each do |key, converter|
|
|
360
|
-
hash[key] = converter.convert(hash[key]) if hash.key?(key)
|
|
364
|
+
hash[key] = converter.respond_to?(:convert) ? converter.convert(hash[key]) : converter.call(hash[key]) if hash.key?(key)
|
|
361
365
|
end
|
|
362
366
|
end
|
|
363
367
|
else
|
|
@@ -755,7 +759,7 @@ module SmarterCSV
|
|
|
755
759
|
|
|
756
760
|
if options[:value_converters]
|
|
757
761
|
options[:value_converters].each do |key, converter|
|
|
758
|
-
hash[key] = converter.convert(hash[key]) if hash.key?(key)
|
|
762
|
+
hash[key] = converter.respond_to?(:convert) ? converter.convert(hash[key]) : converter.call(hash[key]) if hash.key?(key)
|
|
759
763
|
end
|
|
760
764
|
end
|
|
761
765
|
else
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SmarterCSV
|
|
4
|
+
class Reader
|
|
5
|
+
module Options
|
|
6
|
+
DEFAULT_OPTIONS = {
|
|
7
|
+
acceleration: true, # if user wants to use acceleration or not
|
|
8
|
+
auto_row_sep_chars: 500,
|
|
9
|
+
bad_row_limit: nil,
|
|
10
|
+
chunk_size: nil,
|
|
11
|
+
col_sep: :auto, # was: ',',
|
|
12
|
+
collect_raw_lines: true,
|
|
13
|
+
comment_regexp: nil, # was: /\A#/,
|
|
14
|
+
convert_values_to_numeric: true,
|
|
15
|
+
downcase_header: true,
|
|
16
|
+
duplicate_header_suffix: '', # was: nil,
|
|
17
|
+
field_size_limit: nil, # Integer (bytes) or nil for no limit. Raises FieldSizeLimitExceeded if any
|
|
18
|
+
# extracted field exceeds this size. Prevents DoS from runaway quoted
|
|
19
|
+
# fields (unbounded multiline stitching) or huge inline payloads.
|
|
20
|
+
file_encoding: 'utf-8',
|
|
21
|
+
force_utf8: false,
|
|
22
|
+
headers_in_file: true,
|
|
23
|
+
invalid_byte_sequence: '',
|
|
24
|
+
keep_original_headers: false,
|
|
25
|
+
key_mapping: nil,
|
|
26
|
+
strict: false, # DEPRECATED -> use missing_headers
|
|
27
|
+
missing_headers: :auto, # :auto (auto-generate names for extra cols) or :raise (raise HeaderSizeMismatch)
|
|
28
|
+
missing_header_prefix: 'column_',
|
|
29
|
+
nil_values_matching: nil, # regex: set matching values to nil (key kept); pairs with remove_empty_values
|
|
30
|
+
on_bad_row: :raise,
|
|
31
|
+
on_chunk: nil, # callable: fired after each chunk is parsed, before yielding to the block
|
|
32
|
+
on_complete: nil, # callable: fired once after the entire file is processed
|
|
33
|
+
on_start: nil, # callable: fired once before the first row is parsed
|
|
34
|
+
quote_boundary: :standard, # :standard (only at field boundary 👍) or :legacy (any quote toggles state 👎)
|
|
35
|
+
quote_char: '"',
|
|
36
|
+
quote_escaping: :auto,
|
|
37
|
+
remove_empty_hashes: true,
|
|
38
|
+
remove_empty_values: true,
|
|
39
|
+
remove_unmapped_keys: false,
|
|
40
|
+
remove_values_matching: nil, # DEPRECATED: use nil_values_matching instead
|
|
41
|
+
remove_zero_values: false,
|
|
42
|
+
required_headers: nil,
|
|
43
|
+
required_keys: nil,
|
|
44
|
+
row_sep: :auto, # was: $/,
|
|
45
|
+
silence_missing_keys: false,
|
|
46
|
+
skip_lines: nil,
|
|
47
|
+
strings_as_keys: false,
|
|
48
|
+
strip_chars_from_headers: nil,
|
|
49
|
+
strip_whitespace: true,
|
|
50
|
+
user_provided_headers: nil,
|
|
51
|
+
value_converters: nil,
|
|
52
|
+
verbose: :normal, # nil/:normal (default), :quiet (suppress warnings), :debug (print diagnostics); true/false are deprecated
|
|
53
|
+
with_line_numbers: false,
|
|
54
|
+
}.freeze
|
|
55
|
+
|
|
56
|
+
# NOTE: this is not called when "parse" methods are tested by themselves
|
|
57
|
+
def process_options(given_options = {})
|
|
58
|
+
# Debug output before merge — check raw verbose value (true or :debug)
|
|
59
|
+
$stderr.puts "User provided options:\n#{pp(given_options)}\n" if [true, :debug].include?(given_options[:verbose])
|
|
60
|
+
|
|
61
|
+
# Special case for :user_provided_headers:
|
|
62
|
+
#
|
|
63
|
+
# If we would use the default `headers_in_file: true`, and `:user_provided_headers` are given,
|
|
64
|
+
# we could lose the first data row
|
|
65
|
+
#
|
|
66
|
+
# We now err on the side of treating an actual header as data, rather than losing a data row.
|
|
67
|
+
#
|
|
68
|
+
if given_options[:user_provided_headers] && !given_options.keys.include?(:headers_in_file)
|
|
69
|
+
given_options[:headers_in_file] = false
|
|
70
|
+
warn "WARNING: setting `headers_in_file: false` as a precaution to not lose the first row. Set explicitly to `true` if you have headers." unless given_options[:verbose] == :quiet
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
@options = DEFAULT_OPTIONS.dup.merge!(given_options)
|
|
74
|
+
|
|
75
|
+
# Normalize verbose to a symbol — done once here, stored back into @options.
|
|
76
|
+
# All subsequent checks are free symbol comparisons; no re-evaluation needed.
|
|
77
|
+
# :quiet — suppress all warnings and notices (good for production)
|
|
78
|
+
# :normal — show behavioral warnings (default; helpful for new users)
|
|
79
|
+
# :debug — :normal + print computed options and per-row diagnostics
|
|
80
|
+
# nil is silently normalized to :normal; true/false are deprecated.
|
|
81
|
+
case @options[:verbose]
|
|
82
|
+
when :quiet, :normal, :debug
|
|
83
|
+
# keep as is
|
|
84
|
+
when nil
|
|
85
|
+
@options[:verbose] = :normal
|
|
86
|
+
when false
|
|
87
|
+
warn "DEPRECATION WARNING: verbose: false is deprecated. Use verbose: :normal instead (or omit — it is the default)."
|
|
88
|
+
@options[:verbose] = :normal
|
|
89
|
+
when true
|
|
90
|
+
warn "DEPRECATION WARNING: verbose: true is deprecated. Use verbose: :debug instead."
|
|
91
|
+
@options[:verbose] = :debug
|
|
92
|
+
else
|
|
93
|
+
warn "WARNING: unknown verbose value #{@options[:verbose].inspect}, defaulting to :normal. Valid values: :quiet, :normal, :debug."
|
|
94
|
+
@options[:verbose] = :normal
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# fix invalid input
|
|
98
|
+
@options[:invalid_byte_sequence] ||= ''
|
|
99
|
+
|
|
100
|
+
# Normalize headers: { only: [...] } / { except: [...] } to internal option names.
|
|
101
|
+
# The public API is headers: { only: } or headers: { except: }.
|
|
102
|
+
# Internally we use only_headers: / except_headers: (what the C extension reads).
|
|
103
|
+
if (hdr = @options.delete(:headers)).is_a?(Hash)
|
|
104
|
+
@options[:only_headers] = hdr[:only] if hdr.key?(:only)
|
|
105
|
+
@options[:except_headers] = hdr[:except] if hdr.key?(:except)
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Deprecation: direct use of only_headers: / except_headers: (use headers: { only: } instead)
|
|
109
|
+
if given_options.key?(:only_headers) && !given_options.key?(:headers)
|
|
110
|
+
warn "DEPRECATION WARNING: 'only_headers:' is deprecated. Use 'headers: { only: [...] }' instead." unless @options[:verbose] == :quiet
|
|
111
|
+
end
|
|
112
|
+
if given_options.key?(:except_headers) && !given_options.key?(:headers)
|
|
113
|
+
warn "DEPRECATION WARNING: 'except_headers:' is deprecated. Use 'headers: { except: [...] }' instead." unless @options[:verbose] == :quiet
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
# Normalize only_headers/except_headers to arrays of symbols (internal names, read by C extension)
|
|
117
|
+
if @options[:only_headers]
|
|
118
|
+
values = Array(@options[:only_headers])
|
|
119
|
+
bad = values.reject { |v| v.is_a?(Symbol) || v.is_a?(String) }
|
|
120
|
+
raise SmarterCSV::ValidationError, "headers: { only: } elements must be String or Symbol, got: #{bad.map(&:class).uniq.inspect}" if bad.any?
|
|
121
|
+
@options[:only_headers] = values.map(&:to_sym)
|
|
122
|
+
end
|
|
123
|
+
if @options[:except_headers]
|
|
124
|
+
values = Array(@options[:except_headers])
|
|
125
|
+
bad = values.reject { |v| v.is_a?(Symbol) || v.is_a?(String) }
|
|
126
|
+
raise SmarterCSV::ValidationError, "headers: { except: } elements must be String or Symbol, got: #{bad.map(&:class).uniq.inspect}" if bad.any?
|
|
127
|
+
@options[:except_headers] = values.map(&:to_sym)
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
# Deprecation: remove_values_matching → nil_values_matching
|
|
131
|
+
# Old behavior: removes the key-value pair entirely.
|
|
132
|
+
# New behavior: nil_values_matching sets the value to nil (key kept);
|
|
133
|
+
# combined with the default remove_empty_values: true the net effect is identical.
|
|
134
|
+
# With remove_empty_values: false, the key is retained with a nil value.
|
|
135
|
+
if given_options.key?(:remove_values_matching)
|
|
136
|
+
unless @options[:verbose] == :quiet
|
|
137
|
+
warn "DEPRECATION WARNING: 'remove_values_matching' is deprecated. " \
|
|
138
|
+
"Use 'nil_values_matching' instead. With the default 'remove_empty_values: true' " \
|
|
139
|
+
"the net behavior is identical. With 'remove_empty_values: false', matching values " \
|
|
140
|
+
"are set to nil but the key is retained in the result hash."
|
|
141
|
+
end
|
|
142
|
+
@options[:nil_values_matching] ||= @options[:remove_values_matching]
|
|
143
|
+
@options[:remove_values_matching] = nil # clear to prevent double-processing
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
# Translate deprecated :strict option to :missing_headers
|
|
147
|
+
if given_options.key?(:strict)
|
|
148
|
+
unless @options[:verbose] == :quiet
|
|
149
|
+
warn "DEPRECATION WARNING: 'strict' option is deprecated and will be removed in a future version. " \
|
|
150
|
+
"Use 'missing_headers: :raise' instead of 'strict: true', or 'missing_headers: :auto' instead of 'strict: false'."
|
|
151
|
+
end
|
|
152
|
+
@options[:missing_headers] = @options[:strict] ? :raise : :auto unless given_options.key?(:missing_headers)
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
# Keep :strict synchronized with :missing_headers (C extension reads :strict directly)
|
|
156
|
+
@options[:strict] = (@options[:missing_headers] == :raise)
|
|
157
|
+
|
|
158
|
+
$stderr.puts "Computed options:\n#{pp(@options)}\n" if @options[:verbose] == :debug
|
|
159
|
+
|
|
160
|
+
validate_options!(@options)
|
|
161
|
+
@options
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
private
|
|
165
|
+
|
|
166
|
+
def validate_options!(options)
|
|
167
|
+
# deprecate required_headers
|
|
168
|
+
unless options[:required_headers].nil?
|
|
169
|
+
warn "DEPRECATION WARNING: please use 'required_keys' instead of 'required_headers'" unless options[:verbose] == :quiet
|
|
170
|
+
if options[:required_keys].nil?
|
|
171
|
+
options[:required_keys] = options[:required_headers]
|
|
172
|
+
options[:required_headers] = nil
|
|
173
|
+
end
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
keys = options.keys
|
|
177
|
+
errors = []
|
|
178
|
+
errors << "invalid row_sep" if keys.include?(:row_sep) && !option_valid?(options[:row_sep])
|
|
179
|
+
errors << "invalid col_sep" if keys.include?(:col_sep) && !option_valid?(options[:col_sep])
|
|
180
|
+
errors << "invalid quote_char" if keys.include?(:quote_char) && !option_valid?(options[:quote_char])
|
|
181
|
+
if keys.include?(:quote_char) && options[:quote_char].is_a?(String) && options[:quote_char].bytesize > 1
|
|
182
|
+
errors << "invalid quote_char: must be a single byte (got #{options[:quote_char].inspect})"
|
|
183
|
+
end
|
|
184
|
+
unless %i[double_quotes backslash auto].include?(options[:quote_escaping])
|
|
185
|
+
errors << "invalid quote_escaping: must be :double_quotes, :backslash, or :auto"
|
|
186
|
+
end
|
|
187
|
+
unless %i[legacy standard].include?(options[:quote_boundary])
|
|
188
|
+
errors << "invalid quote_boundary: must be :legacy or :standard"
|
|
189
|
+
end
|
|
190
|
+
fsl = options[:field_size_limit]
|
|
191
|
+
unless fsl.nil? || (fsl.is_a?(Integer) && fsl > 0)
|
|
192
|
+
errors << "invalid field_size_limit: must be nil or a positive Integer (got #{fsl.inspect})"
|
|
193
|
+
end
|
|
194
|
+
obr = options[:on_bad_row]
|
|
195
|
+
unless %i[raise skip collect].include?(obr) || obr.respond_to?(:call)
|
|
196
|
+
errors << "invalid on_bad_row: must be :raise, :skip, :collect, or a callable"
|
|
197
|
+
end
|
|
198
|
+
%i[on_start on_chunk on_complete].each do |hook|
|
|
199
|
+
val = options[hook]
|
|
200
|
+
errors << "invalid #{hook}: must be nil or a callable" if !val.nil? && !val.respond_to?(:call)
|
|
201
|
+
end
|
|
202
|
+
unless %i[auto raise].include?(options[:missing_headers])
|
|
203
|
+
errors << "invalid missing_headers: must be :auto or :raise"
|
|
204
|
+
end
|
|
205
|
+
if options[:only_headers] && options[:except_headers]
|
|
206
|
+
errors << "cannot use both 'headers: { only: }' and 'headers: { except: }' at the same time"
|
|
207
|
+
end
|
|
208
|
+
raise SmarterCSV::ValidationError, errors.inspect if errors.any?
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
def option_valid?(str)
|
|
212
|
+
return true if str.is_a?(Symbol) && str == :auto
|
|
213
|
+
return true if str.is_a?(String) && !str.empty?
|
|
214
|
+
|
|
215
|
+
false
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
def pp(value)
|
|
219
|
+
defined?(AwesomePrint) ? value.awesome_inspect(index: nil) : value.inspect
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
end
|
|
223
|
+
end
|
data/lib/smarter_csv/version.rb
CHANGED
data/lib/smarter_csv/writer.rb
CHANGED
|
@@ -25,6 +25,8 @@ module SmarterCSV
|
|
|
25
25
|
#
|
|
26
26
|
# The Writer automatically quotes fields containing the col_sep, row_sep, or the quote_char.
|
|
27
27
|
#
|
|
28
|
+
# See SmarterCSV::Writer::Options::DEFAULT_OPTIONS for all options and their defaults.
|
|
29
|
+
#
|
|
28
30
|
# Options:
|
|
29
31
|
# col_sep : defaults to , but can be set to any other character
|
|
30
32
|
# row_sep : defaults to LF \n , but can be set to \r\n or \r or anything else
|
|
@@ -42,7 +44,9 @@ module SmarterCSV
|
|
|
42
44
|
# write_empty_value: string written in place of empty-string field values (default: '')
|
|
43
45
|
# write_bom: when true, prepends a UTF-8 BOM (\xEF\xBB\xBF) to the output (default: false)
|
|
44
46
|
# Useful for Excel compatibility with non-ASCII content.
|
|
45
|
-
|
|
47
|
+
# write_headers: when false, suppresses the header line (default: true). Useful when appending to
|
|
48
|
+
# an existing CSV file opened in 'a' mode — the caller controls the file mode.
|
|
49
|
+
#
|
|
46
50
|
# IMPORTANT NOTES:
|
|
47
51
|
# * Data hashes could contain strings or symbols as keys.
|
|
48
52
|
# Make sure to use the correct form when specifying headers manually,
|
|
@@ -51,36 +55,42 @@ module SmarterCSV
|
|
|
51
55
|
attr_reader :options, :row_sep, :col_sep, :quote_char, :force_quotes, :discover_headers, :headers, :map_headers, :output_file
|
|
52
56
|
|
|
53
57
|
class Writer
|
|
54
|
-
|
|
55
|
-
|
|
58
|
+
include ::SmarterCSV::Writer::Options
|
|
59
|
+
|
|
60
|
+
def self.default_options
|
|
61
|
+
Options::DEFAULT_OPTIONS
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def initialize(file_path_or_io, given_options = {})
|
|
65
|
+
opts = Options::DEFAULT_OPTIONS.merge(given_options)
|
|
66
|
+
@options = opts
|
|
56
67
|
|
|
57
|
-
@row_sep =
|
|
58
|
-
@col_sep =
|
|
59
|
-
@quote_char =
|
|
68
|
+
@row_sep = opts[:row_sep]
|
|
69
|
+
@col_sep = opts[:col_sep]
|
|
70
|
+
@quote_char = opts[:quote_char]
|
|
60
71
|
@escaped_quote_char = @quote_char * 2
|
|
61
|
-
@force_quotes =
|
|
62
|
-
@quote_headers =
|
|
63
|
-
@disable_auto_quoting =
|
|
64
|
-
@value_converters =
|
|
65
|
-
@encoding =
|
|
66
|
-
@write_nil_value =
|
|
67
|
-
@write_empty_value =
|
|
68
|
-
@write_bom =
|
|
72
|
+
@force_quotes = opts[:force_quotes] == true
|
|
73
|
+
@quote_headers = opts[:quote_headers] == true
|
|
74
|
+
@disable_auto_quoting = opts[:disable_auto_quoting] == true
|
|
75
|
+
@value_converters = opts[:value_converters] || {}
|
|
76
|
+
@encoding = opts[:encoding]
|
|
77
|
+
@write_nil_value = opts[:write_nil_value]
|
|
78
|
+
@write_empty_value = opts[:write_empty_value]
|
|
79
|
+
@write_bom = opts[:write_bom] == true
|
|
80
|
+
@write_headers = opts[:write_headers] == true
|
|
69
81
|
@map_all_keys = @value_converters.has_key?(:_all)
|
|
70
82
|
@mapped_keys = Set.new(@value_converters.keys - [:_all])
|
|
71
|
-
@header_converter =
|
|
83
|
+
@header_converter = opts[:header_converter]
|
|
72
84
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
@discover_headers = options[:discover_headers] == true
|
|
85
|
+
if given_options.has_key?(:discover_headers)
|
|
86
|
+
@discover_headers = given_options[:discover_headers] == true
|
|
76
87
|
else
|
|
77
|
-
@discover_headers = !(
|
|
88
|
+
@discover_headers = !(given_options.has_key?(:map_headers) || given_options.has_key?(:headers))
|
|
78
89
|
end
|
|
79
90
|
|
|
80
|
-
@headers = []
|
|
81
|
-
@headers =
|
|
82
|
-
@
|
|
83
|
-
@map_headers = options[:map_headers] || {}
|
|
91
|
+
@headers = opts[:headers].dup
|
|
92
|
+
@headers = given_options[:map_headers].keys if given_options.has_key?(:map_headers) && !given_options.has_key?(:headers)
|
|
93
|
+
@map_headers = opts[:map_headers]
|
|
84
94
|
|
|
85
95
|
# Accept an IO-like object (StringIO, IO, etc.) or any path-like object (String, Pathname, etc.)
|
|
86
96
|
if file_path_or_io.respond_to?(:write)
|
|
@@ -110,7 +120,7 @@ module SmarterCSV
|
|
|
110
120
|
# and stream data rows directly to @output_file, bypassing the temp file entirely.
|
|
111
121
|
@temp_file = nil
|
|
112
122
|
@output_file.write("\xEF\xBB\xBF") if @write_bom
|
|
113
|
-
write_header_line
|
|
123
|
+
write_header_line if @write_headers
|
|
114
124
|
else
|
|
115
125
|
@temp_file = Tempfile.new('smarter_csv')
|
|
116
126
|
end
|
|
@@ -134,7 +144,7 @@ module SmarterCSV
|
|
|
134
144
|
# Header-discovery mode: headers were accumulated while writing rows;
|
|
135
145
|
# now prepend the header line and copy the buffered rows to the output.
|
|
136
146
|
@output_file.write("\xEF\xBB\xBF") if @write_bom
|
|
137
|
-
write_header_line
|
|
147
|
+
write_header_line if @write_headers
|
|
138
148
|
@temp_file.rewind
|
|
139
149
|
@output_file.write(@temp_file.read)
|
|
140
150
|
@temp_file.close!
|
|
@@ -149,7 +159,7 @@ module SmarterCSV
|
|
|
149
159
|
|
|
150
160
|
def write_header_line
|
|
151
161
|
mapped_headers = @headers.map { |header| @map_headers[header] || header }
|
|
152
|
-
mapped_headers =
|
|
162
|
+
mapped_headers = mapped_headers.map { |header| @header_converter.call(header) } if @header_converter
|
|
153
163
|
force_quotes = @quote_headers || @force_quotes
|
|
154
164
|
mapped_headers = mapped_headers.map { |x| escape_csv_field(x, force_quotes) }
|
|
155
165
|
@output_file.write(mapped_headers.join(@col_sep) + @row_sep) unless mapped_headers.empty?
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SmarterCSV
|
|
4
|
+
class Writer
|
|
5
|
+
module Options
|
|
6
|
+
DEFAULT_OPTIONS = {
|
|
7
|
+
col_sep: ',',
|
|
8
|
+
row_sep: $/,
|
|
9
|
+
quote_char: '"',
|
|
10
|
+
force_quotes: false,
|
|
11
|
+
quote_headers: false,
|
|
12
|
+
disable_auto_quoting: false,
|
|
13
|
+
value_converters: {},
|
|
14
|
+
encoding: nil,
|
|
15
|
+
write_nil_value: '',
|
|
16
|
+
write_empty_value: '',
|
|
17
|
+
write_bom: false,
|
|
18
|
+
write_headers: true,
|
|
19
|
+
header_converter: nil,
|
|
20
|
+
discover_headers: true,
|
|
21
|
+
headers: [],
|
|
22
|
+
map_headers: {},
|
|
23
|
+
}.freeze
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
data/lib/smarter_csv.rb
CHANGED
|
@@ -5,7 +5,8 @@ require "smarter_csv/version"
|
|
|
5
5
|
require "smarter_csv/errors"
|
|
6
6
|
|
|
7
7
|
require "smarter_csv/file_io"
|
|
8
|
-
require "smarter_csv/
|
|
8
|
+
require "smarter_csv/reader_options"
|
|
9
|
+
require "smarter_csv/writer_options"
|
|
9
10
|
require "smarter_csv/auto_detection"
|
|
10
11
|
require 'smarter_csv/header_transformations'
|
|
11
12
|
require 'smarter_csv/header_validations'
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: smarter_csv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.16.
|
|
4
|
+
version: 1.16.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Tilo Sloboda
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2026-
|
|
10
|
+
date: 2026-04-14 00:00:00.000000000 Z
|
|
11
11
|
dependencies: []
|
|
12
12
|
description: |
|
|
13
13
|
SmarterCSV is a high-performance CSV reader and writer for Ruby focused on
|
|
@@ -64,12 +64,7 @@ files:
|
|
|
64
64
|
- docs/value_converters.md
|
|
65
65
|
- ext/smarter_csv/Makefile
|
|
66
66
|
- ext/smarter_csv/extconf.rb
|
|
67
|
-
- ext/smarter_csv/smarter_csv.bundle
|
|
68
|
-
- ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Info.plist
|
|
69
|
-
- ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/DWARF/smarter_csv.bundle
|
|
70
|
-
- ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/Relocations/aarch64/smarter_csv.bundle.yml
|
|
71
67
|
- ext/smarter_csv/smarter_csv.c
|
|
72
|
-
- ext/smarter_csv/smarter_csv.o
|
|
73
68
|
- images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.png
|
|
74
69
|
- images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.svg
|
|
75
70
|
- images/SmarterCSV_1.16.0_vs_previous_C-speedup.png
|
|
@@ -84,11 +79,12 @@ files:
|
|
|
84
79
|
- lib/smarter_csv/header_transformations.rb
|
|
85
80
|
- lib/smarter_csv/header_validations.rb
|
|
86
81
|
- lib/smarter_csv/headers.rb
|
|
87
|
-
- lib/smarter_csv/options.rb
|
|
88
82
|
- lib/smarter_csv/parser.rb
|
|
89
83
|
- lib/smarter_csv/reader.rb
|
|
84
|
+
- lib/smarter_csv/reader_options.rb
|
|
90
85
|
- lib/smarter_csv/version.rb
|
|
91
86
|
- lib/smarter_csv/writer.rb
|
|
87
|
+
- lib/smarter_csv/writer_options.rb
|
|
92
88
|
- smarter_csv.gemspec
|
|
93
89
|
homepage: https://github.com/tilo/smarter_csv
|
|
94
90
|
licenses:
|
|
Binary file
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
-
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
3
|
-
<plist version="1.0">
|
|
4
|
-
<dict>
|
|
5
|
-
<key>CFBundleDevelopmentRegion</key>
|
|
6
|
-
<string>English</string>
|
|
7
|
-
<key>CFBundleIdentifier</key>
|
|
8
|
-
<string>com.apple.xcode.dsym.smarter_csv.bundle</string>
|
|
9
|
-
<key>CFBundleInfoDictionaryVersion</key>
|
|
10
|
-
<string>6.0</string>
|
|
11
|
-
<key>CFBundlePackageType</key>
|
|
12
|
-
<string>dSYM</string>
|
|
13
|
-
<key>CFBundleSignature</key>
|
|
14
|
-
<string>????</string>
|
|
15
|
-
<key>CFBundleShortVersionString</key>
|
|
16
|
-
<string>1.0</string>
|
|
17
|
-
<key>CFBundleVersion</key>
|
|
18
|
-
<string>1</string>
|
|
19
|
-
</dict>
|
|
20
|
-
</plist>
|
|
Binary file
|
|
Binary file
|