smarter_csv 1.16.2 → 1.16.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/docs/basic_write_api.md +48 -0
- data/docs/options.md +1 -0
- data/ext/smarter_csv/Makefile +14 -17
- data/lib/smarter_csv/reader.rb +5 -1
- data/lib/smarter_csv/reader_options.rb +223 -0
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv/writer.rb +35 -25
- data/lib/smarter_csv/writer_options.rb +26 -0
- data/lib/smarter_csv.rb +2 -1
- metadata +4 -8
- data/ext/smarter_csv/smarter_csv.bundle +0 -0
- data/ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Info.plist +0 -20
- data/ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/DWARF/smarter_csv.bundle +0 -0
- data/ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/Relocations/aarch64/smarter_csv.bundle.yml +0 -5
- data/ext/smarter_csv/smarter_csv.o +0 -0
- data/lib/smarter_csv/options.rb +0 -229
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b40fb76fef88599d7449691806af6f76a131ffcd41e2ee145d5c87f3554a2006
|
|
4
|
+
data.tar.gz: edb27057973c0a88524579f450dc8ed3ffadbd983de7155a84ff159495c31233
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5b9e2a17ae14a5d7b3dfddd854148f38c8892d67544b51fecba888aa11c540096057460880564b89251194fea513c3e2807c500d93f896826db885daf64271fe
|
|
7
|
+
data.tar.gz: 90b5aa10c6bc36cbc97662de879deaf61c2417a61e4cb5171924aac991a37c762588ff6413a924bfd083ca10c1fa1739049561f3c6e9de6ff196bdba85c7878f
|
data/CHANGELOG.md
CHANGED
|
@@ -1,6 +1,20 @@
|
|
|
1
1
|
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
|
3
3
|
|
|
4
|
+
## 1.16.3 (2026-04-14) — New Feature
|
|
5
|
+
|
|
6
|
+
RSpec tests: **1,425 → 1,434** (+9 tests)
|
|
7
|
+
|
|
8
|
+
### New Features
|
|
9
|
+
|
|
10
|
+
* **`write_headers: false`** — new `SmarterCSV::Writer` option to suppress the header line when appending rows to an existing CSV file opened in `'a'` mode.
|
|
11
|
+
Defaults to `true` (existing behavior, fully backwards-compatible).
|
|
12
|
+
|
|
13
|
+
See [Appending to an Existing CSV File](docs/basic_write_api.md#appending-to-an-existing-csv-file).
|
|
14
|
+
|
|
15
|
+
### Other
|
|
16
|
+
* Refactor of internal options handling
|
|
17
|
+
|
|
4
18
|
## 1.16.2 (2026-03-30) — Bug Fixes
|
|
5
19
|
|
|
6
20
|
RSpec tests: **1,410 → 1,425** (+15 tests)
|
data/docs/basic_write_api.md
CHANGED
|
@@ -568,6 +568,54 @@ end
|
|
|
568
568
|
> **Note:** Only use `write_bom: true` with UTF-8 output. Adding a UTF-8 BOM to a
|
|
569
569
|
> non-UTF-8 file will corrupt it.
|
|
570
570
|
|
|
571
|
+
## Appending to an Existing CSV File
|
|
572
|
+
|
|
573
|
+
Use `write_headers: false` to suppress the header line when appending rows to an
|
|
574
|
+
existing CSV file. The caller is responsible for opening the file in append mode — the
|
|
575
|
+
Writer writes only what you ask it to write.
|
|
576
|
+
|
|
577
|
+
```ruby
|
|
578
|
+
# First write: create the file with header + first batch of rows
|
|
579
|
+
SmarterCSV.generate('output.csv') do |csv|
|
|
580
|
+
csv << { name: 'Alice', age: 30 }
|
|
581
|
+
end
|
|
582
|
+
# output.csv:
|
|
583
|
+
# name,age
|
|
584
|
+
# Alice,30
|
|
585
|
+
|
|
586
|
+
# Later: append more rows without repeating the header
|
|
587
|
+
File.open('output.csv', 'a') do |f|
|
|
588
|
+
SmarterCSV.generate(f, write_headers: false) do |csv|
|
|
589
|
+
csv << { name: 'Bob', age: 25 }
|
|
590
|
+
end
|
|
591
|
+
end
|
|
592
|
+
# output.csv:
|
|
593
|
+
# name,age
|
|
594
|
+
# Alice,30
|
|
595
|
+
# Bob,25
|
|
596
|
+
```
|
|
597
|
+
|
|
598
|
+
The Writer still uses the hash keys to determine column order, so the appended rows
|
|
599
|
+
will be aligned correctly as long as the same set of keys is used. If you need to
|
|
600
|
+
guarantee column order across both writes, pass `headers:` explicitly:
|
|
601
|
+
|
|
602
|
+
```ruby
|
|
603
|
+
HEADERS = %i[name age]
|
|
604
|
+
|
|
605
|
+
SmarterCSV.generate('output.csv', headers: HEADERS) do |csv|
|
|
606
|
+
csv << { name: 'Alice', age: 30 }
|
|
607
|
+
end
|
|
608
|
+
|
|
609
|
+
File.open('output.csv', 'a') do |f|
|
|
610
|
+
SmarterCSV.generate(f, headers: HEADERS, write_headers: false) do |csv|
|
|
611
|
+
csv << { name: 'Bob', age: 25 }
|
|
612
|
+
end
|
|
613
|
+
end
|
|
614
|
+
```
|
|
615
|
+
|
|
616
|
+
> **Note:** `write_headers: false` only suppresses the header line. All other
|
|
617
|
+
> options (`col_sep:`, `row_sep:`, `value_converters:`, etc.) apply as normal.
|
|
618
|
+
|
|
571
619
|
## More Examples
|
|
572
620
|
|
|
573
621
|
Check out the [RSpec tests](../spec/smarter_csv/writer_spec.rb) for more examples.
|
data/docs/options.md
CHANGED
|
@@ -45,6 +45,7 @@
|
|
|
45
45
|
| `:write_nil_value` | `''` | String written in place of `nil` field values. E.g. `write_nil_value: 'N/A'`. |
|
|
46
46
|
| `:write_empty_value` | `''` | String written in place of empty-string field values, including missing keys. E.g. `write_empty_value: 'EMPTY'`. |
|
|
47
47
|
| `:write_bom` | `false` | Prepends a UTF-8 BOM (`\xEF\xBB\xBF`) to the output. Use with `encoding: 'UTF-8'` for Excel compatibility. |
|
|
48
|
+
| `:write_headers` | `true` | When `false`, suppresses the header line entirely. Use when appending rows to an existing CSV file (open the file in `'a'` mode yourself and pass the IO object). |
|
|
48
49
|
|
|
49
50
|
|
|
50
51
|
## CSV Reading
|
data/ext/smarter_csv/Makefile
CHANGED
|
@@ -13,12 +13,12 @@ NULLCMD = :
|
|
|
13
13
|
#### Start of system configuration section. ####
|
|
14
14
|
|
|
15
15
|
srcdir = .
|
|
16
|
-
topdir = /Users/tilo/.rvm/rubies/ruby-3.
|
|
16
|
+
topdir = /Users/tilo/.rvm/rubies/ruby-3.2.2/include/ruby-3.2.0
|
|
17
17
|
hdrdir = $(topdir)
|
|
18
|
-
arch_hdrdir = /Users/tilo/.rvm/rubies/ruby-3.
|
|
18
|
+
arch_hdrdir = /Users/tilo/.rvm/rubies/ruby-3.2.2/include/ruby-3.2.0/arm64-darwin23
|
|
19
19
|
PATH_SEPARATOR = :
|
|
20
20
|
VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
|
|
21
|
-
prefix = $(DESTDIR)/Users/tilo/.rvm/rubies/ruby-3.
|
|
21
|
+
prefix = $(DESTDIR)/Users/tilo/.rvm/rubies/ruby-3.2.2
|
|
22
22
|
rubysitearchprefix = $(rubylibprefix)/$(sitearch)
|
|
23
23
|
rubyarchprefix = $(rubylibprefix)/$(arch)
|
|
24
24
|
rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
|
|
@@ -42,7 +42,6 @@ archincludedir = $(includedir)/$(arch)
|
|
|
42
42
|
sitearchlibdir = $(libdir)/$(sitearch)
|
|
43
43
|
archlibdir = $(libdir)/$(arch)
|
|
44
44
|
ridir = $(datarootdir)/$(RI_BASE_NAME)
|
|
45
|
-
modular_gc_dir = $(DESTDIR)
|
|
46
45
|
mandir = $(datarootdir)/man
|
|
47
46
|
localedir = $(datarootdir)/locale
|
|
48
47
|
libdir = $(exec_prefix)/lib
|
|
@@ -79,11 +78,11 @@ COUTFLAG = -o $(empty)
|
|
|
79
78
|
CSRCFLAG = $(empty)
|
|
80
79
|
|
|
81
80
|
RUBY_EXTCONF_H =
|
|
82
|
-
cflags =
|
|
81
|
+
cflags = -fdeclspec $(optflags) $(debugflags) $(warnflags)
|
|
83
82
|
cxxflags =
|
|
84
|
-
optflags = -O3
|
|
85
|
-
debugflags =
|
|
86
|
-
warnflags = -Wall -Wextra -Wextra-tokens -Wdeprecated-declarations -Wdivision-by-zero -Wdiv-by-zero -Wimplicit-function-declaration -Wimplicit-int -Wpointer-arith -Wshorten-64-to-32 -Wwrite-strings -Wold-style-definition -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wunused-variable -
|
|
83
|
+
optflags = -O3
|
|
84
|
+
debugflags = -ggdb3
|
|
85
|
+
warnflags = -Wall -Wextra -Wextra-tokens -Wdeprecated-declarations -Wdivision-by-zero -Wdiv-by-zero -Wimplicit-function-declaration -Wimplicit-int -Wmisleading-indentation -Wpointer-arith -Wshorten-64-to-32 -Wwrite-strings -Wold-style-definition -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wunused-variable -Wundef
|
|
87
86
|
cppflags =
|
|
88
87
|
CCDLFLAGS = -fno-common
|
|
89
88
|
CFLAGS = $(CCDLFLAGS) -O3 -I/opt/homebrew/opt/libyaml/include -I/opt/homebrew/opt/libksba/include -I/opt/homebrew/opt/readline/include -I/opt/homebrew/opt/zlib/include -I/opt/homebrew/opt/openssl@1.1/include $(cflags) -fno-common -pipe $(ARCH_FLAG)
|
|
@@ -92,26 +91,24 @@ DEFS =
|
|
|
92
91
|
CPPFLAGS = -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -D_DARWIN_UNLIMITED_SELECT -D_REENTRANT $(DEFS) $(cppflags)
|
|
93
92
|
CXXFLAGS = $(CCDLFLAGS) -fdeclspec $(ARCH_FLAG)
|
|
94
93
|
ldflags = -L. -L/opt/homebrew/opt/libyaml/lib -L/opt/homebrew/opt/libksba/lib -L/opt/homebrew/opt/readline/lib -L/opt/homebrew/opt/zlib/lib -L/opt/homebrew/opt/openssl@1.1/lib -fstack-protector-strong
|
|
95
|
-
dldflags = -L/opt/homebrew/opt/libyaml/lib -L/opt/homebrew/opt/libksba/lib -L/opt/homebrew/opt/readline/lib -L/opt/homebrew/opt/zlib/lib -L/opt/homebrew/opt/openssl@1.1/lib -Wl,-undefined,dynamic_lookup
|
|
96
|
-
ARCH_FLAG =
|
|
94
|
+
dldflags = -L/opt/homebrew/opt/libyaml/lib -L/opt/homebrew/opt/libksba/lib -L/opt/homebrew/opt/readline/lib -L/opt/homebrew/opt/zlib/lib -L/opt/homebrew/opt/openssl@1.1/lib -Wl,-undefined,dynamic_lookup $(LIBRUBYARG_SHARED)
|
|
95
|
+
ARCH_FLAG =
|
|
97
96
|
DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
|
|
98
97
|
LDSHARED = $(CC) -dynamic -bundle
|
|
99
98
|
LDSHAREDXX = $(CXX) -dynamic -bundle
|
|
100
|
-
POSTLINK = dsymutil $@ 2>/dev/null; { test -z '$(RUBY_CODESIGN)' || codesign -s '$(RUBY_CODESIGN)' $@; }
|
|
101
99
|
AR = ar
|
|
102
|
-
LD = ld
|
|
103
100
|
EXEEXT =
|
|
104
101
|
|
|
105
102
|
RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)
|
|
106
|
-
RUBY_SO_NAME = ruby.3.
|
|
103
|
+
RUBY_SO_NAME = ruby.3.2
|
|
107
104
|
RUBYW_INSTALL_NAME =
|
|
108
105
|
RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version)
|
|
109
106
|
RUBYW_BASE_NAME = rubyw
|
|
110
107
|
RUBY_BASE_NAME = ruby
|
|
111
108
|
|
|
112
|
-
arch = arm64-
|
|
109
|
+
arch = arm64-darwin23
|
|
113
110
|
sitearch = $(arch)
|
|
114
|
-
ruby_version = 3.
|
|
111
|
+
ruby_version = 3.2.0
|
|
115
112
|
ruby = $(bindir)/$(RUBY_BASE_NAME)
|
|
116
113
|
RUBY = $(ruby)
|
|
117
114
|
BUILTRUBY = $(bindir)/$(RUBY_BASE_NAME)
|
|
@@ -131,7 +128,7 @@ TOUCH = exit >
|
|
|
131
128
|
|
|
132
129
|
preload =
|
|
133
130
|
libpath = . $(libdir)
|
|
134
|
-
LIBPATH =
|
|
131
|
+
LIBPATH = -L. -L$(libdir)
|
|
135
132
|
DEFFILE =
|
|
136
133
|
|
|
137
134
|
CLEANFILES = mkmf.log
|
|
@@ -164,7 +161,7 @@ HDRDIR = $(sitehdrdir)$(target_prefix)
|
|
|
164
161
|
ARCHHDRDIR = $(sitearchhdrdir)$(target_prefix)
|
|
165
162
|
TARGET_SO_DIR =
|
|
166
163
|
TARGET_SO = $(TARGET_SO_DIR)$(DLLIB)
|
|
167
|
-
CLEANLIBS = $(TARGET_SO) $(TARGET_SO
|
|
164
|
+
CLEANLIBS = $(TARGET_SO) $(TARGET_SO).dSYM
|
|
168
165
|
CLEANOBJS = $(OBJS) *.bak
|
|
169
166
|
TARGET_SO_DIR_TIMESTAMP = $(TIMESTAMP_DIR)/.sitearchdir.-.smarter_csv.time
|
|
170
167
|
|
data/lib/smarter_csv/reader.rb
CHANGED
|
@@ -10,7 +10,7 @@ module SmarterCSV
|
|
|
10
10
|
# A warning is emitted to STDERR so users know to configure it explicitly.
|
|
11
11
|
DEFAULT_CHUNK_SIZE = 100
|
|
12
12
|
|
|
13
|
-
include ::SmarterCSV::Options
|
|
13
|
+
include ::SmarterCSV::Reader::Options
|
|
14
14
|
include ::SmarterCSV::FileIO
|
|
15
15
|
include ::SmarterCSV::AutoDetection
|
|
16
16
|
include ::SmarterCSV::Headers
|
|
@@ -24,6 +24,10 @@ module SmarterCSV
|
|
|
24
24
|
attr_reader :enforce_utf8, :has_rails, :has_acceleration
|
|
25
25
|
attr_reader :errors, :warnings, :headers, :raw_header, :result
|
|
26
26
|
|
|
27
|
+
def self.default_options
|
|
28
|
+
Options::DEFAULT_OPTIONS
|
|
29
|
+
end
|
|
30
|
+
|
|
27
31
|
# rubocop:disable Naming/MethodName
|
|
28
32
|
def headerA
|
|
29
33
|
warn "Deprecarion Warning: 'headerA' will be removed in future versions. Use 'headders'"
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SmarterCSV
|
|
4
|
+
class Reader
|
|
5
|
+
module Options
|
|
6
|
+
DEFAULT_OPTIONS = {
|
|
7
|
+
acceleration: true, # if user wants to use accelleration or not
|
|
8
|
+
auto_row_sep_chars: 500,
|
|
9
|
+
bad_row_limit: nil,
|
|
10
|
+
chunk_size: nil,
|
|
11
|
+
col_sep: :auto, # was: ',',
|
|
12
|
+
collect_raw_lines: true,
|
|
13
|
+
comment_regexp: nil, # was: /\A#/,
|
|
14
|
+
convert_values_to_numeric: true,
|
|
15
|
+
downcase_header: true,
|
|
16
|
+
duplicate_header_suffix: '', # was: nil,
|
|
17
|
+
field_size_limit: nil, # Integer (bytes) or nil for no limit. Raises FieldSizeLimitExceeded if any
|
|
18
|
+
# extracted field exceeds this size. Prevents DoS from runaway quoted
|
|
19
|
+
# fields (unbounded multiline stitching) or huge inline payloads.
|
|
20
|
+
file_encoding: 'utf-8',
|
|
21
|
+
force_utf8: false,
|
|
22
|
+
headers_in_file: true,
|
|
23
|
+
invalid_byte_sequence: '',
|
|
24
|
+
keep_original_headers: false,
|
|
25
|
+
key_mapping: nil,
|
|
26
|
+
strict: false, # DEPRECATED -> use missing_headers
|
|
27
|
+
missing_headers: :auto, # :auto (auto-generate names for extra cols) or :raise (raise HeaderSizeMismatch)
|
|
28
|
+
missing_header_prefix: 'column_',
|
|
29
|
+
nil_values_matching: nil, # regex: set matching values to nil (key kept); pairs with remove_empty_values
|
|
30
|
+
on_bad_row: :raise,
|
|
31
|
+
on_chunk: nil, # callable: fired after each chunk is parsed, before yielding to the block
|
|
32
|
+
on_complete: nil, # callable: fired once after the entire file is processed
|
|
33
|
+
on_start: nil, # callable: fired once before the first row is parsed
|
|
34
|
+
quote_boundary: :standard, # :standard (only at field boundary 👍) or :legacy (any quote toggles state 👎)
|
|
35
|
+
quote_char: '"',
|
|
36
|
+
quote_escaping: :auto,
|
|
37
|
+
remove_empty_hashes: true,
|
|
38
|
+
remove_empty_values: true,
|
|
39
|
+
remove_unmapped_keys: false,
|
|
40
|
+
remove_values_matching: nil, # DEPRECATED: use nil_values_matching instead
|
|
41
|
+
remove_zero_values: false,
|
|
42
|
+
required_headers: nil,
|
|
43
|
+
required_keys: nil,
|
|
44
|
+
row_sep: :auto, # was: $/,
|
|
45
|
+
silence_missing_keys: false,
|
|
46
|
+
skip_lines: nil,
|
|
47
|
+
strings_as_keys: false,
|
|
48
|
+
strip_chars_from_headers: nil,
|
|
49
|
+
strip_whitespace: true,
|
|
50
|
+
user_provided_headers: nil,
|
|
51
|
+
value_converters: nil,
|
|
52
|
+
verbose: :normal, # nil/:normal (default), :quiet (suppress warnings), :debug (print diagnostics); true/false are deprecated
|
|
53
|
+
with_line_numbers: false,
|
|
54
|
+
}.freeze
|
|
55
|
+
|
|
56
|
+
# NOTE: this is not called when "parse" methods are tested by themselves
|
|
57
|
+
def process_options(given_options = {})
|
|
58
|
+
# Debug output before merge — check raw verbose value (true or :debug)
|
|
59
|
+
$stderr.puts "User provided options:\n#{pp(given_options)}\n" if [true, :debug].include?(given_options[:verbose])
|
|
60
|
+
|
|
61
|
+
# Special case for :user_provided_headers:
|
|
62
|
+
#
|
|
63
|
+
# If we would use the default `headers_in_file: true`, and `:user_provided_headers` are given,
|
|
64
|
+
# we could lose the first data row
|
|
65
|
+
#
|
|
66
|
+
# We now err on the side of treating an actual header as data, rather than losing a data row.
|
|
67
|
+
#
|
|
68
|
+
if given_options[:user_provided_headers] && !given_options.keys.include?(:headers_in_file)
|
|
69
|
+
given_options[:headers_in_file] = false
|
|
70
|
+
warn "WARNING: setting `headers_in_file: false` as a precaution to not lose the first row. Set explicitly to `true` if you have headers." unless given_options[:verbose] == :quiet
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
@options = DEFAULT_OPTIONS.dup.merge!(given_options)
|
|
74
|
+
|
|
75
|
+
# Normalize verbose to a symbol — done once here, stored back into @options.
|
|
76
|
+
# All subsequent checks are free symbol comparisons; no re-evaluation needed.
|
|
77
|
+
# :quiet — suppress all warnings and notices (good for production)
|
|
78
|
+
# :normal — show behavioral warnings (default; helpful for new users)
|
|
79
|
+
# :debug — :normal + print computed options and per-row diagnostics
|
|
80
|
+
# nil is silently normalized to :normal; true/false are deprecated.
|
|
81
|
+
case @options[:verbose]
|
|
82
|
+
when :quiet, :normal, :debug
|
|
83
|
+
# keep as is
|
|
84
|
+
when nil
|
|
85
|
+
@options[:verbose] = :normal
|
|
86
|
+
when false
|
|
87
|
+
warn "DEPRECATION WARNING: verbose: false is deprecated. Use verbose: :normal instead (or omit — it is the default)."
|
|
88
|
+
@options[:verbose] = :normal
|
|
89
|
+
when true
|
|
90
|
+
warn "DEPRECATION WARNING: verbose: true is deprecated. Use verbose: :debug instead."
|
|
91
|
+
@options[:verbose] = :debug
|
|
92
|
+
else
|
|
93
|
+
warn "WARNING: unknown verbose value #{@options[:verbose].inspect}, defaulting to :normal. Valid values: :quiet, :normal, :debug."
|
|
94
|
+
@options[:verbose] = :normal
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# fix invalid input
|
|
98
|
+
@options[:invalid_byte_sequence] ||= ''
|
|
99
|
+
|
|
100
|
+
# Normalize headers: { only: [...] } / { except: [...] } to internal option names.
|
|
101
|
+
# The public API is headers: { only: } or headers: { except: }.
|
|
102
|
+
# Internally we use only_headers: / except_headers: (what the C extension reads).
|
|
103
|
+
if (hdr = @options.delete(:headers)).is_a?(Hash)
|
|
104
|
+
@options[:only_headers] = hdr[:only] if hdr.key?(:only)
|
|
105
|
+
@options[:except_headers] = hdr[:except] if hdr.key?(:except)
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Deprecation: direct use of only_headers: / except_headers: (use headers: { only: } instead)
|
|
109
|
+
if given_options.key?(:only_headers) && !given_options.key?(:headers)
|
|
110
|
+
warn "DEPRECATION WARNING: 'only_headers:' is deprecated. Use 'headers: { only: [...] }' instead." unless @options[:verbose] == :quiet
|
|
111
|
+
end
|
|
112
|
+
if given_options.key?(:except_headers) && !given_options.key?(:headers)
|
|
113
|
+
warn "DEPRECATION WARNING: 'except_headers:' is deprecated. Use 'headers: { except: [...] }' instead." unless @options[:verbose] == :quiet
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
# Normalize only_headers/except_headers to arrays of symbols (internal names, read by C extension)
|
|
117
|
+
if @options[:only_headers]
|
|
118
|
+
values = Array(@options[:only_headers])
|
|
119
|
+
bad = values.reject { |v| v.is_a?(Symbol) || v.is_a?(String) }
|
|
120
|
+
raise SmarterCSV::ValidationError, "headers: { only: } elements must be String or Symbol, got: #{bad.map(&:class).uniq.inspect}" if bad.any?
|
|
121
|
+
@options[:only_headers] = values.map(&:to_sym)
|
|
122
|
+
end
|
|
123
|
+
if @options[:except_headers]
|
|
124
|
+
values = Array(@options[:except_headers])
|
|
125
|
+
bad = values.reject { |v| v.is_a?(Symbol) || v.is_a?(String) }
|
|
126
|
+
raise SmarterCSV::ValidationError, "headers: { except: } elements must be String or Symbol, got: #{bad.map(&:class).uniq.inspect}" if bad.any?
|
|
127
|
+
@options[:except_headers] = values.map(&:to_sym)
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
# Deprecation: remove_values_matching → nil_values_matching
|
|
131
|
+
# Old behavior: removes the key-value pair entirely.
|
|
132
|
+
# New behavior: nil_values_matching sets the value to nil (key kept);
|
|
133
|
+
# combined with the default remove_empty_values: true the net effect is identical.
|
|
134
|
+
# With remove_empty_values: false, the key is retained with a nil value.
|
|
135
|
+
if given_options.key?(:remove_values_matching)
|
|
136
|
+
unless @options[:verbose] == :quiet
|
|
137
|
+
warn "DEPRECATION WARNING: 'remove_values_matching' is deprecated. " \
|
|
138
|
+
"Use 'nil_values_matching' instead. With the default 'remove_empty_values: true' " \
|
|
139
|
+
"the net behavior is identical. With 'remove_empty_values: false', matching values " \
|
|
140
|
+
"are set to nil but the key is retained in the result hash."
|
|
141
|
+
end
|
|
142
|
+
@options[:nil_values_matching] ||= @options[:remove_values_matching]
|
|
143
|
+
@options[:remove_values_matching] = nil # clear to prevent double-processing
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
# Translate deprecated :strict option to :missing_headers
|
|
147
|
+
if given_options.key?(:strict)
|
|
148
|
+
unless @options[:verbose] == :quiet
|
|
149
|
+
warn "DEPRECATION WARNING: 'strict' option is deprecated and will be removed in a future version. " \
|
|
150
|
+
"Use 'missing_headers: :raise' instead of 'strict: true', or 'missing_headers: :auto' instead of 'strict: false'."
|
|
151
|
+
end
|
|
152
|
+
@options[:missing_headers] = @options[:strict] ? :raise : :auto unless given_options.key?(:missing_headers)
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
# Keep :strict synchronized with :missing_headers (C extension reads :strict directly)
|
|
156
|
+
@options[:strict] = (@options[:missing_headers] == :raise)
|
|
157
|
+
|
|
158
|
+
$stderr.puts "Computed options:\n#{pp(@options)}\n" if @options[:verbose] == :debug
|
|
159
|
+
|
|
160
|
+
validate_options!(@options)
|
|
161
|
+
@options
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
private
|
|
165
|
+
|
|
166
|
+
def validate_options!(options)
|
|
167
|
+
# deprecate required_headers
|
|
168
|
+
unless options[:required_headers].nil?
|
|
169
|
+
warn "DEPRECATION WARNING: please use 'required_keys' instead of 'required_headers'" unless options[:verbose] == :quiet
|
|
170
|
+
if options[:required_keys].nil?
|
|
171
|
+
options[:required_keys] = options[:required_headers]
|
|
172
|
+
options[:required_headers] = nil
|
|
173
|
+
end
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
keys = options.keys
|
|
177
|
+
errors = []
|
|
178
|
+
errors << "invalid row_sep" if keys.include?(:row_sep) && !option_valid?(options[:row_sep])
|
|
179
|
+
errors << "invalid col_sep" if keys.include?(:col_sep) && !option_valid?(options[:col_sep])
|
|
180
|
+
errors << "invalid quote_char" if keys.include?(:quote_char) && !option_valid?(options[:quote_char])
|
|
181
|
+
if keys.include?(:quote_char) && options[:quote_char].is_a?(String) && options[:quote_char].bytesize > 1
|
|
182
|
+
errors << "invalid quote_char: must be a single byte (got #{options[:quote_char].inspect})"
|
|
183
|
+
end
|
|
184
|
+
unless %i[double_quotes backslash auto].include?(options[:quote_escaping])
|
|
185
|
+
errors << "invalid quote_escaping: must be :double_quotes, :backslash, or :auto"
|
|
186
|
+
end
|
|
187
|
+
unless %i[legacy standard].include?(options[:quote_boundary])
|
|
188
|
+
errors << "invalid quote_boundary: must be :legacy or :standard"
|
|
189
|
+
end
|
|
190
|
+
fsl = options[:field_size_limit]
|
|
191
|
+
unless fsl.nil? || (fsl.is_a?(Integer) && fsl > 0)
|
|
192
|
+
errors << "invalid field_size_limit: must be nil or a positive Integer (got #{fsl.inspect})"
|
|
193
|
+
end
|
|
194
|
+
obr = options[:on_bad_row]
|
|
195
|
+
unless %i[raise skip collect].include?(obr) || obr.respond_to?(:call)
|
|
196
|
+
errors << "invalid on_bad_row: must be :raise, :skip, :collect, or a callable"
|
|
197
|
+
end
|
|
198
|
+
%i[on_start on_chunk on_complete].each do |hook|
|
|
199
|
+
val = options[hook]
|
|
200
|
+
errors << "invalid #{hook}: must be nil or a callable" if !val.nil? && !val.respond_to?(:call)
|
|
201
|
+
end
|
|
202
|
+
unless %i[auto raise].include?(options[:missing_headers])
|
|
203
|
+
errors << "invalid missing_headers: must be :auto or :raise"
|
|
204
|
+
end
|
|
205
|
+
if options[:only_headers] && options[:except_headers]
|
|
206
|
+
errors << "cannot use both 'headers: { only: }' and 'headers: { except: }' at the same time"
|
|
207
|
+
end
|
|
208
|
+
raise SmarterCSV::ValidationError, errors.inspect if errors.any?
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
def option_valid?(str)
|
|
212
|
+
return true if str.is_a?(Symbol) && str == :auto
|
|
213
|
+
return true if str.is_a?(String) && !str.empty?
|
|
214
|
+
|
|
215
|
+
false
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
def pp(value)
|
|
219
|
+
defined?(AwesomePrint) ? value.awesome_inspect(index: nil) : value.inspect
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
end
|
|
223
|
+
end
|
data/lib/smarter_csv/version.rb
CHANGED
data/lib/smarter_csv/writer.rb
CHANGED
|
@@ -25,6 +25,8 @@ module SmarterCSV
|
|
|
25
25
|
#
|
|
26
26
|
# The Writer automatically quotes fields containing the col_sep, row_sep, or the quote_char.
|
|
27
27
|
#
|
|
28
|
+
# See SmarterCSV::Writer::Options::DEFAULT_OPTIONS for all options and their defaults.
|
|
29
|
+
#
|
|
28
30
|
# Options:
|
|
29
31
|
# col_sep : defaults to , but can be set to any other character
|
|
30
32
|
# row_sep : defaults to LF \n , but can be set to \r\n or \r or anything else
|
|
@@ -42,7 +44,9 @@ module SmarterCSV
|
|
|
42
44
|
# write_empty_value: string written in place of empty-string field values (default: '')
|
|
43
45
|
# write_bom: when true, prepends a UTF-8 BOM (\xEF\xBB\xBF) to the output (default: false)
|
|
44
46
|
# Useful for Excel compatibility with non-ASCII content.
|
|
45
|
-
|
|
47
|
+
# write_headers: when false, suppresses the header line (default: true). Useful when appending to
|
|
48
|
+
# an existing CSV file opened in 'a' mode — the caller controls the file mode.
|
|
49
|
+
#
|
|
46
50
|
# IMPORTANT NOTES:
|
|
47
51
|
# * Data hashes could contain strings or symbols as keys.
|
|
48
52
|
# Make sure to use the correct form when specifying headers manually,
|
|
@@ -51,36 +55,42 @@ module SmarterCSV
|
|
|
51
55
|
attr_reader :options, :row_sep, :col_sep, :quote_char, :force_quotes, :discover_headers, :headers, :map_headers, :output_file
|
|
52
56
|
|
|
53
57
|
class Writer
|
|
54
|
-
|
|
55
|
-
|
|
58
|
+
include ::SmarterCSV::Writer::Options
|
|
59
|
+
|
|
60
|
+
def self.default_options
|
|
61
|
+
Options::DEFAULT_OPTIONS
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def initialize(file_path_or_io, given_options = {})
|
|
65
|
+
opts = Options::DEFAULT_OPTIONS.merge(given_options)
|
|
66
|
+
@options = opts
|
|
56
67
|
|
|
57
|
-
@row_sep =
|
|
58
|
-
@col_sep =
|
|
59
|
-
@quote_char =
|
|
68
|
+
@row_sep = opts[:row_sep]
|
|
69
|
+
@col_sep = opts[:col_sep]
|
|
70
|
+
@quote_char = opts[:quote_char]
|
|
60
71
|
@escaped_quote_char = @quote_char * 2
|
|
61
|
-
@force_quotes =
|
|
62
|
-
@quote_headers =
|
|
63
|
-
@disable_auto_quoting =
|
|
64
|
-
@value_converters =
|
|
65
|
-
@encoding =
|
|
66
|
-
@write_nil_value =
|
|
67
|
-
@write_empty_value =
|
|
68
|
-
@write_bom =
|
|
72
|
+
@force_quotes = opts[:force_quotes] == true
|
|
73
|
+
@quote_headers = opts[:quote_headers] == true
|
|
74
|
+
@disable_auto_quoting = opts[:disable_auto_quoting] == true
|
|
75
|
+
@value_converters = opts[:value_converters] || {}
|
|
76
|
+
@encoding = opts[:encoding]
|
|
77
|
+
@write_nil_value = opts[:write_nil_value]
|
|
78
|
+
@write_empty_value = opts[:write_empty_value]
|
|
79
|
+
@write_bom = opts[:write_bom] == true
|
|
80
|
+
@write_headers = opts[:write_headers] == true
|
|
69
81
|
@map_all_keys = @value_converters.has_key?(:_all)
|
|
70
82
|
@mapped_keys = Set.new(@value_converters.keys - [:_all])
|
|
71
|
-
@header_converter =
|
|
83
|
+
@header_converter = opts[:header_converter]
|
|
72
84
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
@discover_headers = options[:discover_headers] == true
|
|
85
|
+
if given_options.has_key?(:discover_headers)
|
|
86
|
+
@discover_headers = given_options[:discover_headers] == true
|
|
76
87
|
else
|
|
77
|
-
@discover_headers = !(
|
|
88
|
+
@discover_headers = !(given_options.has_key?(:map_headers) || given_options.has_key?(:headers))
|
|
78
89
|
end
|
|
79
90
|
|
|
80
|
-
@headers = []
|
|
81
|
-
@headers =
|
|
82
|
-
@
|
|
83
|
-
@map_headers = options[:map_headers] || {}
|
|
91
|
+
@headers = opts[:headers].dup
|
|
92
|
+
@headers = given_options[:map_headers].keys if given_options.has_key?(:map_headers) && !given_options.has_key?(:headers)
|
|
93
|
+
@map_headers = opts[:map_headers]
|
|
84
94
|
|
|
85
95
|
# Accept an IO-like object (StringIO, IO, etc.) or any path-like object (String, Pathname, etc.)
|
|
86
96
|
if file_path_or_io.respond_to?(:write)
|
|
@@ -110,7 +120,7 @@ module SmarterCSV
|
|
|
110
120
|
# and stream data rows directly to @output_file, bypassing the temp file entirely.
|
|
111
121
|
@temp_file = nil
|
|
112
122
|
@output_file.write("\xEF\xBB\xBF") if @write_bom
|
|
113
|
-
write_header_line
|
|
123
|
+
write_header_line if @write_headers
|
|
114
124
|
else
|
|
115
125
|
@temp_file = Tempfile.new('smarter_csv')
|
|
116
126
|
end
|
|
@@ -134,7 +144,7 @@ module SmarterCSV
|
|
|
134
144
|
# Header-discovery mode: headers were accumulated while writing rows;
|
|
135
145
|
# now prepend the header line and copy the buffered rows to the output.
|
|
136
146
|
@output_file.write("\xEF\xBB\xBF") if @write_bom
|
|
137
|
-
write_header_line
|
|
147
|
+
write_header_line if @write_headers
|
|
138
148
|
@temp_file.rewind
|
|
139
149
|
@output_file.write(@temp_file.read)
|
|
140
150
|
@temp_file.close!
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SmarterCSV
|
|
4
|
+
class Writer
|
|
5
|
+
module Options
|
|
6
|
+
DEFAULT_OPTIONS = {
|
|
7
|
+
col_sep: ',',
|
|
8
|
+
row_sep: $/,
|
|
9
|
+
quote_char: '"',
|
|
10
|
+
force_quotes: false,
|
|
11
|
+
quote_headers: false,
|
|
12
|
+
disable_auto_quoting: false,
|
|
13
|
+
value_converters: {},
|
|
14
|
+
encoding: nil,
|
|
15
|
+
write_nil_value: '',
|
|
16
|
+
write_empty_value: '',
|
|
17
|
+
write_bom: false,
|
|
18
|
+
write_headers: true,
|
|
19
|
+
header_converter: nil,
|
|
20
|
+
discover_headers: true,
|
|
21
|
+
headers: [],
|
|
22
|
+
map_headers: {},
|
|
23
|
+
}.freeze
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
data/lib/smarter_csv.rb
CHANGED
|
@@ -5,7 +5,8 @@ require "smarter_csv/version"
|
|
|
5
5
|
require "smarter_csv/errors"
|
|
6
6
|
|
|
7
7
|
require "smarter_csv/file_io"
|
|
8
|
-
require "smarter_csv/
|
|
8
|
+
require "smarter_csv/reader_options"
|
|
9
|
+
require "smarter_csv/writer_options"
|
|
9
10
|
require "smarter_csv/auto_detection"
|
|
10
11
|
require 'smarter_csv/header_transformations'
|
|
11
12
|
require 'smarter_csv/header_validations'
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: smarter_csv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.16.
|
|
4
|
+
version: 1.16.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Tilo Sloboda
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2026-
|
|
10
|
+
date: 2026-04-14 00:00:00.000000000 Z
|
|
11
11
|
dependencies: []
|
|
12
12
|
description: |
|
|
13
13
|
SmarterCSV is a high-performance CSV reader and writer for Ruby focused on
|
|
@@ -64,12 +64,7 @@ files:
|
|
|
64
64
|
- docs/value_converters.md
|
|
65
65
|
- ext/smarter_csv/Makefile
|
|
66
66
|
- ext/smarter_csv/extconf.rb
|
|
67
|
-
- ext/smarter_csv/smarter_csv.bundle
|
|
68
|
-
- ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Info.plist
|
|
69
|
-
- ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/DWARF/smarter_csv.bundle
|
|
70
|
-
- ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/Relocations/aarch64/smarter_csv.bundle.yml
|
|
71
67
|
- ext/smarter_csv/smarter_csv.c
|
|
72
|
-
- ext/smarter_csv/smarter_csv.o
|
|
73
68
|
- images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.png
|
|
74
69
|
- images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.svg
|
|
75
70
|
- images/SmarterCSV_1.16.0_vs_previous_C-speedup.png
|
|
@@ -84,11 +79,12 @@ files:
|
|
|
84
79
|
- lib/smarter_csv/header_transformations.rb
|
|
85
80
|
- lib/smarter_csv/header_validations.rb
|
|
86
81
|
- lib/smarter_csv/headers.rb
|
|
87
|
-
- lib/smarter_csv/options.rb
|
|
88
82
|
- lib/smarter_csv/parser.rb
|
|
89
83
|
- lib/smarter_csv/reader.rb
|
|
84
|
+
- lib/smarter_csv/reader_options.rb
|
|
90
85
|
- lib/smarter_csv/version.rb
|
|
91
86
|
- lib/smarter_csv/writer.rb
|
|
87
|
+
- lib/smarter_csv/writer_options.rb
|
|
92
88
|
- smarter_csv.gemspec
|
|
93
89
|
homepage: https://github.com/tilo/smarter_csv
|
|
94
90
|
licenses:
|
|
Binary file
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
-
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
3
|
-
<plist version="1.0">
|
|
4
|
-
<dict>
|
|
5
|
-
<key>CFBundleDevelopmentRegion</key>
|
|
6
|
-
<string>English</string>
|
|
7
|
-
<key>CFBundleIdentifier</key>
|
|
8
|
-
<string>com.apple.xcode.dsym.smarter_csv.bundle</string>
|
|
9
|
-
<key>CFBundleInfoDictionaryVersion</key>
|
|
10
|
-
<string>6.0</string>
|
|
11
|
-
<key>CFBundlePackageType</key>
|
|
12
|
-
<string>dSYM</string>
|
|
13
|
-
<key>CFBundleSignature</key>
|
|
14
|
-
<string>????</string>
|
|
15
|
-
<key>CFBundleShortVersionString</key>
|
|
16
|
-
<string>1.0</string>
|
|
17
|
-
<key>CFBundleVersion</key>
|
|
18
|
-
<string>1</string>
|
|
19
|
-
</dict>
|
|
20
|
-
</plist>
|
|
Binary file
|
|
Binary file
|
data/lib/smarter_csv/options.rb
DELETED
|
@@ -1,229 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module SmarterCSV
|
|
4
|
-
#
|
|
5
|
-
# NOTE: this is not called when "parse" methods are tested by themselves
|
|
6
|
-
#
|
|
7
|
-
# ONLY FOR BACKWARDS-COMPATIBILITY
|
|
8
|
-
def self.default_options
|
|
9
|
-
Options::DEFAULT_OPTIONS
|
|
10
|
-
end
|
|
11
|
-
|
|
12
|
-
module Options
|
|
13
|
-
DEFAULT_OPTIONS = {
|
|
14
|
-
acceleration: true, # if user wants to use acceleration or not
|
|
15
|
-
auto_row_sep_chars: 500,
|
|
16
|
-
bad_row_limit: nil,
|
|
17
|
-
chunk_size: nil,
|
|
18
|
-
col_sep: :auto, # was: ',',
|
|
19
|
-
collect_raw_lines: true,
|
|
20
|
-
comment_regexp: nil, # was: /\A#/,
|
|
21
|
-
convert_values_to_numeric: true,
|
|
22
|
-
downcase_header: true,
|
|
23
|
-
duplicate_header_suffix: '', # was: nil,
|
|
24
|
-
field_size_limit: nil, # Integer (bytes) or nil for no limit. Raises FieldSizeLimitExceeded if any
|
|
25
|
-
# extracted field exceeds this size. Prevents DoS from runaway quoted
|
|
26
|
-
# fields (unbounded multiline stitching) or huge inline payloads.
|
|
27
|
-
file_encoding: 'utf-8',
|
|
28
|
-
force_utf8: false,
|
|
29
|
-
headers_in_file: true,
|
|
30
|
-
invalid_byte_sequence: '',
|
|
31
|
-
keep_original_headers: false,
|
|
32
|
-
key_mapping: nil,
|
|
33
|
-
strict: false, # DEPRECATED -> use missing_headers
|
|
34
|
-
missing_headers: :auto, # :auto (auto-generate names for extra cols) or :raise (raise HeaderSizeMismatch)
|
|
35
|
-
missing_header_prefix: 'column_',
|
|
36
|
-
nil_values_matching: nil, # regex: set matching values to nil (key kept); pairs with remove_empty_values
|
|
37
|
-
on_bad_row: :raise,
|
|
38
|
-
on_chunk: nil, # callable: fired after each chunk is parsed, before yielding to the block
|
|
39
|
-
on_complete: nil, # callable: fired once after the entire file is processed
|
|
40
|
-
on_start: nil, # callable: fired once before the first row is parsed
|
|
41
|
-
quote_boundary: :standard, # :standard (only at field boundary 👍) or :legacy (any quote toggles state 👎)
|
|
42
|
-
quote_char: '"',
|
|
43
|
-
quote_escaping: :auto,
|
|
44
|
-
remove_empty_hashes: true,
|
|
45
|
-
remove_empty_values: true,
|
|
46
|
-
remove_unmapped_keys: false,
|
|
47
|
-
remove_values_matching: nil, # DEPRECATED: use nil_values_matching instead
|
|
48
|
-
remove_zero_values: false,
|
|
49
|
-
required_headers: nil,
|
|
50
|
-
required_keys: nil,
|
|
51
|
-
row_sep: :auto, # was: $/,
|
|
52
|
-
silence_missing_keys: false,
|
|
53
|
-
skip_lines: nil,
|
|
54
|
-
strings_as_keys: false,
|
|
55
|
-
strip_chars_from_headers: nil,
|
|
56
|
-
strip_whitespace: true,
|
|
57
|
-
user_provided_headers: nil,
|
|
58
|
-
value_converters: nil,
|
|
59
|
-
verbose: :normal, # nil/:normal (default), :quiet (suppress warnings), :debug (print diagnostics); true/false are deprecated
|
|
60
|
-
with_line_numbers: false,
|
|
61
|
-
}.freeze
|
|
62
|
-
|
|
63
|
-
# NOTE: this is not called when "parse" methods are tested by themselves
|
|
64
|
-
def process_options(given_options = {})
|
|
65
|
-
# Debug output before merge — check raw verbose value (true or :debug)
|
|
66
|
-
$stderr.puts "User provided options:\n#{pp(given_options)}\n" if [true, :debug].include?(given_options[:verbose])
|
|
67
|
-
|
|
68
|
-
# Special case for :user_provided_headers:
|
|
69
|
-
#
|
|
70
|
-
# If we would use the default `headers_in_file: true`, and `:user_provided_headers` are given,
|
|
71
|
-
# we could lose the first data row
|
|
72
|
-
#
|
|
73
|
-
# We now err on the side of treating an actual header as data, rather than losing a data row.
|
|
74
|
-
#
|
|
75
|
-
if given_options[:user_provided_headers] && !given_options.keys.include?(:headers_in_file)
|
|
76
|
-
given_options[:headers_in_file] = false
|
|
77
|
-
warn "WARNING: setting `headers_in_file: false` as a precaution to not lose the first row. Set explicitly to `true` if you have headers." unless given_options[:verbose] == :quiet
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
@options = DEFAULT_OPTIONS.dup.merge!(given_options)
|
|
81
|
-
|
|
82
|
-
# Normalize verbose to a symbol — done once here, stored back into @options.
|
|
83
|
-
# All subsequent checks are free symbol comparisons; no re-evaluation needed.
|
|
84
|
-
# :quiet — suppress all warnings and notices (good for production)
|
|
85
|
-
# :normal — show behavioral warnings (default; helpful for new users)
|
|
86
|
-
# :debug — :normal + print computed options and per-row diagnostics
|
|
87
|
-
# nil is silently normalized to :normal; true/false are deprecated.
|
|
88
|
-
case @options[:verbose]
|
|
89
|
-
when :quiet, :normal, :debug
|
|
90
|
-
# keep as is
|
|
91
|
-
when nil
|
|
92
|
-
@options[:verbose] = :normal
|
|
93
|
-
when false
|
|
94
|
-
warn "DEPRECATION WARNING: verbose: false is deprecated. Use verbose: :normal instead (or omit — it is the default)."
|
|
95
|
-
@options[:verbose] = :normal
|
|
96
|
-
when true
|
|
97
|
-
warn "DEPRECATION WARNING: verbose: true is deprecated. Use verbose: :debug instead."
|
|
98
|
-
@options[:verbose] = :debug
|
|
99
|
-
else
|
|
100
|
-
warn "WARNING: unknown verbose value #{@options[:verbose].inspect}, defaulting to :normal. Valid values: :quiet, :normal, :debug."
|
|
101
|
-
@options[:verbose] = :normal
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
# fix invalid input
|
|
105
|
-
@options[:invalid_byte_sequence] ||= ''
|
|
106
|
-
|
|
107
|
-
# Normalize headers: { only: [...] } / { except: [...] } to internal option names.
|
|
108
|
-
# The public API is headers: { only: } or headers: { except: }.
|
|
109
|
-
# Internally we use only_headers: / except_headers: (what the C extension reads).
|
|
110
|
-
if (hdr = @options.delete(:headers)).is_a?(Hash)
|
|
111
|
-
@options[:only_headers] = hdr[:only] if hdr.key?(:only)
|
|
112
|
-
@options[:except_headers] = hdr[:except] if hdr.key?(:except)
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
# Deprecation: direct use of only_headers: / except_headers: (use headers: { only: } instead)
|
|
116
|
-
if given_options.key?(:only_headers) && !given_options.key?(:headers)
|
|
117
|
-
warn "DEPRECATION WARNING: 'only_headers:' is deprecated. Use 'headers: { only: [...] }' instead." unless @options[:verbose] == :quiet
|
|
118
|
-
end
|
|
119
|
-
if given_options.key?(:except_headers) && !given_options.key?(:headers)
|
|
120
|
-
warn "DEPRECATION WARNING: 'except_headers:' is deprecated. Use 'headers: { except: [...] }' instead." unless @options[:verbose] == :quiet
|
|
121
|
-
end
|
|
122
|
-
|
|
123
|
-
# Normalize only_headers/except_headers to arrays of symbols (internal names, read by C extension)
|
|
124
|
-
if @options[:only_headers]
|
|
125
|
-
values = Array(@options[:only_headers])
|
|
126
|
-
bad = values.reject { |v| v.is_a?(Symbol) || v.is_a?(String) }
|
|
127
|
-
raise SmarterCSV::ValidationError, "headers: { only: } elements must be String or Symbol, got: #{bad.map(&:class).uniq.inspect}" if bad.any?
|
|
128
|
-
@options[:only_headers] = values.map(&:to_sym)
|
|
129
|
-
end
|
|
130
|
-
if @options[:except_headers]
|
|
131
|
-
values = Array(@options[:except_headers])
|
|
132
|
-
bad = values.reject { |v| v.is_a?(Symbol) || v.is_a?(String) }
|
|
133
|
-
raise SmarterCSV::ValidationError, "headers: { except: } elements must be String or Symbol, got: #{bad.map(&:class).uniq.inspect}" if bad.any?
|
|
134
|
-
@options[:except_headers] = values.map(&:to_sym)
|
|
135
|
-
end
|
|
136
|
-
|
|
137
|
-
# Deprecation: remove_values_matching → nil_values_matching
|
|
138
|
-
# Old behavior: removes the key-value pair entirely.
|
|
139
|
-
# New behavior: nil_values_matching sets the value to nil (key kept);
|
|
140
|
-
# combined with the default remove_empty_values: true the net effect is identical.
|
|
141
|
-
# With remove_empty_values: false, the key is retained with a nil value.
|
|
142
|
-
if given_options.key?(:remove_values_matching)
|
|
143
|
-
unless @options[:verbose] == :quiet
|
|
144
|
-
warn "DEPRECATION WARNING: 'remove_values_matching' is deprecated. " \
|
|
145
|
-
"Use 'nil_values_matching' instead. With the default 'remove_empty_values: true' " \
|
|
146
|
-
"the net behavior is identical. With 'remove_empty_values: false', matching values " \
|
|
147
|
-
"are set to nil but the key is retained in the result hash."
|
|
148
|
-
end
|
|
149
|
-
@options[:nil_values_matching] ||= @options[:remove_values_matching]
|
|
150
|
-
@options[:remove_values_matching] = nil # clear to prevent double-processing
|
|
151
|
-
end
|
|
152
|
-
|
|
153
|
-
# Translate deprecated :strict option to :missing_headers
|
|
154
|
-
if given_options.key?(:strict)
|
|
155
|
-
unless @options[:verbose] == :quiet
|
|
156
|
-
warn "DEPRECATION WARNING: 'strict' option is deprecated and will be removed in a future version. " \
|
|
157
|
-
"Use 'missing_headers: :raise' instead of 'strict: true', or 'missing_headers: :auto' instead of 'strict: false'."
|
|
158
|
-
end
|
|
159
|
-
@options[:missing_headers] = @options[:strict] ? :raise : :auto unless given_options.key?(:missing_headers)
|
|
160
|
-
end
|
|
161
|
-
|
|
162
|
-
# Keep :strict synchronized with :missing_headers (C extension reads :strict directly)
|
|
163
|
-
@options[:strict] = (@options[:missing_headers] == :raise)
|
|
164
|
-
|
|
165
|
-
$stderr.puts "Computed options:\n#{pp(@options)}\n" if @options[:verbose] == :debug
|
|
166
|
-
|
|
167
|
-
validate_options!(@options)
|
|
168
|
-
@options
|
|
169
|
-
end
|
|
170
|
-
|
|
171
|
-
private
|
|
172
|
-
|
|
173
|
-
def validate_options!(options)
|
|
174
|
-
# deprecate required_headers
|
|
175
|
-
unless options[:required_headers].nil?
|
|
176
|
-
warn "DEPRECATION WARNING: please use 'required_keys' instead of 'required_headers'" unless options[:verbose] == :quiet
|
|
177
|
-
if options[:required_keys].nil?
|
|
178
|
-
options[:required_keys] = options[:required_headers]
|
|
179
|
-
options[:required_headers] = nil
|
|
180
|
-
end
|
|
181
|
-
end
|
|
182
|
-
|
|
183
|
-
keys = options.keys
|
|
184
|
-
errors = []
|
|
185
|
-
errors << "invalid row_sep" if keys.include?(:row_sep) && !option_valid?(options[:row_sep])
|
|
186
|
-
errors << "invalid col_sep" if keys.include?(:col_sep) && !option_valid?(options[:col_sep])
|
|
187
|
-
errors << "invalid quote_char" if keys.include?(:quote_char) && !option_valid?(options[:quote_char])
|
|
188
|
-
if keys.include?(:quote_char) && options[:quote_char].is_a?(String) && options[:quote_char].bytesize > 1
|
|
189
|
-
errors << "invalid quote_char: must be a single byte (got #{options[:quote_char].inspect})"
|
|
190
|
-
end
|
|
191
|
-
unless %i[double_quotes backslash auto].include?(options[:quote_escaping])
|
|
192
|
-
errors << "invalid quote_escaping: must be :double_quotes, :backslash, or :auto"
|
|
193
|
-
end
|
|
194
|
-
unless %i[legacy standard].include?(options[:quote_boundary])
|
|
195
|
-
errors << "invalid quote_boundary: must be :legacy or :standard"
|
|
196
|
-
end
|
|
197
|
-
fsl = options[:field_size_limit]
|
|
198
|
-
unless fsl.nil? || (fsl.is_a?(Integer) && fsl > 0)
|
|
199
|
-
errors << "invalid field_size_limit: must be nil or a positive Integer (got #{fsl.inspect})"
|
|
200
|
-
end
|
|
201
|
-
obr = options[:on_bad_row]
|
|
202
|
-
unless %i[raise skip collect].include?(obr) || obr.respond_to?(:call)
|
|
203
|
-
errors << "invalid on_bad_row: must be :raise, :skip, :collect, or a callable"
|
|
204
|
-
end
|
|
205
|
-
%i[on_start on_chunk on_complete].each do |hook|
|
|
206
|
-
val = options[hook]
|
|
207
|
-
errors << "invalid #{hook}: must be nil or a callable" if !val.nil? && !val.respond_to?(:call)
|
|
208
|
-
end
|
|
209
|
-
unless %i[auto raise].include?(options[:missing_headers])
|
|
210
|
-
errors << "invalid missing_headers: must be :auto or :raise"
|
|
211
|
-
end
|
|
212
|
-
if options[:only_headers] && options[:except_headers]
|
|
213
|
-
errors << "cannot use both 'headers: { only: }' and 'headers: { except: }' at the same time"
|
|
214
|
-
end
|
|
215
|
-
raise SmarterCSV::ValidationError, errors.inspect if errors.any?
|
|
216
|
-
end
|
|
217
|
-
|
|
218
|
-
def option_valid?(str)
|
|
219
|
-
return true if str.is_a?(Symbol) && str == :auto
|
|
220
|
-
return true if str.is_a?(String) && !str.empty?
|
|
221
|
-
|
|
222
|
-
false
|
|
223
|
-
end
|
|
224
|
-
|
|
225
|
-
def pp(value)
|
|
226
|
-
defined?(AwesomePrint) ? value.awesome_inspect(index: nil) : value.inspect
|
|
227
|
-
end
|
|
228
|
-
end
|
|
229
|
-
end
|