smarter_csv 1.16.2 → 1.16.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +6 -0
- data/CHANGELOG.md +26 -0
- data/docs/basic_write_api.md +48 -0
- data/docs/options.md +1 -0
- data/ext/smarter_csv/Makefile +14 -17
- data/lib/smarter_csv/reader.rb +34 -28
- data/lib/smarter_csv/reader_options.rb +223 -0
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv/writer.rb +35 -25
- data/lib/smarter_csv/writer_options.rb +26 -0
- data/lib/smarter_csv.rb +15 -11
- metadata +4 -8
- data/ext/smarter_csv/smarter_csv.bundle +0 -0
- data/ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Info.plist +0 -20
- data/ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/DWARF/smarter_csv.bundle +0 -0
- data/ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/Relocations/aarch64/smarter_csv.bundle.yml +0 -5
- data/ext/smarter_csv/smarter_csv.o +0 -0
- data/lib/smarter_csv/options.rb +0 -229
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9f0dc97fe8b296d479efa58b5e404636fe66dbe768e032de987e4c2736b619a4
|
|
4
|
+
data.tar.gz: 6ffaa0b2f74fb6a48c22a28c21a254a0e9b962bcfb9d1b979e72e54ae446a5c1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8e16f3d049432df188373da120fd4d5f04fd4a49d6a3bb3e91abf6f5722fa6fc90ee302e6db5348349f65290226e0da046cb10d71b2d267fc1be4d63af18107c
|
|
7
|
+
data.tar.gz: 473ce5f7d1b2bceb7a82898b90c9a19e4076675eea1d748af97d5c7c04abaf15e1cf95a8b774bab40ba5f36cad6ad517e874eab3e63854302ea9d6d4465fefc8
|
data/.rubocop.yml
CHANGED
|
@@ -121,6 +121,9 @@ Style/PercentLiteralDelimiters:
|
|
|
121
121
|
Style/RegexpLiteral:
|
|
122
122
|
Enabled: false
|
|
123
123
|
|
|
124
|
+
Style/RescueModifier:
|
|
125
|
+
Enabled: false
|
|
126
|
+
|
|
124
127
|
Style/SafeNavigation:
|
|
125
128
|
Enabled: false
|
|
126
129
|
|
|
@@ -153,6 +156,9 @@ Style/SymbolArray:
|
|
|
153
156
|
Style/SymbolProc: # old Ruby versions can't do this
|
|
154
157
|
Enabled: false
|
|
155
158
|
|
|
159
|
+
Style/TernaryParentheses:
|
|
160
|
+
Enabled: false
|
|
161
|
+
|
|
156
162
|
Style/TrailingCommaInArrayLiteral:
|
|
157
163
|
Enabled: false
|
|
158
164
|
EnforcedStyleForMultiline: consistent_comma
|
data/CHANGELOG.md
CHANGED
|
@@ -1,6 +1,32 @@
|
|
|
1
1
|
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
|
3
3
|
|
|
4
|
+
## 1.16.4 (2026-04-21) — Bug Fixes
|
|
5
|
+
|
|
6
|
+
RSpec tests: **1,434 → 1,467** (+33 tests)
|
|
7
|
+
|
|
8
|
+
### Bug Fixes
|
|
9
|
+
|
|
10
|
+
* Fixed bug in `SmarterCSV.errors` that could lose collected records when processing raises mid-stream,
|
|
11
|
+
e.g. when `bad_row_limit:` was exceeded (`TooManyBadRows`), or when a user's block raised through `.process` / `.each` / `.each_chunk`.
|
|
12
|
+
|
|
13
|
+
* Fixed `enforce_utf8_encoding` incorrectly replacing all non-ASCII bytes when the input string was tagged as `ASCII-8BIT` (binary).
|
|
14
|
+
The encoding is now relabeled to UTF-8 before transcoding, so only genuinely invalid byte sequences are replaced.
|
|
15
|
+
|
|
16
|
+
## 1.16.3 (2026-04-14) — New Feature
|
|
17
|
+
|
|
18
|
+
RSpec tests: **1,425 → 1,434** (+9 tests)
|
|
19
|
+
|
|
20
|
+
### New Features
|
|
21
|
+
|
|
22
|
+
* **`write_headers: false`** — new `SmarterCSV::Writer` option to suppress the header line when appending rows to an existing CSV file opened in `'a'` mode.
|
|
23
|
+
Defaults to `true` (existing behavior, fully backwards-compatible).
|
|
24
|
+
|
|
25
|
+
See [Appending to an Existing CSV File](docs/basic_write_api.md#appending-to-an-existing-csv-file).
|
|
26
|
+
|
|
27
|
+
### Other
|
|
28
|
+
* Refactor of internal options handling
|
|
29
|
+
|
|
4
30
|
## 1.16.2 (2026-03-30) — Bug Fixes
|
|
5
31
|
|
|
6
32
|
RSpec tests: **1,410 → 1,425** (+15 tests)
|
data/docs/basic_write_api.md
CHANGED
|
@@ -568,6 +568,54 @@ end
|
|
|
568
568
|
> **Note:** Only use `write_bom: true` with UTF-8 output. Adding a UTF-8 BOM to a
|
|
569
569
|
> non-UTF-8 file will corrupt it.
|
|
570
570
|
|
|
571
|
+
## Appending to an Existing CSV File
|
|
572
|
+
|
|
573
|
+
Use `write_headers: false` to suppress the header line when appending rows to an
|
|
574
|
+
existing CSV file. The caller is responsible for opening the file in append mode — the
|
|
575
|
+
Writer writes only what you ask it to write.
|
|
576
|
+
|
|
577
|
+
```ruby
|
|
578
|
+
# First write: create the file with header + first batch of rows
|
|
579
|
+
SmarterCSV.generate('output.csv') do |csv|
|
|
580
|
+
csv << { name: 'Alice', age: 30 }
|
|
581
|
+
end
|
|
582
|
+
# output.csv:
|
|
583
|
+
# name,age
|
|
584
|
+
# Alice,30
|
|
585
|
+
|
|
586
|
+
# Later: append more rows without repeating the header
|
|
587
|
+
File.open('output.csv', 'a') do |f|
|
|
588
|
+
SmarterCSV.generate(f, write_headers: false) do |csv|
|
|
589
|
+
csv << { name: 'Bob', age: 25 }
|
|
590
|
+
end
|
|
591
|
+
end
|
|
592
|
+
# output.csv:
|
|
593
|
+
# name,age
|
|
594
|
+
# Alice,30
|
|
595
|
+
# Bob,25
|
|
596
|
+
```
|
|
597
|
+
|
|
598
|
+
The Writer still uses the hash keys to determine column order, so the appended rows
|
|
599
|
+
will be aligned correctly as long as the same keys are used in the same order. If you need to
|
|
600
|
+
guarantee column order across both writes, pass `headers:` explicitly:
|
|
601
|
+
|
|
602
|
+
```ruby
|
|
603
|
+
HEADERS = %i[name age]
|
|
604
|
+
|
|
605
|
+
SmarterCSV.generate('output.csv', headers: HEADERS) do |csv|
|
|
606
|
+
csv << { name: 'Alice', age: 30 }
|
|
607
|
+
end
|
|
608
|
+
|
|
609
|
+
File.open('output.csv', 'a') do |f|
|
|
610
|
+
SmarterCSV.generate(f, headers: HEADERS, write_headers: false) do |csv|
|
|
611
|
+
csv << { name: 'Bob', age: 25 }
|
|
612
|
+
end
|
|
613
|
+
end
|
|
614
|
+
```
|
|
615
|
+
|
|
616
|
+
> **Note:** `write_headers: false` only suppresses the header line. All other
|
|
617
|
+
> options (`col_sep:`, `row_sep:`, `value_converters:`, etc.) apply as normal.
|
|
618
|
+
|
|
571
619
|
## More Examples
|
|
572
620
|
|
|
573
621
|
Check out the [RSpec tests](../spec/smarter_csv/writer_spec.rb) for more examples.
|
data/docs/options.md
CHANGED
|
@@ -45,6 +45,7 @@
|
|
|
45
45
|
| `:write_nil_value` | `''` | String written in place of `nil` field values. E.g. `write_nil_value: 'N/A'`. |
|
|
46
46
|
| `:write_empty_value` | `''` | String written in place of empty-string field values, including missing keys. E.g. `write_empty_value: 'EMPTY'`. |
|
|
47
47
|
| `:write_bom` | `false` | Prepends a UTF-8 BOM (`\xEF\xBB\xBF`) to the output. Use with `encoding: 'UTF-8'` for Excel compatibility. |
|
|
48
|
+
| `:write_headers` | `true` | When `false`, suppresses the header line entirely. Use when appending rows to an existing CSV file (open the file in `'a'` mode yourself and pass the IO object). |
|
|
48
49
|
|
|
49
50
|
|
|
50
51
|
## CSV Reading
|
data/ext/smarter_csv/Makefile
CHANGED
|
@@ -13,12 +13,12 @@ NULLCMD = :
|
|
|
13
13
|
#### Start of system configuration section. ####
|
|
14
14
|
|
|
15
15
|
srcdir = .
|
|
16
|
-
topdir = /Users/tilo/.rvm/rubies/ruby-3.
|
|
16
|
+
topdir = /Users/tilo/.rvm/rubies/ruby-3.2.2/include/ruby-3.2.0
|
|
17
17
|
hdrdir = $(topdir)
|
|
18
|
-
arch_hdrdir = /Users/tilo/.rvm/rubies/ruby-3.
|
|
18
|
+
arch_hdrdir = /Users/tilo/.rvm/rubies/ruby-3.2.2/include/ruby-3.2.0/arm64-darwin23
|
|
19
19
|
PATH_SEPARATOR = :
|
|
20
20
|
VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
|
|
21
|
-
prefix = $(DESTDIR)/Users/tilo/.rvm/rubies/ruby-3.
|
|
21
|
+
prefix = $(DESTDIR)/Users/tilo/.rvm/rubies/ruby-3.2.2
|
|
22
22
|
rubysitearchprefix = $(rubylibprefix)/$(sitearch)
|
|
23
23
|
rubyarchprefix = $(rubylibprefix)/$(arch)
|
|
24
24
|
rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
|
|
@@ -42,7 +42,6 @@ archincludedir = $(includedir)/$(arch)
|
|
|
42
42
|
sitearchlibdir = $(libdir)/$(sitearch)
|
|
43
43
|
archlibdir = $(libdir)/$(arch)
|
|
44
44
|
ridir = $(datarootdir)/$(RI_BASE_NAME)
|
|
45
|
-
modular_gc_dir = $(DESTDIR)
|
|
46
45
|
mandir = $(datarootdir)/man
|
|
47
46
|
localedir = $(datarootdir)/locale
|
|
48
47
|
libdir = $(exec_prefix)/lib
|
|
@@ -79,11 +78,11 @@ COUTFLAG = -o $(empty)
|
|
|
79
78
|
CSRCFLAG = $(empty)
|
|
80
79
|
|
|
81
80
|
RUBY_EXTCONF_H =
|
|
82
|
-
cflags =
|
|
81
|
+
cflags = -fdeclspec $(optflags) $(debugflags) $(warnflags)
|
|
83
82
|
cxxflags =
|
|
84
|
-
optflags = -O3
|
|
85
|
-
debugflags =
|
|
86
|
-
warnflags = -Wall -Wextra -Wextra-tokens -Wdeprecated-declarations -Wdivision-by-zero -Wdiv-by-zero -Wimplicit-function-declaration -Wimplicit-int -Wpointer-arith -Wshorten-64-to-32 -Wwrite-strings -Wold-style-definition -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wunused-variable -
|
|
83
|
+
optflags = -O3
|
|
84
|
+
debugflags = -ggdb3
|
|
85
|
+
warnflags = -Wall -Wextra -Wextra-tokens -Wdeprecated-declarations -Wdivision-by-zero -Wdiv-by-zero -Wimplicit-function-declaration -Wimplicit-int -Wmisleading-indentation -Wpointer-arith -Wshorten-64-to-32 -Wwrite-strings -Wold-style-definition -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wunused-variable -Wundef
|
|
87
86
|
cppflags =
|
|
88
87
|
CCDLFLAGS = -fno-common
|
|
89
88
|
CFLAGS = $(CCDLFLAGS) -O3 -I/opt/homebrew/opt/libyaml/include -I/opt/homebrew/opt/libksba/include -I/opt/homebrew/opt/readline/include -I/opt/homebrew/opt/zlib/include -I/opt/homebrew/opt/openssl@1.1/include $(cflags) -fno-common -pipe $(ARCH_FLAG)
|
|
@@ -92,26 +91,24 @@ DEFS =
|
|
|
92
91
|
CPPFLAGS = -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -D_DARWIN_UNLIMITED_SELECT -D_REENTRANT $(DEFS) $(cppflags)
|
|
93
92
|
CXXFLAGS = $(CCDLFLAGS) -fdeclspec $(ARCH_FLAG)
|
|
94
93
|
ldflags = -L. -L/opt/homebrew/opt/libyaml/lib -L/opt/homebrew/opt/libksba/lib -L/opt/homebrew/opt/readline/lib -L/opt/homebrew/opt/zlib/lib -L/opt/homebrew/opt/openssl@1.1/lib -fstack-protector-strong
|
|
95
|
-
dldflags = -L/opt/homebrew/opt/libyaml/lib -L/opt/homebrew/opt/libksba/lib -L/opt/homebrew/opt/readline/lib -L/opt/homebrew/opt/zlib/lib -L/opt/homebrew/opt/openssl@1.1/lib -Wl,-undefined,dynamic_lookup
|
|
96
|
-
ARCH_FLAG =
|
|
94
|
+
dldflags = -L/opt/homebrew/opt/libyaml/lib -L/opt/homebrew/opt/libksba/lib -L/opt/homebrew/opt/readline/lib -L/opt/homebrew/opt/zlib/lib -L/opt/homebrew/opt/openssl@1.1/lib -Wl,-undefined,dynamic_lookup $(LIBRUBYARG_SHARED)
|
|
95
|
+
ARCH_FLAG =
|
|
97
96
|
DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
|
|
98
97
|
LDSHARED = $(CC) -dynamic -bundle
|
|
99
98
|
LDSHAREDXX = $(CXX) -dynamic -bundle
|
|
100
|
-
POSTLINK = dsymutil $@ 2>/dev/null; { test -z '$(RUBY_CODESIGN)' || codesign -s '$(RUBY_CODESIGN)' $@; }
|
|
101
99
|
AR = ar
|
|
102
|
-
LD = ld
|
|
103
100
|
EXEEXT =
|
|
104
101
|
|
|
105
102
|
RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)
|
|
106
|
-
RUBY_SO_NAME = ruby.3.
|
|
103
|
+
RUBY_SO_NAME = ruby.3.2
|
|
107
104
|
RUBYW_INSTALL_NAME =
|
|
108
105
|
RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version)
|
|
109
106
|
RUBYW_BASE_NAME = rubyw
|
|
110
107
|
RUBY_BASE_NAME = ruby
|
|
111
108
|
|
|
112
|
-
arch = arm64-
|
|
109
|
+
arch = arm64-darwin23
|
|
113
110
|
sitearch = $(arch)
|
|
114
|
-
ruby_version = 3.
|
|
111
|
+
ruby_version = 3.2.0
|
|
115
112
|
ruby = $(bindir)/$(RUBY_BASE_NAME)
|
|
116
113
|
RUBY = $(ruby)
|
|
117
114
|
BUILTRUBY = $(bindir)/$(RUBY_BASE_NAME)
|
|
@@ -131,7 +128,7 @@ TOUCH = exit >
|
|
|
131
128
|
|
|
132
129
|
preload =
|
|
133
130
|
libpath = . $(libdir)
|
|
134
|
-
LIBPATH =
|
|
131
|
+
LIBPATH = -L. -L$(libdir)
|
|
135
132
|
DEFFILE =
|
|
136
133
|
|
|
137
134
|
CLEANFILES = mkmf.log
|
|
@@ -164,7 +161,7 @@ HDRDIR = $(sitehdrdir)$(target_prefix)
|
|
|
164
161
|
ARCHHDRDIR = $(sitearchhdrdir)$(target_prefix)
|
|
165
162
|
TARGET_SO_DIR =
|
|
166
163
|
TARGET_SO = $(TARGET_SO_DIR)$(DLLIB)
|
|
167
|
-
CLEANLIBS = $(TARGET_SO) $(TARGET_SO
|
|
164
|
+
CLEANLIBS = $(TARGET_SO) $(TARGET_SO).dSYM
|
|
168
165
|
CLEANOBJS = $(OBJS) *.bak
|
|
169
166
|
TARGET_SO_DIR_TIMESTAMP = $(TIMESTAMP_DIR)/.sitearchdir.-.smarter_csv.time
|
|
170
167
|
|
data/lib/smarter_csv/reader.rb
CHANGED
|
@@ -10,7 +10,7 @@ module SmarterCSV
|
|
|
10
10
|
# A warning is emitted to STDERR so users know to configure it explicitly.
|
|
11
11
|
DEFAULT_CHUNK_SIZE = 100
|
|
12
12
|
|
|
13
|
-
include ::SmarterCSV::Options
|
|
13
|
+
include ::SmarterCSV::Reader::Options
|
|
14
14
|
include ::SmarterCSV::FileIO
|
|
15
15
|
include ::SmarterCSV::AutoDetection
|
|
16
16
|
include ::SmarterCSV::Headers
|
|
@@ -24,6 +24,10 @@ module SmarterCSV
|
|
|
24
24
|
attr_reader :enforce_utf8, :has_rails, :has_acceleration
|
|
25
25
|
attr_reader :errors, :warnings, :headers, :raw_header, :result
|
|
26
26
|
|
|
27
|
+
def self.default_options
|
|
28
|
+
Options::DEFAULT_OPTIONS
|
|
29
|
+
end
|
|
30
|
+
|
|
27
31
|
# rubocop:disable Naming/MethodName
|
|
28
32
|
def headerA
|
|
29
33
|
warn "Deprecarion Warning: 'headerA' will be removed in future versions. Use 'headders'"
|
|
@@ -143,9 +147,9 @@ module SmarterCSV
|
|
|
143
147
|
options[:_keep_bitmap] = keep_flags.map { |f| f ? 1 : 0 }.pack('C*').freeze
|
|
144
148
|
options[:_keep_extra_cols] = @only_headers_set ? false : true
|
|
145
149
|
options[:_early_exit_after] = (@only_headers_set && !options[:strict]) ? (keep_flags.rindex(true) || -1) : -1
|
|
146
|
-
options[:_keep_cols] = nil
|
|
150
|
+
options[:_keep_cols] = nil # nil signals C: "filter active, check _keep_bitmap"
|
|
147
151
|
else
|
|
148
|
-
options[:_keep_cols] = false
|
|
152
|
+
options[:_keep_cols] = false # sentinel: no filtering active — C skips all bitmap paths
|
|
149
153
|
# Do NOT insert _keep_bitmap/_keep_extra_cols/_early_exit_after when unused.
|
|
150
154
|
# Keeping the options hash as small as possible avoids hash table resize and
|
|
151
155
|
# keeps all 10 per-row rb_hash_aref lookups hitting the same cache lines.
|
|
@@ -210,18 +214,18 @@ module SmarterCSV
|
|
|
210
214
|
# on_start / on_chunk / on_complete are optional callables (nil by default).
|
|
211
215
|
# Hooks only fire from `process` (library-controlled iteration). Enumerator
|
|
212
216
|
# modes (each / each_chunk) do not fire hooks — the caller owns the lifecycle.
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
+
on_start = options[:on_start]
|
|
218
|
+
on_chunk = options[:on_chunk]
|
|
219
|
+
on_complete = options[:on_complete]
|
|
220
|
+
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) if on_start || on_complete
|
|
217
221
|
|
|
218
|
-
if
|
|
219
|
-
|
|
222
|
+
if on_start
|
|
223
|
+
input_meta = if @input.is_a?(String)
|
|
220
224
|
{ input: @input, file_size: (File.size(@input) rescue nil) }
|
|
221
225
|
else
|
|
222
226
|
{ input: @input.class.name, file_size: nil }
|
|
223
|
-
|
|
224
|
-
|
|
227
|
+
end
|
|
228
|
+
on_start.call(input_meta.merge(col_sep: options[:col_sep], row_sep: options[:row_sep]))
|
|
225
229
|
end
|
|
226
230
|
|
|
227
231
|
# now on to processing all the rest of the lines in the CSV file:
|
|
@@ -381,7 +385,7 @@ module SmarterCSV
|
|
|
381
385
|
chunk << hash # append temp result to chunk
|
|
382
386
|
|
|
383
387
|
if chunk.size >= chunk_size || fh.eof? # if chunk is full, or EOF reached
|
|
384
|
-
|
|
388
|
+
on_chunk&.call({ chunk_number: @chunk_count + 1, rows_in_chunk: chunk.size, total_rows_so_far: @csv_line_count })
|
|
385
389
|
# do something with the chunk
|
|
386
390
|
if block_given?
|
|
387
391
|
yield chunk, @chunk_count # do something with the hashes in the chunk in the block
|
|
@@ -410,7 +414,7 @@ module SmarterCSV
|
|
|
410
414
|
|
|
411
415
|
# handling of last chunk:
|
|
412
416
|
if !chunk.nil? && chunk.size > 0
|
|
413
|
-
|
|
417
|
+
on_chunk&.call({ chunk_number: @chunk_count + 1, rows_in_chunk: chunk.size, total_rows_so_far: @csv_line_count })
|
|
414
418
|
# do something with the chunk
|
|
415
419
|
if block_given?
|
|
416
420
|
yield chunk, @chunk_count # do something with the hashes in the chunk in the block
|
|
@@ -421,13 +425,13 @@ module SmarterCSV
|
|
|
421
425
|
# chunk = [] # initialize for next chunk of data
|
|
422
426
|
end
|
|
423
427
|
|
|
424
|
-
if
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
428
|
+
if on_complete
|
|
429
|
+
on_complete.call({
|
|
430
|
+
total_rows: @csv_line_count,
|
|
431
|
+
total_chunks: @chunk_count,
|
|
432
|
+
duration: Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time,
|
|
433
|
+
bad_rows: @errors[:bad_row_count] || 0,
|
|
434
|
+
})
|
|
431
435
|
end
|
|
432
436
|
ensure
|
|
433
437
|
fh.close if fh.respond_to?(:close)
|
|
@@ -658,12 +662,10 @@ module SmarterCSV
|
|
|
658
662
|
# else: mid-field quote → literal, no state change
|
|
659
663
|
elsif !in_quotes
|
|
660
664
|
# Non-quote character: track whether field has started
|
|
661
|
-
if strip
|
|
662
|
-
# rubocop:disable Style/MultipleComparison -- two direct == comparisons are faster than Array#include? in this hot loop
|
|
665
|
+
if strip # -- two direct == comparisons are faster than Array#include? in this hot loop
|
|
663
666
|
field_started = true unless line[i] == ' ' || line[i] == "\t"
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
field_started = true
|
|
667
|
+
else
|
|
668
|
+
field_started = true
|
|
667
669
|
end
|
|
668
670
|
end
|
|
669
671
|
i += 1
|
|
@@ -776,9 +778,13 @@ module SmarterCSV
|
|
|
776
778
|
end
|
|
777
779
|
|
|
778
780
|
def enforce_utf8_encoding(line, options)
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
781
|
+
replace = options[:invalid_byte_sequence]
|
|
782
|
+
# ASCII_8BIT (Encoding::BINARY is an alias) has no codepoint mapping above 0x7F,
|
|
783
|
+
# so encode('utf-8', ASCII_8BIT) would replace every non-ASCII byte. Relabel as
|
|
784
|
+
# UTF-8 first so encode() treats the bytes as already-UTF-8 and only replaces
|
|
785
|
+
# sequences that are actually invalid.
|
|
786
|
+
line = line.force_encoding('utf-8') if line.encoding == Encoding::ASCII_8BIT
|
|
787
|
+
line.encode('utf-8', line.encoding, invalid: :replace, undef: :replace, replace: replace)
|
|
782
788
|
end
|
|
783
789
|
|
|
784
790
|
def handle_bad_row(error, line, start_csv_line, start_file_line, options)
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SmarterCSV
|
|
4
|
+
class Reader
|
|
5
|
+
module Options
|
|
6
|
+
DEFAULT_OPTIONS = {
|
|
7
|
+
acceleration: true, # if user wants to use acceleration or not
|
|
8
|
+
auto_row_sep_chars: 500,
|
|
9
|
+
bad_row_limit: nil,
|
|
10
|
+
chunk_size: nil,
|
|
11
|
+
col_sep: :auto, # was: ',',
|
|
12
|
+
collect_raw_lines: true,
|
|
13
|
+
comment_regexp: nil, # was: /\A#/,
|
|
14
|
+
convert_values_to_numeric: true,
|
|
15
|
+
downcase_header: true,
|
|
16
|
+
duplicate_header_suffix: '', # was: nil,
|
|
17
|
+
field_size_limit: nil, # Integer (bytes) or nil for no limit. Raises FieldSizeLimitExceeded if any
|
|
18
|
+
# extracted field exceeds this size. Prevents DoS from runaway quoted
|
|
19
|
+
# fields (unbounded multiline stitching) or huge inline payloads.
|
|
20
|
+
file_encoding: 'utf-8',
|
|
21
|
+
force_utf8: false,
|
|
22
|
+
headers_in_file: true,
|
|
23
|
+
invalid_byte_sequence: '',
|
|
24
|
+
keep_original_headers: false,
|
|
25
|
+
key_mapping: nil,
|
|
26
|
+
strict: false, # DEPRECATED -> use missing_headers
|
|
27
|
+
missing_headers: :auto, # :auto (auto-generate names for extra cols) or :raise (raise HeaderSizeMismatch)
|
|
28
|
+
missing_header_prefix: 'column_',
|
|
29
|
+
nil_values_matching: nil, # regex: set matching values to nil (key kept); pairs with remove_empty_values
|
|
30
|
+
on_bad_row: :raise,
|
|
31
|
+
on_chunk: nil, # callable: fired after each chunk is parsed, before yielding to the block
|
|
32
|
+
on_complete: nil, # callable: fired once after the entire file is processed
|
|
33
|
+
on_start: nil, # callable: fired once before the first row is parsed
|
|
34
|
+
quote_boundary: :standard, # :standard (only at field boundary 👍) or :legacy (any quote toggles state 👎)
|
|
35
|
+
quote_char: '"',
|
|
36
|
+
quote_escaping: :auto,
|
|
37
|
+
remove_empty_hashes: true,
|
|
38
|
+
remove_empty_values: true,
|
|
39
|
+
remove_unmapped_keys: false,
|
|
40
|
+
remove_values_matching: nil, # DEPRECATED: use nil_values_matching instead
|
|
41
|
+
remove_zero_values: false,
|
|
42
|
+
required_headers: nil,
|
|
43
|
+
required_keys: nil,
|
|
44
|
+
row_sep: :auto, # was: $/,
|
|
45
|
+
silence_missing_keys: false,
|
|
46
|
+
skip_lines: nil,
|
|
47
|
+
strings_as_keys: false,
|
|
48
|
+
strip_chars_from_headers: nil,
|
|
49
|
+
strip_whitespace: true,
|
|
50
|
+
user_provided_headers: nil,
|
|
51
|
+
value_converters: nil,
|
|
52
|
+
verbose: :normal, # nil/:normal (default), :quiet (suppress warnings), :debug (print diagnostics); true/false are deprecated
|
|
53
|
+
with_line_numbers: false,
|
|
54
|
+
}.freeze
|
|
55
|
+
|
|
56
|
+
# NOTE: this is not called when "parse" methods are tested by themselves
|
|
57
|
+
def process_options(given_options = {})
|
|
58
|
+
# Debug output before merge — check raw verbose value (true or :debug)
|
|
59
|
+
$stderr.puts "User provided options:\n#{pp(given_options)}\n" if [true, :debug].include?(given_options[:verbose])
|
|
60
|
+
|
|
61
|
+
# Special case for :user_provided_headers:
|
|
62
|
+
#
|
|
63
|
+
# If we would use the default `headers_in_file: true`, and `:user_provided_headers` are given,
|
|
64
|
+
# we could lose the first data row
|
|
65
|
+
#
|
|
66
|
+
# We now err on the side of treating an actual header as data, rather than losing a data row.
|
|
67
|
+
#
|
|
68
|
+
if given_options[:user_provided_headers] && !given_options.keys.include?(:headers_in_file)
|
|
69
|
+
given_options[:headers_in_file] = false
|
|
70
|
+
warn "WARNING: setting `headers_in_file: false` as a precaution to not lose the first row. Set explicitly to `true` if you have headers." unless given_options[:verbose] == :quiet
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
@options = DEFAULT_OPTIONS.dup.merge!(given_options)
|
|
74
|
+
|
|
75
|
+
# Normalize verbose to a symbol — done once here, stored back into @options.
|
|
76
|
+
# All subsequent checks are free symbol comparisons; no re-evaluation needed.
|
|
77
|
+
# :quiet — suppress all warnings and notices (good for production)
|
|
78
|
+
# :normal — show behavioral warnings (default; helpful for new users)
|
|
79
|
+
# :debug — :normal + print computed options and per-row diagnostics
|
|
80
|
+
# nil is silently normalized to :normal; true/false are deprecated.
|
|
81
|
+
case @options[:verbose]
|
|
82
|
+
when :quiet, :normal, :debug
|
|
83
|
+
# keep as is
|
|
84
|
+
when nil
|
|
85
|
+
@options[:verbose] = :normal
|
|
86
|
+
when false
|
|
87
|
+
warn "DEPRECATION WARNING: verbose: false is deprecated. Use verbose: :normal instead (or omit — it is the default)."
|
|
88
|
+
@options[:verbose] = :normal
|
|
89
|
+
when true
|
|
90
|
+
warn "DEPRECATION WARNING: verbose: true is deprecated. Use verbose: :debug instead."
|
|
91
|
+
@options[:verbose] = :debug
|
|
92
|
+
else
|
|
93
|
+
warn "WARNING: unknown verbose value #{@options[:verbose].inspect}, defaulting to :normal. Valid values: :quiet, :normal, :debug."
|
|
94
|
+
@options[:verbose] = :normal
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# fix invalid input
|
|
98
|
+
@options[:invalid_byte_sequence] ||= ''
|
|
99
|
+
|
|
100
|
+
# Normalize headers: { only: [...] } / { except: [...] } to internal option names.
|
|
101
|
+
# The public API is headers: { only: } or headers: { except: }.
|
|
102
|
+
# Internally we use only_headers: / except_headers: (what the C extension reads).
|
|
103
|
+
if (hdr = @options.delete(:headers)).is_a?(Hash)
|
|
104
|
+
@options[:only_headers] = hdr[:only] if hdr.key?(:only)
|
|
105
|
+
@options[:except_headers] = hdr[:except] if hdr.key?(:except)
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Deprecation: direct use of only_headers: / except_headers: (use headers: { only: } instead)
|
|
109
|
+
if given_options.key?(:only_headers) && !given_options.key?(:headers)
|
|
110
|
+
warn "DEPRECATION WARNING: 'only_headers:' is deprecated. Use 'headers: { only: [...] }' instead." unless @options[:verbose] == :quiet
|
|
111
|
+
end
|
|
112
|
+
if given_options.key?(:except_headers) && !given_options.key?(:headers)
|
|
113
|
+
warn "DEPRECATION WARNING: 'except_headers:' is deprecated. Use 'headers: { except: [...] }' instead." unless @options[:verbose] == :quiet
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
# Normalize only_headers/except_headers to arrays of symbols (internal names, read by C extension)
|
|
117
|
+
if @options[:only_headers]
|
|
118
|
+
values = Array(@options[:only_headers])
|
|
119
|
+
bad = values.reject { |v| v.is_a?(Symbol) || v.is_a?(String) }
|
|
120
|
+
raise SmarterCSV::ValidationError, "headers: { only: } elements must be String or Symbol, got: #{bad.map(&:class).uniq.inspect}" if bad.any?
|
|
121
|
+
@options[:only_headers] = values.map(&:to_sym)
|
|
122
|
+
end
|
|
123
|
+
if @options[:except_headers]
|
|
124
|
+
values = Array(@options[:except_headers])
|
|
125
|
+
bad = values.reject { |v| v.is_a?(Symbol) || v.is_a?(String) }
|
|
126
|
+
raise SmarterCSV::ValidationError, "headers: { except: } elements must be String or Symbol, got: #{bad.map(&:class).uniq.inspect}" if bad.any?
|
|
127
|
+
@options[:except_headers] = values.map(&:to_sym)
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
# Deprecation: remove_values_matching → nil_values_matching
|
|
131
|
+
# Old behavior: removes the key-value pair entirely.
|
|
132
|
+
# New behavior: nil_values_matching sets the value to nil (key kept);
|
|
133
|
+
# combined with the default remove_empty_values: true the net effect is identical.
|
|
134
|
+
# With remove_empty_values: false, the key is retained with a nil value.
|
|
135
|
+
if given_options.key?(:remove_values_matching)
|
|
136
|
+
unless @options[:verbose] == :quiet
|
|
137
|
+
warn "DEPRECATION WARNING: 'remove_values_matching' is deprecated. " \
|
|
138
|
+
"Use 'nil_values_matching' instead. With the default 'remove_empty_values: true' " \
|
|
139
|
+
"the net behavior is identical. With 'remove_empty_values: false', matching values " \
|
|
140
|
+
"are set to nil but the key is retained in the result hash."
|
|
141
|
+
end
|
|
142
|
+
@options[:nil_values_matching] ||= @options[:remove_values_matching]
|
|
143
|
+
@options[:remove_values_matching] = nil # clear to prevent double-processing
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
# Translate deprecated :strict option to :missing_headers
|
|
147
|
+
if given_options.key?(:strict)
|
|
148
|
+
unless @options[:verbose] == :quiet
|
|
149
|
+
warn "DEPRECATION WARNING: 'strict' option is deprecated and will be removed in a future version. " \
|
|
150
|
+
"Use 'missing_headers: :raise' instead of 'strict: true', or 'missing_headers: :auto' instead of 'strict: false'."
|
|
151
|
+
end
|
|
152
|
+
@options[:missing_headers] = @options[:strict] ? :raise : :auto unless given_options.key?(:missing_headers)
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
# Keep :strict synchronized with :missing_headers (C extension reads :strict directly)
|
|
156
|
+
@options[:strict] = (@options[:missing_headers] == :raise)
|
|
157
|
+
|
|
158
|
+
$stderr.puts "Computed options:\n#{pp(@options)}\n" if @options[:verbose] == :debug
|
|
159
|
+
|
|
160
|
+
validate_options!(@options)
|
|
161
|
+
@options
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
private
|
|
165
|
+
|
|
166
|
+
def validate_options!(options)
|
|
167
|
+
# deprecate required_headers
|
|
168
|
+
unless options[:required_headers].nil?
|
|
169
|
+
warn "DEPRECATION WARNING: please use 'required_keys' instead of 'required_headers'" unless options[:verbose] == :quiet
|
|
170
|
+
if options[:required_keys].nil?
|
|
171
|
+
options[:required_keys] = options[:required_headers]
|
|
172
|
+
options[:required_headers] = nil
|
|
173
|
+
end
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
keys = options.keys
|
|
177
|
+
errors = []
|
|
178
|
+
errors << "invalid row_sep" if keys.include?(:row_sep) && !option_valid?(options[:row_sep])
|
|
179
|
+
errors << "invalid col_sep" if keys.include?(:col_sep) && !option_valid?(options[:col_sep])
|
|
180
|
+
errors << "invalid quote_char" if keys.include?(:quote_char) && !option_valid?(options[:quote_char])
|
|
181
|
+
if keys.include?(:quote_char) && options[:quote_char].is_a?(String) && options[:quote_char].bytesize > 1
|
|
182
|
+
errors << "invalid quote_char: must be a single byte (got #{options[:quote_char].inspect})"
|
|
183
|
+
end
|
|
184
|
+
unless %i[double_quotes backslash auto].include?(options[:quote_escaping])
|
|
185
|
+
errors << "invalid quote_escaping: must be :double_quotes, :backslash, or :auto"
|
|
186
|
+
end
|
|
187
|
+
unless %i[legacy standard].include?(options[:quote_boundary])
|
|
188
|
+
errors << "invalid quote_boundary: must be :legacy or :standard"
|
|
189
|
+
end
|
|
190
|
+
fsl = options[:field_size_limit]
|
|
191
|
+
unless fsl.nil? || (fsl.is_a?(Integer) && fsl > 0)
|
|
192
|
+
errors << "invalid field_size_limit: must be nil or a positive Integer (got #{fsl.inspect})"
|
|
193
|
+
end
|
|
194
|
+
obr = options[:on_bad_row]
|
|
195
|
+
unless %i[raise skip collect].include?(obr) || obr.respond_to?(:call)
|
|
196
|
+
errors << "invalid on_bad_row: must be :raise, :skip, :collect, or a callable"
|
|
197
|
+
end
|
|
198
|
+
%i[on_start on_chunk on_complete].each do |hook|
|
|
199
|
+
val = options[hook]
|
|
200
|
+
errors << "invalid #{hook}: must be nil or a callable" if !val.nil? && !val.respond_to?(:call)
|
|
201
|
+
end
|
|
202
|
+
unless %i[auto raise].include?(options[:missing_headers])
|
|
203
|
+
errors << "invalid missing_headers: must be :auto or :raise"
|
|
204
|
+
end
|
|
205
|
+
if options[:only_headers] && options[:except_headers]
|
|
206
|
+
errors << "cannot use both 'headers: { only: }' and 'headers: { except: }' at the same time"
|
|
207
|
+
end
|
|
208
|
+
raise SmarterCSV::ValidationError, errors.inspect if errors.any?
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
def option_valid?(str)
|
|
212
|
+
return true if str.is_a?(Symbol) && str == :auto
|
|
213
|
+
return true if str.is_a?(String) && !str.empty?
|
|
214
|
+
|
|
215
|
+
false
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
def pp(value)
|
|
219
|
+
defined?(AwesomePrint) ? value.awesome_inspect(index: nil) : value.inspect
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
end
|
|
223
|
+
end
|
data/lib/smarter_csv/version.rb
CHANGED
data/lib/smarter_csv/writer.rb
CHANGED
|
@@ -25,6 +25,8 @@ module SmarterCSV
|
|
|
25
25
|
#
|
|
26
26
|
# The Writer automatically quotes fields containing the col_sep, row_sep, or the quote_char.
|
|
27
27
|
#
|
|
28
|
+
# See SmarterCSV::Writer::Options::DEFAULT_OPTIONS for all options and their defaults.
|
|
29
|
+
#
|
|
28
30
|
# Options:
|
|
29
31
|
# col_sep : defaults to , but can be set to any other character
|
|
30
32
|
# row_sep : defaults to LF \n , but can be set to \r\n or \r or anything else
|
|
@@ -42,7 +44,9 @@ module SmarterCSV
|
|
|
42
44
|
# write_empty_value: string written in place of empty-string field values (default: '')
|
|
43
45
|
# write_bom: when true, prepends a UTF-8 BOM (\xEF\xBB\xBF) to the output (default: false)
|
|
44
46
|
# Useful for Excel compatibility with non-ASCII content.
|
|
45
|
-
|
|
47
|
+
# write_headers: when false, suppresses the header line (default: true). Useful when appending to
|
|
48
|
+
# an existing CSV file opened in 'a' mode — the caller controls the file mode.
|
|
49
|
+
#
|
|
46
50
|
# IMPORTANT NOTES:
|
|
47
51
|
# * Data hashes could contain strings or symbols as keys.
|
|
48
52
|
# Make sure to use the correct form when specifying headers manually,
|
|
@@ -51,36 +55,42 @@ module SmarterCSV
|
|
|
51
55
|
attr_reader :options, :row_sep, :col_sep, :quote_char, :force_quotes, :discover_headers, :headers, :map_headers, :output_file
|
|
52
56
|
|
|
53
57
|
class Writer
|
|
54
|
-
|
|
55
|
-
|
|
58
|
+
include ::SmarterCSV::Writer::Options
|
|
59
|
+
|
|
60
|
+
def self.default_options
|
|
61
|
+
Options::DEFAULT_OPTIONS
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def initialize(file_path_or_io, given_options = {})
|
|
65
|
+
opts = Options::DEFAULT_OPTIONS.merge(given_options)
|
|
66
|
+
@options = opts
|
|
56
67
|
|
|
57
|
-
@row_sep =
|
|
58
|
-
@col_sep =
|
|
59
|
-
@quote_char =
|
|
68
|
+
@row_sep = opts[:row_sep]
|
|
69
|
+
@col_sep = opts[:col_sep]
|
|
70
|
+
@quote_char = opts[:quote_char]
|
|
60
71
|
@escaped_quote_char = @quote_char * 2
|
|
61
|
-
@force_quotes =
|
|
62
|
-
@quote_headers =
|
|
63
|
-
@disable_auto_quoting =
|
|
64
|
-
@value_converters =
|
|
65
|
-
@encoding =
|
|
66
|
-
@write_nil_value =
|
|
67
|
-
@write_empty_value =
|
|
68
|
-
@write_bom =
|
|
72
|
+
@force_quotes = opts[:force_quotes] == true
|
|
73
|
+
@quote_headers = opts[:quote_headers] == true
|
|
74
|
+
@disable_auto_quoting = opts[:disable_auto_quoting] == true
|
|
75
|
+
@value_converters = opts[:value_converters] || {}
|
|
76
|
+
@encoding = opts[:encoding]
|
|
77
|
+
@write_nil_value = opts[:write_nil_value]
|
|
78
|
+
@write_empty_value = opts[:write_empty_value]
|
|
79
|
+
@write_bom = opts[:write_bom] == true
|
|
80
|
+
@write_headers = opts[:write_headers] == true
|
|
69
81
|
@map_all_keys = @value_converters.has_key?(:_all)
|
|
70
82
|
@mapped_keys = Set.new(@value_converters.keys - [:_all])
|
|
71
|
-
@header_converter =
|
|
83
|
+
@header_converter = opts[:header_converter]
|
|
72
84
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
@discover_headers = options[:discover_headers] == true
|
|
85
|
+
if given_options.has_key?(:discover_headers)
|
|
86
|
+
@discover_headers = given_options[:discover_headers] == true
|
|
76
87
|
else
|
|
77
|
-
@discover_headers = !(
|
|
88
|
+
@discover_headers = !(given_options.has_key?(:map_headers) || given_options.has_key?(:headers))
|
|
78
89
|
end
|
|
79
90
|
|
|
80
|
-
@headers = []
|
|
81
|
-
@headers =
|
|
82
|
-
@
|
|
83
|
-
@map_headers = options[:map_headers] || {}
|
|
91
|
+
@headers = opts[:headers].dup
|
|
92
|
+
@headers = given_options[:map_headers].keys if given_options.has_key?(:map_headers) && !given_options.has_key?(:headers)
|
|
93
|
+
@map_headers = opts[:map_headers]
|
|
84
94
|
|
|
85
95
|
# Accept an IO-like object (StringIO, IO, etc.) or any path-like object (String, Pathname, etc.)
|
|
86
96
|
if file_path_or_io.respond_to?(:write)
|
|
@@ -110,7 +120,7 @@ module SmarterCSV
|
|
|
110
120
|
# and stream data rows directly to @output_file, bypassing the temp file entirely.
|
|
111
121
|
@temp_file = nil
|
|
112
122
|
@output_file.write("\xEF\xBB\xBF") if @write_bom
|
|
113
|
-
write_header_line
|
|
123
|
+
write_header_line if @write_headers
|
|
114
124
|
else
|
|
115
125
|
@temp_file = Tempfile.new('smarter_csv')
|
|
116
126
|
end
|
|
@@ -134,7 +144,7 @@ module SmarterCSV
|
|
|
134
144
|
# Header-discovery mode: headers were accumulated while writing rows;
|
|
135
145
|
# now prepend the header line and copy the buffered rows to the output.
|
|
136
146
|
@output_file.write("\xEF\xBB\xBF") if @write_bom
|
|
137
|
-
write_header_line
|
|
147
|
+
write_header_line if @write_headers
|
|
138
148
|
@temp_file.rewind
|
|
139
149
|
@output_file.write(@temp_file.read)
|
|
140
150
|
@temp_file.close!
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SmarterCSV
|
|
4
|
+
class Writer
|
|
5
|
+
module Options
|
|
6
|
+
DEFAULT_OPTIONS = {
|
|
7
|
+
col_sep: ',',
|
|
8
|
+
row_sep: $/,
|
|
9
|
+
quote_char: '"',
|
|
10
|
+
force_quotes: false,
|
|
11
|
+
quote_headers: false,
|
|
12
|
+
disable_auto_quoting: false,
|
|
13
|
+
value_converters: {},
|
|
14
|
+
encoding: nil,
|
|
15
|
+
write_nil_value: '',
|
|
16
|
+
write_empty_value: '',
|
|
17
|
+
write_bom: false,
|
|
18
|
+
write_headers: true,
|
|
19
|
+
header_converter: nil,
|
|
20
|
+
discover_headers: true,
|
|
21
|
+
headers: [],
|
|
22
|
+
map_headers: {},
|
|
23
|
+
}.freeze
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
data/lib/smarter_csv.rb
CHANGED
|
@@ -5,7 +5,8 @@ require "smarter_csv/version"
|
|
|
5
5
|
require "smarter_csv/errors"
|
|
6
6
|
|
|
7
7
|
require "smarter_csv/file_io"
|
|
8
|
-
require "smarter_csv/
|
|
8
|
+
require "smarter_csv/reader_options"
|
|
9
|
+
require "smarter_csv/writer_options"
|
|
9
10
|
require "smarter_csv/auto_detection"
|
|
10
11
|
require 'smarter_csv/header_transformations'
|
|
11
12
|
require 'smarter_csv/header_validations'
|
|
@@ -77,9 +78,12 @@ module SmarterCSV
|
|
|
77
78
|
def self.process(input, given_options = {}, &block)
|
|
78
79
|
Thread.current[:current_thread_recent_errors] = {}
|
|
79
80
|
reader = Reader.new(input, given_options)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
81
|
+
reader.process(&block)
|
|
82
|
+
ensure
|
|
83
|
+
# Preserve partial error state when processing raises mid-stream
|
|
84
|
+
# (e.g. TooManyBadRows, or a user block raising). `reader` is nil if
|
|
85
|
+
# Reader.new itself raised before the local was assigned.
|
|
86
|
+
Thread.current[:current_thread_recent_errors] = reader.errors if reader
|
|
83
87
|
end
|
|
84
88
|
|
|
85
89
|
# Convenience method for parsing a CSV string directly.
|
|
@@ -108,9 +112,9 @@ module SmarterCSV
|
|
|
108
112
|
def self.each(input, options = {}, &block)
|
|
109
113
|
Thread.current[:current_thread_recent_errors] = {}
|
|
110
114
|
reader = Reader.new(input, options)
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
115
|
+
reader.each(&block)
|
|
116
|
+
ensure
|
|
117
|
+
Thread.current[:current_thread_recent_errors] = reader.errors if reader
|
|
114
118
|
end
|
|
115
119
|
|
|
116
120
|
# Yields each chunk as Array<Hash> plus its 0-based chunk index.
|
|
@@ -125,9 +129,9 @@ module SmarterCSV
|
|
|
125
129
|
def self.each_chunk(input, options = {}, &block)
|
|
126
130
|
Thread.current[:current_thread_recent_errors] = {}
|
|
127
131
|
reader = Reader.new(input, options)
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
132
|
+
reader.each_chunk(&block)
|
|
133
|
+
ensure
|
|
134
|
+
Thread.current[:current_thread_recent_errors] = reader.errors if reader
|
|
131
135
|
end
|
|
132
136
|
|
|
133
137
|
# Returns the errors from the most recent call to .process, .parse, .each, or .each_chunk
|
|
@@ -197,7 +201,7 @@ module SmarterCSV
|
|
|
197
201
|
begin
|
|
198
202
|
yield writer
|
|
199
203
|
ensure
|
|
200
|
-
writer&.finalize
|
|
204
|
+
writer&.finalize # must finalize before reading io.string
|
|
201
205
|
end
|
|
202
206
|
io.string
|
|
203
207
|
else
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: smarter_csv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.16.
|
|
4
|
+
version: 1.16.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Tilo Sloboda
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2026-
|
|
10
|
+
date: 2026-04-21 00:00:00.000000000 Z
|
|
11
11
|
dependencies: []
|
|
12
12
|
description: |
|
|
13
13
|
SmarterCSV is a high-performance CSV reader and writer for Ruby focused on
|
|
@@ -64,12 +64,7 @@ files:
|
|
|
64
64
|
- docs/value_converters.md
|
|
65
65
|
- ext/smarter_csv/Makefile
|
|
66
66
|
- ext/smarter_csv/extconf.rb
|
|
67
|
-
- ext/smarter_csv/smarter_csv.bundle
|
|
68
|
-
- ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Info.plist
|
|
69
|
-
- ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/DWARF/smarter_csv.bundle
|
|
70
|
-
- ext/smarter_csv/smarter_csv.bundle.dSYM/Contents/Resources/Relocations/aarch64/smarter_csv.bundle.yml
|
|
71
67
|
- ext/smarter_csv/smarter_csv.c
|
|
72
|
-
- ext/smarter_csv/smarter_csv.o
|
|
73
68
|
- images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.png
|
|
74
69
|
- images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.svg
|
|
75
70
|
- images/SmarterCSV_1.16.0_vs_previous_C-speedup.png
|
|
@@ -84,11 +79,12 @@ files:
|
|
|
84
79
|
- lib/smarter_csv/header_transformations.rb
|
|
85
80
|
- lib/smarter_csv/header_validations.rb
|
|
86
81
|
- lib/smarter_csv/headers.rb
|
|
87
|
-
- lib/smarter_csv/options.rb
|
|
88
82
|
- lib/smarter_csv/parser.rb
|
|
89
83
|
- lib/smarter_csv/reader.rb
|
|
84
|
+
- lib/smarter_csv/reader_options.rb
|
|
90
85
|
- lib/smarter_csv/version.rb
|
|
91
86
|
- lib/smarter_csv/writer.rb
|
|
87
|
+
- lib/smarter_csv/writer_options.rb
|
|
92
88
|
- smarter_csv.gemspec
|
|
93
89
|
homepage: https://github.com/tilo/smarter_csv
|
|
94
90
|
licenses:
|
|
Binary file
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
-
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
3
|
-
<plist version="1.0">
|
|
4
|
-
<dict>
|
|
5
|
-
<key>CFBundleDevelopmentRegion</key>
|
|
6
|
-
<string>English</string>
|
|
7
|
-
<key>CFBundleIdentifier</key>
|
|
8
|
-
<string>com.apple.xcode.dsym.smarter_csv.bundle</string>
|
|
9
|
-
<key>CFBundleInfoDictionaryVersion</key>
|
|
10
|
-
<string>6.0</string>
|
|
11
|
-
<key>CFBundlePackageType</key>
|
|
12
|
-
<string>dSYM</string>
|
|
13
|
-
<key>CFBundleSignature</key>
|
|
14
|
-
<string>????</string>
|
|
15
|
-
<key>CFBundleShortVersionString</key>
|
|
16
|
-
<string>1.0</string>
|
|
17
|
-
<key>CFBundleVersion</key>
|
|
18
|
-
<string>1</string>
|
|
19
|
-
</dict>
|
|
20
|
-
</plist>
|
|
Binary file
|
|
Binary file
|
data/lib/smarter_csv/options.rb
DELETED
|
@@ -1,229 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module SmarterCSV
|
|
4
|
-
#
|
|
5
|
-
# NOTE: this is not called when "parse" methods are tested by themselves
|
|
6
|
-
#
|
|
7
|
-
# ONLY FOR BACKWARDS-COMPATIBILITY
|
|
8
|
-
def self.default_options
|
|
9
|
-
Options::DEFAULT_OPTIONS
|
|
10
|
-
end
|
|
11
|
-
|
|
12
|
-
module Options
|
|
13
|
-
DEFAULT_OPTIONS = {
|
|
14
|
-
acceleration: true, # if user wants to use accelleration or not
|
|
15
|
-
auto_row_sep_chars: 500,
|
|
16
|
-
bad_row_limit: nil,
|
|
17
|
-
chunk_size: nil,
|
|
18
|
-
col_sep: :auto, # was: ',',
|
|
19
|
-
collect_raw_lines: true,
|
|
20
|
-
comment_regexp: nil, # was: /\A#/,
|
|
21
|
-
convert_values_to_numeric: true,
|
|
22
|
-
downcase_header: true,
|
|
23
|
-
duplicate_header_suffix: '', # was: nil,
|
|
24
|
-
field_size_limit: nil, # Integer (bytes) or nil for no limit. Raises FieldSizeLimitExceeded if any
|
|
25
|
-
# extracted field exceeds this size. Prevents DoS from runaway quoted
|
|
26
|
-
# fields (unbounded multiline stitching) or huge inline payloads.
|
|
27
|
-
file_encoding: 'utf-8',
|
|
28
|
-
force_utf8: false,
|
|
29
|
-
headers_in_file: true,
|
|
30
|
-
invalid_byte_sequence: '',
|
|
31
|
-
keep_original_headers: false,
|
|
32
|
-
key_mapping: nil,
|
|
33
|
-
strict: false, # DEPRECATED -> use missing_headers
|
|
34
|
-
missing_headers: :auto, # :auto (auto-generate names for extra cols) or :raise (raise HeaderSizeMismatch)
|
|
35
|
-
missing_header_prefix: 'column_',
|
|
36
|
-
nil_values_matching: nil, # regex: set matching values to nil (key kept); pairs with remove_empty_values
|
|
37
|
-
on_bad_row: :raise,
|
|
38
|
-
on_chunk: nil, # callable: fired after each chunk is parsed, before yielding to the block
|
|
39
|
-
on_complete: nil, # callable: fired once after the entire file is processed
|
|
40
|
-
on_start: nil, # callable: fired once before the first row is parsed
|
|
41
|
-
quote_boundary: :standard, # :standard (only at field boundary 👍) or :legacy (any quote toggles state 👎)
|
|
42
|
-
quote_char: '"',
|
|
43
|
-
quote_escaping: :auto,
|
|
44
|
-
remove_empty_hashes: true,
|
|
45
|
-
remove_empty_values: true,
|
|
46
|
-
remove_unmapped_keys: false,
|
|
47
|
-
remove_values_matching: nil, # DEPRECATED: use nil_values_matching instead
|
|
48
|
-
remove_zero_values: false,
|
|
49
|
-
required_headers: nil,
|
|
50
|
-
required_keys: nil,
|
|
51
|
-
row_sep: :auto, # was: $/,
|
|
52
|
-
silence_missing_keys: false,
|
|
53
|
-
skip_lines: nil,
|
|
54
|
-
strings_as_keys: false,
|
|
55
|
-
strip_chars_from_headers: nil,
|
|
56
|
-
strip_whitespace: true,
|
|
57
|
-
user_provided_headers: nil,
|
|
58
|
-
value_converters: nil,
|
|
59
|
-
verbose: :normal, # nil/:normal (default), :quiet (suppress warnings), :debug (print diagnostics); true/false are deprecated
|
|
60
|
-
with_line_numbers: false,
|
|
61
|
-
}.freeze
|
|
62
|
-
|
|
63
|
-
# NOTE: this is not called when "parse" methods are tested by themselves
|
|
64
|
-
def process_options(given_options = {})
|
|
65
|
-
# Debug output before merge — check raw verbose value (true or :debug)
|
|
66
|
-
$stderr.puts "User provided options:\n#{pp(given_options)}\n" if [true, :debug].include?(given_options[:verbose])
|
|
67
|
-
|
|
68
|
-
# Special case for :user_provided_headers:
|
|
69
|
-
#
|
|
70
|
-
# If we would use the default `headers_in_file: true`, and `:user_provided_headers` are given,
|
|
71
|
-
# we could lose the first data row
|
|
72
|
-
#
|
|
73
|
-
# We now err on the side of treating an actual header as data, rather than losing a data row.
|
|
74
|
-
#
|
|
75
|
-
if given_options[:user_provided_headers] && !given_options.keys.include?(:headers_in_file)
|
|
76
|
-
given_options[:headers_in_file] = false
|
|
77
|
-
warn "WARNING: setting `headers_in_file: false` as a precaution to not lose the first row. Set explicitly to `true` if you have headers." unless given_options[:verbose] == :quiet
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
@options = DEFAULT_OPTIONS.dup.merge!(given_options)
|
|
81
|
-
|
|
82
|
-
# Normalize verbose to a symbol — done once here, stored back into @options.
|
|
83
|
-
# All subsequent checks are free symbol comparisons; no re-evaluation needed.
|
|
84
|
-
# :quiet — suppress all warnings and notices (good for production)
|
|
85
|
-
# :normal — show behavioral warnings (default; helpful for new users)
|
|
86
|
-
# :debug — :normal + print computed options and per-row diagnostics
|
|
87
|
-
# nil is silently normalized to :normal; true/false are deprecated.
|
|
88
|
-
case @options[:verbose]
|
|
89
|
-
when :quiet, :normal, :debug
|
|
90
|
-
# keep as is
|
|
91
|
-
when nil
|
|
92
|
-
@options[:verbose] = :normal
|
|
93
|
-
when false
|
|
94
|
-
warn "DEPRECATION WARNING: verbose: false is deprecated. Use verbose: :normal instead (or omit — it is the default)."
|
|
95
|
-
@options[:verbose] = :normal
|
|
96
|
-
when true
|
|
97
|
-
warn "DEPRECATION WARNING: verbose: true is deprecated. Use verbose: :debug instead."
|
|
98
|
-
@options[:verbose] = :debug
|
|
99
|
-
else
|
|
100
|
-
warn "WARNING: unknown verbose value #{@options[:verbose].inspect}, defaulting to :normal. Valid values: :quiet, :normal, :debug."
|
|
101
|
-
@options[:verbose] = :normal
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
# fix invalid input
|
|
105
|
-
@options[:invalid_byte_sequence] ||= ''
|
|
106
|
-
|
|
107
|
-
# Normalize headers: { only: [...] } / { except: [...] } to internal option names.
|
|
108
|
-
# The public API is headers: { only: } or headers: { except: }.
|
|
109
|
-
# Internally we use only_headers: / except_headers: (what the C extension reads).
|
|
110
|
-
if (hdr = @options.delete(:headers)).is_a?(Hash)
|
|
111
|
-
@options[:only_headers] = hdr[:only] if hdr.key?(:only)
|
|
112
|
-
@options[:except_headers] = hdr[:except] if hdr.key?(:except)
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
# Deprecation: direct use of only_headers: / except_headers: (use headers: { only: } instead)
|
|
116
|
-
if given_options.key?(:only_headers) && !given_options.key?(:headers)
|
|
117
|
-
warn "DEPRECATION WARNING: 'only_headers:' is deprecated. Use 'headers: { only: [...] }' instead." unless @options[:verbose] == :quiet
|
|
118
|
-
end
|
|
119
|
-
if given_options.key?(:except_headers) && !given_options.key?(:headers)
|
|
120
|
-
warn "DEPRECATION WARNING: 'except_headers:' is deprecated. Use 'headers: { except: [...] }' instead." unless @options[:verbose] == :quiet
|
|
121
|
-
end
|
|
122
|
-
|
|
123
|
-
# Normalize only_headers/except_headers to arrays of symbols (internal names, read by C extension)
|
|
124
|
-
if @options[:only_headers]
|
|
125
|
-
values = Array(@options[:only_headers])
|
|
126
|
-
bad = values.reject { |v| v.is_a?(Symbol) || v.is_a?(String) }
|
|
127
|
-
raise SmarterCSV::ValidationError, "headers: { only: } elements must be String or Symbol, got: #{bad.map(&:class).uniq.inspect}" if bad.any?
|
|
128
|
-
@options[:only_headers] = values.map(&:to_sym)
|
|
129
|
-
end
|
|
130
|
-
if @options[:except_headers]
|
|
131
|
-
values = Array(@options[:except_headers])
|
|
132
|
-
bad = values.reject { |v| v.is_a?(Symbol) || v.is_a?(String) }
|
|
133
|
-
raise SmarterCSV::ValidationError, "headers: { except: } elements must be String or Symbol, got: #{bad.map(&:class).uniq.inspect}" if bad.any?
|
|
134
|
-
@options[:except_headers] = values.map(&:to_sym)
|
|
135
|
-
end
|
|
136
|
-
|
|
137
|
-
# Deprecation: remove_values_matching → nil_values_matching
|
|
138
|
-
# Old behavior: removes the key-value pair entirely.
|
|
139
|
-
# New behavior: nil_values_matching sets the value to nil (key kept);
|
|
140
|
-
# combined with the default remove_empty_values: true the net effect is identical.
|
|
141
|
-
# With remove_empty_values: false, the key is retained with a nil value.
|
|
142
|
-
if given_options.key?(:remove_values_matching)
|
|
143
|
-
unless @options[:verbose] == :quiet
|
|
144
|
-
warn "DEPRECATION WARNING: 'remove_values_matching' is deprecated. " \
|
|
145
|
-
"Use 'nil_values_matching' instead. With the default 'remove_empty_values: true' " \
|
|
146
|
-
"the net behavior is identical. With 'remove_empty_values: false', matching values " \
|
|
147
|
-
"are set to nil but the key is retained in the result hash."
|
|
148
|
-
end
|
|
149
|
-
@options[:nil_values_matching] ||= @options[:remove_values_matching]
|
|
150
|
-
@options[:remove_values_matching] = nil # clear to prevent double-processing
|
|
151
|
-
end
|
|
152
|
-
|
|
153
|
-
# Translate deprecated :strict option to :missing_headers
|
|
154
|
-
if given_options.key?(:strict)
|
|
155
|
-
unless @options[:verbose] == :quiet
|
|
156
|
-
warn "DEPRECATION WARNING: 'strict' option is deprecated and will be removed in a future version. " \
|
|
157
|
-
"Use 'missing_headers: :raise' instead of 'strict: true', or 'missing_headers: :auto' instead of 'strict: false'."
|
|
158
|
-
end
|
|
159
|
-
@options[:missing_headers] = @options[:strict] ? :raise : :auto unless given_options.key?(:missing_headers)
|
|
160
|
-
end
|
|
161
|
-
|
|
162
|
-
# Keep :strict synchronized with :missing_headers (C extension reads :strict directly)
|
|
163
|
-
@options[:strict] = (@options[:missing_headers] == :raise)
|
|
164
|
-
|
|
165
|
-
$stderr.puts "Computed options:\n#{pp(@options)}\n" if @options[:verbose] == :debug
|
|
166
|
-
|
|
167
|
-
validate_options!(@options)
|
|
168
|
-
@options
|
|
169
|
-
end
|
|
170
|
-
|
|
171
|
-
private
|
|
172
|
-
|
|
173
|
-
def validate_options!(options)
|
|
174
|
-
# deprecate required_headers
|
|
175
|
-
unless options[:required_headers].nil?
|
|
176
|
-
warn "DEPRECATION WARNING: please use 'required_keys' instead of 'required_headers'" unless options[:verbose] == :quiet
|
|
177
|
-
if options[:required_keys].nil?
|
|
178
|
-
options[:required_keys] = options[:required_headers]
|
|
179
|
-
options[:required_headers] = nil
|
|
180
|
-
end
|
|
181
|
-
end
|
|
182
|
-
|
|
183
|
-
keys = options.keys
|
|
184
|
-
errors = []
|
|
185
|
-
errors << "invalid row_sep" if keys.include?(:row_sep) && !option_valid?(options[:row_sep])
|
|
186
|
-
errors << "invalid col_sep" if keys.include?(:col_sep) && !option_valid?(options[:col_sep])
|
|
187
|
-
errors << "invalid quote_char" if keys.include?(:quote_char) && !option_valid?(options[:quote_char])
|
|
188
|
-
if keys.include?(:quote_char) && options[:quote_char].is_a?(String) && options[:quote_char].bytesize > 1
|
|
189
|
-
errors << "invalid quote_char: must be a single byte (got #{options[:quote_char].inspect})"
|
|
190
|
-
end
|
|
191
|
-
unless %i[double_quotes backslash auto].include?(options[:quote_escaping])
|
|
192
|
-
errors << "invalid quote_escaping: must be :double_quotes, :backslash, or :auto"
|
|
193
|
-
end
|
|
194
|
-
unless %i[legacy standard].include?(options[:quote_boundary])
|
|
195
|
-
errors << "invalid quote_boundary: must be :legacy or :standard"
|
|
196
|
-
end
|
|
197
|
-
fsl = options[:field_size_limit]
|
|
198
|
-
unless fsl.nil? || (fsl.is_a?(Integer) && fsl > 0)
|
|
199
|
-
errors << "invalid field_size_limit: must be nil or a positive Integer (got #{fsl.inspect})"
|
|
200
|
-
end
|
|
201
|
-
obr = options[:on_bad_row]
|
|
202
|
-
unless %i[raise skip collect].include?(obr) || obr.respond_to?(:call)
|
|
203
|
-
errors << "invalid on_bad_row: must be :raise, :skip, :collect, or a callable"
|
|
204
|
-
end
|
|
205
|
-
%i[on_start on_chunk on_complete].each do |hook|
|
|
206
|
-
val = options[hook]
|
|
207
|
-
errors << "invalid #{hook}: must be nil or a callable" if !val.nil? && !val.respond_to?(:call)
|
|
208
|
-
end
|
|
209
|
-
unless %i[auto raise].include?(options[:missing_headers])
|
|
210
|
-
errors << "invalid missing_headers: must be :auto or :raise"
|
|
211
|
-
end
|
|
212
|
-
if options[:only_headers] && options[:except_headers]
|
|
213
|
-
errors << "cannot use both 'headers: { only: }' and 'headers: { except: }' at the same time"
|
|
214
|
-
end
|
|
215
|
-
raise SmarterCSV::ValidationError, errors.inspect if errors.any?
|
|
216
|
-
end
|
|
217
|
-
|
|
218
|
-
def option_valid?(str)
|
|
219
|
-
return true if str.is_a?(Symbol) && str == :auto
|
|
220
|
-
return true if str.is_a?(String) && !str.empty?
|
|
221
|
-
|
|
222
|
-
false
|
|
223
|
-
end
|
|
224
|
-
|
|
225
|
-
def pp(value)
|
|
226
|
-
defined?(AwesomePrint) ? value.awesome_inspect(index: nil) : value.inspect
|
|
227
|
-
end
|
|
228
|
-
end
|
|
229
|
-
end
|