smarter_csv 1.16.3 → 1.16.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +6 -0
- data/CHANGELOG.md +12 -0
- data/lib/smarter_csv/reader.rb +29 -27
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv.rb +13 -10
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9f0dc97fe8b296d479efa58b5e404636fe66dbe768e032de987e4c2736b619a4
|
|
4
|
+
data.tar.gz: 6ffaa0b2f74fb6a48c22a28c21a254a0e9b962bcfb9d1b979e72e54ae446a5c1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8e16f3d049432df188373da120fd4d5f04fd4a49d6a3bb3e91abf6f5722fa6fc90ee302e6db5348349f65290226e0da046cb10d71b2d267fc1be4d63af18107c
|
|
7
|
+
data.tar.gz: 473ce5f7d1b2bceb7a82898b90c9a19e4076675eea1d748af97d5c7c04abaf15e1cf95a8b774bab40ba5f36cad6ad517e874eab3e63854302ea9d6d4465fefc8
|
data/.rubocop.yml
CHANGED
|
@@ -121,6 +121,9 @@ Style/PercentLiteralDelimiters:
|
|
|
121
121
|
Style/RegexpLiteral:
|
|
122
122
|
Enabled: false
|
|
123
123
|
|
|
124
|
+
Style/RescueModifier:
|
|
125
|
+
Enabled: false
|
|
126
|
+
|
|
124
127
|
Style/SafeNavigation:
|
|
125
128
|
Enabled: false
|
|
126
129
|
|
|
@@ -153,6 +156,9 @@ Style/SymbolArray:
|
|
|
153
156
|
Style/SymbolProc: # old Ruby versions can't do this
|
|
154
157
|
Enabled: false
|
|
155
158
|
|
|
159
|
+
Style/TernaryParentheses:
|
|
160
|
+
Enabled: false
|
|
161
|
+
|
|
156
162
|
Style/TrailingCommaInArrayLiteral:
|
|
157
163
|
Enabled: false
|
|
158
164
|
EnforcedStyleForMultiline: consistent_comma
|
data/CHANGELOG.md
CHANGED
|
@@ -1,6 +1,18 @@
|
|
|
1
1
|
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
|
3
3
|
|
|
4
|
+
## 1.16.4 (2026-04-21) — Bug Fixes
|
|
5
|
+
|
|
6
|
+
RSpec tests: **1,434 → 1,467** (+33 tests)
|
|
7
|
+
|
|
8
|
+
### Bug Fixes
|
|
9
|
+
|
|
10
|
+
* Fixed bug in `SmarterCSV.errors` that could lose collected records when processing raises mid-stream,
|
|
11
|
+
e.g. when `bad_row_limit:` was exceeded (`TooManyBadRows`), or when a user's block raised through `.process` / `.each` / `.each_chunk`.
|
|
12
|
+
|
|
13
|
+
* Fixed `enforce_utf8_encoding` incorrectly replacing all non-ASCII bytes when the input string was tagged as `ASCII-8BIT` (binary).
|
|
14
|
+
The encoding is now relabeled to UTF-8 before transcoding, so only genuinely invalid byte sequences are replaced.
|
|
15
|
+
|
|
4
16
|
## 1.16.3 (2026-04-14) — New Feature
|
|
5
17
|
|
|
6
18
|
RSpec tests: **1,425 → 1,434** (+9 tests)
|
data/lib/smarter_csv/reader.rb
CHANGED
|
@@ -147,9 +147,9 @@ module SmarterCSV
|
|
|
147
147
|
options[:_keep_bitmap] = keep_flags.map { |f| f ? 1 : 0 }.pack('C*').freeze
|
|
148
148
|
options[:_keep_extra_cols] = @only_headers_set ? false : true
|
|
149
149
|
options[:_early_exit_after] = (@only_headers_set && !options[:strict]) ? (keep_flags.rindex(true) || -1) : -1
|
|
150
|
-
options[:_keep_cols] = nil
|
|
150
|
+
options[:_keep_cols] = nil # nil signals C: "filter active, check _keep_bitmap"
|
|
151
151
|
else
|
|
152
|
-
options[:_keep_cols] = false
|
|
152
|
+
options[:_keep_cols] = false # sentinel: no filtering active — C skips all bitmap paths
|
|
153
153
|
# Do NOT insert _keep_bitmap/_keep_extra_cols/_early_exit_after when unused.
|
|
154
154
|
# Keeping the options hash as small as possible avoids hash table resize and
|
|
155
155
|
# keeps all 10 per-row rb_hash_aref lookups hitting the same cache lines.
|
|
@@ -214,18 +214,18 @@ module SmarterCSV
|
|
|
214
214
|
# on_start / on_chunk / on_complete are optional callables (nil by default).
|
|
215
215
|
# Hooks only fire from `process` (library-controlled iteration). Enumerator
|
|
216
216
|
# modes (each / each_chunk) do not fire hooks — the caller owns the lifecycle.
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
217
|
+
on_start = options[:on_start]
|
|
218
|
+
on_chunk = options[:on_chunk]
|
|
219
|
+
on_complete = options[:on_complete]
|
|
220
|
+
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) if on_start || on_complete
|
|
221
221
|
|
|
222
|
-
if
|
|
223
|
-
|
|
222
|
+
if on_start
|
|
223
|
+
input_meta = if @input.is_a?(String)
|
|
224
224
|
{ input: @input, file_size: (File.size(@input) rescue nil) }
|
|
225
225
|
else
|
|
226
226
|
{ input: @input.class.name, file_size: nil }
|
|
227
|
-
|
|
228
|
-
|
|
227
|
+
end
|
|
228
|
+
on_start.call(input_meta.merge(col_sep: options[:col_sep], row_sep: options[:row_sep]))
|
|
229
229
|
end
|
|
230
230
|
|
|
231
231
|
# now on to processing all the rest of the lines in the CSV file:
|
|
@@ -385,7 +385,7 @@ module SmarterCSV
|
|
|
385
385
|
chunk << hash # append temp result to chunk
|
|
386
386
|
|
|
387
387
|
if chunk.size >= chunk_size || fh.eof? # if chunk is full, or EOF reached
|
|
388
|
-
|
|
388
|
+
on_chunk&.call({ chunk_number: @chunk_count + 1, rows_in_chunk: chunk.size, total_rows_so_far: @csv_line_count })
|
|
389
389
|
# do something with the chunk
|
|
390
390
|
if block_given?
|
|
391
391
|
yield chunk, @chunk_count # do something with the hashes in the chunk in the block
|
|
@@ -414,7 +414,7 @@ module SmarterCSV
|
|
|
414
414
|
|
|
415
415
|
# handling of last chunk:
|
|
416
416
|
if !chunk.nil? && chunk.size > 0
|
|
417
|
-
|
|
417
|
+
on_chunk&.call({ chunk_number: @chunk_count + 1, rows_in_chunk: chunk.size, total_rows_so_far: @csv_line_count })
|
|
418
418
|
# do something with the chunk
|
|
419
419
|
if block_given?
|
|
420
420
|
yield chunk, @chunk_count # do something with the hashes in the chunk in the block
|
|
@@ -425,13 +425,13 @@ module SmarterCSV
|
|
|
425
425
|
# chunk = [] # initialize for next chunk of data
|
|
426
426
|
end
|
|
427
427
|
|
|
428
|
-
if
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
428
|
+
if on_complete
|
|
429
|
+
on_complete.call({
|
|
430
|
+
total_rows: @csv_line_count,
|
|
431
|
+
total_chunks: @chunk_count,
|
|
432
|
+
duration: Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time,
|
|
433
|
+
bad_rows: @errors[:bad_row_count] || 0,
|
|
434
|
+
})
|
|
435
435
|
end
|
|
436
436
|
ensure
|
|
437
437
|
fh.close if fh.respond_to?(:close)
|
|
@@ -662,12 +662,10 @@ module SmarterCSV
|
|
|
662
662
|
# else: mid-field quote → literal, no state change
|
|
663
663
|
elsif !in_quotes
|
|
664
664
|
# Non-quote character: track whether field has started
|
|
665
|
-
if strip
|
|
666
|
-
# rubocop:disable Style/MultipleComparison -- two direct == comparisons are faster than Array#include? in this hot loop
|
|
665
|
+
if strip # two direct == comparisons are faster than Array#include? in this hot loop
|
|
667
666
|
field_started = true unless line[i] == ' ' || line[i] == "\t"
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
field_started = true
|
|
667
|
+
else
|
|
668
|
+
field_started = true
|
|
671
669
|
end
|
|
672
670
|
end
|
|
673
671
|
i += 1
|
|
@@ -780,9 +778,13 @@ module SmarterCSV
|
|
|
780
778
|
end
|
|
781
779
|
|
|
782
780
|
def enforce_utf8_encoding(line, options)
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
781
|
+
replace = options[:invalid_byte_sequence]
|
|
782
|
+
# ASCII_8BIT (Encoding::BINARY is an alias) has no codepoint mapping above 0x7F,
|
|
783
|
+
# so encode('utf-8', ASCII_8BIT) would replace every non-ASCII byte. Relabel as
|
|
784
|
+
# UTF-8 first so encode() treats the bytes as already-UTF-8 and only replaces
|
|
785
|
+
# sequences that are actually invalid.
|
|
786
|
+
line = line.force_encoding('utf-8') if line.encoding == Encoding::ASCII_8BIT
|
|
787
|
+
line.encode('utf-8', line.encoding, invalid: :replace, undef: :replace, replace: replace)
|
|
786
788
|
end
|
|
787
789
|
|
|
788
790
|
def handle_bad_row(error, line, start_csv_line, start_file_line, options)
|
data/lib/smarter_csv/version.rb
CHANGED
data/lib/smarter_csv.rb
CHANGED
|
@@ -78,9 +78,12 @@ module SmarterCSV
|
|
|
78
78
|
def self.process(input, given_options = {}, &block)
|
|
79
79
|
Thread.current[:current_thread_recent_errors] = {}
|
|
80
80
|
reader = Reader.new(input, given_options)
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
81
|
+
reader.process(&block)
|
|
82
|
+
ensure
|
|
83
|
+
# Preserve partial error state when processing raises mid-stream
|
|
84
|
+
# (e.g. TooManyBadRows, or a user block raising). `reader` is nil if
|
|
85
|
+
# Reader.new itself raised before the local was assigned.
|
|
86
|
+
Thread.current[:current_thread_recent_errors] = reader.errors if reader
|
|
84
87
|
end
|
|
85
88
|
|
|
86
89
|
# Convenience method for parsing a CSV string directly.
|
|
@@ -109,9 +112,9 @@ module SmarterCSV
|
|
|
109
112
|
def self.each(input, options = {}, &block)
|
|
110
113
|
Thread.current[:current_thread_recent_errors] = {}
|
|
111
114
|
reader = Reader.new(input, options)
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
+
reader.each(&block)
|
|
116
|
+
ensure
|
|
117
|
+
Thread.current[:current_thread_recent_errors] = reader.errors if reader
|
|
115
118
|
end
|
|
116
119
|
|
|
117
120
|
# Yields each chunk as Array<Hash> plus its 0-based chunk index.
|
|
@@ -126,9 +129,9 @@ module SmarterCSV
|
|
|
126
129
|
def self.each_chunk(input, options = {}, &block)
|
|
127
130
|
Thread.current[:current_thread_recent_errors] = {}
|
|
128
131
|
reader = Reader.new(input, options)
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
+
reader.each_chunk(&block)
|
|
133
|
+
ensure
|
|
134
|
+
Thread.current[:current_thread_recent_errors] = reader.errors if reader
|
|
132
135
|
end
|
|
133
136
|
|
|
134
137
|
# Returns the errors from the most recent call to .process, .parse, .each, or .each_chunk
|
|
@@ -198,7 +201,7 @@ module SmarterCSV
|
|
|
198
201
|
begin
|
|
199
202
|
yield writer
|
|
200
203
|
ensure
|
|
201
|
-
writer&.finalize
|
|
204
|
+
writer&.finalize # must finalize before reading io.string
|
|
202
205
|
end
|
|
203
206
|
io.string
|
|
204
207
|
else
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: smarter_csv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.16.3
|
|
4
|
+
version: 1.16.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Tilo Sloboda
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2026-04-14 00:00:00.000000000 Z
|
|
10
|
+
date: 2026-04-21 00:00:00.000000000 Z
|
|
11
11
|
dependencies: []
|
|
12
12
|
description: |
|
|
13
13
|
SmarterCSV is a high-performance CSV reader and writer for Ruby focused on
|