smarter_csv 1.16.3 → 1.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b40fb76fef88599d7449691806af6f76a131ffcd41e2ee145d5c87f3554a2006
4
- data.tar.gz: edb27057973c0a88524579f450dc8ed3ffadbd983de7155a84ff159495c31233
3
+ metadata.gz: 9f0dc97fe8b296d479efa58b5e404636fe66dbe768e032de987e4c2736b619a4
4
+ data.tar.gz: 6ffaa0b2f74fb6a48c22a28c21a254a0e9b962bcfb9d1b979e72e54ae446a5c1
5
5
  SHA512:
6
- metadata.gz: 5b9e2a17ae14a5d7b3dfddd854148f38c8892d67544b51fecba888aa11c540096057460880564b89251194fea513c3e2807c500d93f896826db885daf64271fe
7
- data.tar.gz: 90b5aa10c6bc36cbc97662de879deaf61c2417a61e4cb5171924aac991a37c762588ff6413a924bfd083ca10c1fa1739049561f3c6e9de6ff196bdba85c7878f
6
+ metadata.gz: 8e16f3d049432df188373da120fd4d5f04fd4a49d6a3bb3e91abf6f5722fa6fc90ee302e6db5348349f65290226e0da046cb10d71b2d267fc1be4d63af18107c
7
+ data.tar.gz: 473ce5f7d1b2bceb7a82898b90c9a19e4076675eea1d748af97d5c7c04abaf15e1cf95a8b774bab40ba5f36cad6ad517e874eab3e63854302ea9d6d4465fefc8
data/.rubocop.yml CHANGED
@@ -121,6 +121,9 @@ Style/PercentLiteralDelimiters:
121
121
  Style/RegexpLiteral:
122
122
  Enabled: false
123
123
 
124
+ Style/RescueModifier:
125
+ Enabled: false
126
+
124
127
  Style/SafeNavigation:
125
128
  Enabled: false
126
129
 
@@ -153,6 +156,9 @@ Style/SymbolArray:
153
156
  Style/SymbolProc: # old Ruby versions can't do this
154
157
  Enabled: false
155
158
 
159
+ Style/TernaryParentheses:
160
+ Enabled: false
161
+
156
162
  Style/TrailingCommaInArrayLiteral:
157
163
  Enabled: false
158
164
  EnforcedStyleForMultiline: consistent_comma
data/CHANGELOG.md CHANGED
@@ -1,6 +1,18 @@
1
1
 
2
2
  # SmarterCSV 1.x Change Log
3
3
 
4
+ ## 1.16.4 (2026-04-21) — Bug Fixes
5
+
6
+ RSpec tests: **1,434 → 1,467** (+33 tests)
7
+
8
+ ### Bug Fixes
9
+
10
+ * Fixed a bug where `SmarterCSV.errors` could lose the collected error records when processing raised mid-stream,
11
+ e.g. when `bad_row_limit:` was exceeded (`TooManyBadRows`), or when a user's block raised through `.process` / `.each` / `.each_chunk`.
12
+
13
+ * Fixed `enforce_utf8_encoding` incorrectly replacing all non-ASCII bytes when the input string was tagged as `ASCII-8BIT` (binary).
14
+ The encoding is now relabeled to UTF-8 before transcoding, so only genuinely invalid byte sequences are replaced.
15
+
4
16
  ## 1.16.3 (2026-04-14) — New Feature
5
17
 
6
18
  RSpec tests: **1,425 → 1,434** (+9 tests)
@@ -147,9 +147,9 @@ module SmarterCSV
147
147
  options[:_keep_bitmap] = keep_flags.map { |f| f ? 1 : 0 }.pack('C*').freeze
148
148
  options[:_keep_extra_cols] = @only_headers_set ? false : true
149
149
  options[:_early_exit_after] = (@only_headers_set && !options[:strict]) ? (keep_flags.rindex(true) || -1) : -1
150
- options[:_keep_cols] = nil # nil signals C: "filter active, check _keep_bitmap"
150
+ options[:_keep_cols] = nil # nil signals C: "filter active, check _keep_bitmap"
151
151
  else
152
- options[:_keep_cols] = false # sentinel: no filtering active — C skips all bitmap paths
152
+ options[:_keep_cols] = false # sentinel: no filtering active — C skips all bitmap paths
153
153
  # Do NOT insert _keep_bitmap/_keep_extra_cols/_early_exit_after when unused.
154
154
  # Keeping the options hash as small as possible avoids hash table resize and
155
155
  # keeps all 10 per-row rb_hash_aref lookups hitting the same cache lines.
@@ -214,18 +214,18 @@ module SmarterCSV
214
214
  # on_start / on_chunk / on_complete are optional callables (nil by default).
215
215
  # Hooks only fire from `process` (library-controlled iteration). Enumerator
216
216
  # modes (each / each_chunk) do not fire hooks — the caller owns the lifecycle.
217
- _on_start = options[:on_start]
218
- _on_chunk = options[:on_chunk]
219
- _on_complete = options[:on_complete]
220
- _start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) if _on_start || _on_complete
217
+ on_start = options[:on_start]
218
+ on_chunk = options[:on_chunk]
219
+ on_complete = options[:on_complete]
220
+ start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) if on_start || on_complete
221
221
 
222
- if _on_start
223
- _input_meta = if @input.is_a?(String)
222
+ if on_start
223
+ input_meta = if @input.is_a?(String)
224
224
  { input: @input, file_size: (File.size(@input) rescue nil) }
225
225
  else
226
226
  { input: @input.class.name, file_size: nil }
227
- end
228
- _on_start.call(_input_meta.merge(col_sep: options[:col_sep], row_sep: options[:row_sep]))
227
+ end
228
+ on_start.call(input_meta.merge(col_sep: options[:col_sep], row_sep: options[:row_sep]))
229
229
  end
230
230
 
231
231
  # now on to processing all the rest of the lines in the CSV file:
@@ -385,7 +385,7 @@ module SmarterCSV
385
385
  chunk << hash # append temp result to chunk
386
386
 
387
387
  if chunk.size >= chunk_size || fh.eof? # if chunk is full, or EOF reached
388
- _on_chunk&.call({ chunk_number: @chunk_count + 1, rows_in_chunk: chunk.size, total_rows_so_far: @csv_line_count })
388
+ on_chunk&.call({ chunk_number: @chunk_count + 1, rows_in_chunk: chunk.size, total_rows_so_far: @csv_line_count })
389
389
  # do something with the chunk
390
390
  if block_given?
391
391
  yield chunk, @chunk_count # do something with the hashes in the chunk in the block
@@ -414,7 +414,7 @@ module SmarterCSV
414
414
 
415
415
  # handling of last chunk:
416
416
  if !chunk.nil? && chunk.size > 0
417
- _on_chunk&.call({ chunk_number: @chunk_count + 1, rows_in_chunk: chunk.size, total_rows_so_far: @csv_line_count })
417
+ on_chunk&.call({ chunk_number: @chunk_count + 1, rows_in_chunk: chunk.size, total_rows_so_far: @csv_line_count })
418
418
  # do something with the chunk
419
419
  if block_given?
420
420
  yield chunk, @chunk_count # do something with the hashes in the chunk in the block
@@ -425,13 +425,13 @@ module SmarterCSV
425
425
  # chunk = [] # initialize for next chunk of data
426
426
  end
427
427
 
428
- if _on_complete
429
- _on_complete.call({
430
- total_rows: @csv_line_count,
431
- total_chunks: @chunk_count,
432
- duration: Process.clock_gettime(Process::CLOCK_MONOTONIC) - _start_time,
433
- bad_rows: @errors[:bad_row_count] || 0,
434
- })
428
+ if on_complete
429
+ on_complete.call({
430
+ total_rows: @csv_line_count,
431
+ total_chunks: @chunk_count,
432
+ duration: Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time,
433
+ bad_rows: @errors[:bad_row_count] || 0,
434
+ })
435
435
  end
436
436
  ensure
437
437
  fh.close if fh.respond_to?(:close)
@@ -662,12 +662,10 @@ module SmarterCSV
662
662
  # else: mid-field quote → literal, no state change
663
663
  elsif !in_quotes
664
664
  # Non-quote character: track whether field has started
665
- if strip
666
- # rubocop:disable Style/MultipleComparison -- two direct == comparisons are faster than Array#include? in this hot loop
665
+ if strip # two direct == comparisons (next line) are faster than Array#include? in this hot loop
667
666
  field_started = true unless line[i] == ' ' || line[i] == "\t"
668
- # rubocop:enable Style/MultipleComparison
669
- else
670
- field_started = true
667
+ else
668
+ field_started = true
671
669
  end
672
670
  end
673
671
  i += 1
@@ -780,9 +778,13 @@ module SmarterCSV
780
778
  end
781
779
 
782
780
  def enforce_utf8_encoding(line, options)
783
- # return line unless options[:force_utf8] || options[:file_encoding] !~ /utf-8/i
784
-
785
- line.force_encoding('utf-8').encode('utf-8', invalid: :replace, undef: :replace, replace: options[:invalid_byte_sequence])
781
+ replace = options[:invalid_byte_sequence]
782
+ # ASCII_8BIT (Encoding::BINARY is an alias) has no codepoint mapping above 0x7F,
783
+ # so encode('utf-8', ASCII_8BIT) would replace every non-ASCII byte. Relabel as
784
+ # UTF-8 first so encode() treats the bytes as already-UTF-8 and only replaces
785
+ # sequences that are actually invalid.
786
+ line = line.force_encoding('utf-8') if line.encoding == Encoding::ASCII_8BIT
787
+ line.encode('utf-8', line.encoding, invalid: :replace, undef: :replace, replace: replace)
786
788
  end
787
789
 
788
790
  def handle_bad_row(error, line, start_csv_line, start_file_line, options)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SmarterCSV
4
- VERSION = "1.16.3"
4
+ VERSION = "1.16.4"
5
5
  end
data/lib/smarter_csv.rb CHANGED
@@ -78,9 +78,12 @@ module SmarterCSV
78
78
  def self.process(input, given_options = {}, &block)
79
79
  Thread.current[:current_thread_recent_errors] = {}
80
80
  reader = Reader.new(input, given_options)
81
- result = reader.process(&block)
82
- Thread.current[:current_thread_recent_errors] = reader.errors
83
- result
81
+ reader.process(&block)
82
+ ensure
83
+ # Preserve partial error state when processing raises mid-stream
84
+ # (e.g. TooManyBadRows, or a user block raising). `reader` is nil if
85
+ # Reader.new itself raised before the local was assigned.
86
+ Thread.current[:current_thread_recent_errors] = reader.errors if reader
84
87
  end
85
88
 
86
89
  # Convenience method for parsing a CSV string directly.
@@ -109,9 +112,9 @@ module SmarterCSV
109
112
  def self.each(input, options = {}, &block)
110
113
  Thread.current[:current_thread_recent_errors] = {}
111
114
  reader = Reader.new(input, options)
112
- result = reader.each(&block)
113
- Thread.current[:current_thread_recent_errors] = reader.errors
114
- result
115
+ reader.each(&block)
116
+ ensure
117
+ Thread.current[:current_thread_recent_errors] = reader.errors if reader
115
118
  end
116
119
 
117
120
  # Yields each chunk as Array<Hash> plus its 0-based chunk index.
@@ -126,9 +129,9 @@ module SmarterCSV
126
129
  def self.each_chunk(input, options = {}, &block)
127
130
  Thread.current[:current_thread_recent_errors] = {}
128
131
  reader = Reader.new(input, options)
129
- result = reader.each_chunk(&block)
130
- Thread.current[:current_thread_recent_errors] = reader.errors
131
- result
132
+ reader.each_chunk(&block)
133
+ ensure
134
+ Thread.current[:current_thread_recent_errors] = reader.errors if reader
132
135
  end
133
136
 
134
137
  # Returns the errors from the most recent call to .process, .parse, .each, or .each_chunk
@@ -198,7 +201,7 @@ module SmarterCSV
198
201
  begin
199
202
  yield writer
200
203
  ensure
201
- writer&.finalize # must finalize before reading io.string
204
+ writer&.finalize # must finalize before reading io.string
202
205
  end
203
206
  io.string
204
207
  else
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: smarter_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.16.3
4
+ version: 1.16.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tilo Sloboda
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2026-04-14 00:00:00.000000000 Z
10
+ date: 2026-04-21 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: |
13
13
  SmarterCSV is a high-performance CSV reader and writer for Ruby focused on