smarter_csv 1.16.4 → 1.16.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9f0dc97fe8b296d479efa58b5e404636fe66dbe768e032de987e4c2736b619a4
4
- data.tar.gz: 6ffaa0b2f74fb6a48c22a28c21a254a0e9b962bcfb9d1b979e72e54ae446a5c1
3
+ metadata.gz: 6e900772903374e904003b0e2219542eed5889619682133731ac15c1392dfcd2
4
+ data.tar.gz: '03039f42cd4bd03bc8353047a888ef8ff510216de887fa62b0c2bcd8614c2a52'
5
5
  SHA512:
6
- metadata.gz: 8e16f3d049432df188373da120fd4d5f04fd4a49d6a3bb3e91abf6f5722fa6fc90ee302e6db5348349f65290226e0da046cb10d71b2d267fc1be4d63af18107c
7
- data.tar.gz: 473ce5f7d1b2bceb7a82898b90c9a19e4076675eea1d748af97d5c7c04abaf15e1cf95a8b774bab40ba5f36cad6ad517e874eab3e63854302ea9d6d4465fefc8
6
+ metadata.gz: af60ffbef48ddaeb0a508624572be722d5ec8c748cbf3519fd277161287ffe406e8fa0209f1b9d21eedad36aa9cf7587dc8dbf6939cf346bce54dfa188c19756
7
+ data.tar.gz: 798f10a02f1add2f42e4a14d5e6abb60c13cdbfe1ade2e3b44f95068503c3827faaf5aa29fcf38eb6031f9ce3f0e68db3cb26b5df8a4afeb50a1a020917b6338
data/CHANGELOG.md CHANGED
@@ -1,6 +1,22 @@
1
1
 
2
2
  # SmarterCSV 1.x Change Log
3
3
 
4
+ ## 1.16.6 (2026-05-21)
5
+
6
+ RSpec tests: **1,467 → 1,591** (+124 tests)
7
+
8
+ ### Bug Fix
9
+
10
+ - fixed [Issue #334](https://github.com/tilo/smarter_csv/issues/334): a doubled (escaped) double quote followed by a comma inside a quoted field was not parsed correctly. Thanks to [conorg](https://github.com/conorg)
11
+ - fixed bug when using `headers: { except: }`
12
+ - added more tests
13
+
14
+ ## 1.16.5 (2026-05-18)
15
+
16
+ ### Bug Fix
17
+
18
+ - fixed an issue where `remove_empty_hashes: false` was not honored in the accelerated path (does not affect you if you use the default settings)
19
+
4
20
  ## 1.16.4 (2026-04-21) — Bug Fixes
5
21
 
6
22
  RSpec tests: **1,434 → 1,467** (+33 tests)
data/CONTRIBUTORS.md CHANGED
@@ -1,4 +1,4 @@
1
- # A Big Thank You to all 63 Contributors!!
1
+ # A Big Thank You to all 64 Contributors!!
2
2
 
3
3
 
4
4
  A Big Thank you to everyone who filed issues, sent comments, and who contributed with pull requests:
@@ -66,3 +66,4 @@ A Big Thank you to everyone who filed issues, sent comments, and who contributed
66
66
  * [Dom Lebron](https://github.com/biglebronski)
67
67
  * [Paho Lurie-Gregg](https://github.com/paholg)
68
68
  * [Jonas Staškevičius](https://github.com/pirminis)
69
+ * [conorg](https://github.com/conorg)
data/README.md CHANGED
@@ -249,7 +249,7 @@ For reporting issues, please:
249
249
  * open a pull-request adding a test that demonstrates the issue
250
250
  * mention your version of SmarterCSV, Ruby, Rails
251
251
 
252
- # [A Special Thanks to all 63 Contributors!](CONTRIBUTORS.md) 🎉🎉🎉
252
+ # [A Special Thanks to all 64 Contributors!](CONTRIBUTORS.md) 🎉🎉🎉
253
253
 
254
254
 
255
255
  ## Contributing
data/Rakefile CHANGED
@@ -21,8 +21,13 @@ RuboCop::RakeTask.new
21
21
 
22
22
  require "rake/extensiontask"
23
23
 
24
- if RUBY_ENGINE == 'jruby'
25
-
24
+ if RUBY_ENGINE != 'ruby'
25
+ # Non-MRI (JRuby, TruffleRuby, ...): there is no C extension to build. Define the
26
+ # native-build tasks as no-ops so the same default task list works on every engine
27
+ # and any caller (CI, rake-compiler, downstream) succeeds without trying to build or
28
+ # copy a .so. Runtime uses the pure-Ruby parser.
29
+ task :compile # no-op
30
+ task :clobber # no-op
26
31
  task default: %i[spec]
27
32
 
28
33
  else
@@ -3,6 +3,15 @@
3
3
  require 'mkmf'
4
4
  require "rbconfig"
5
5
 
6
+ # On non-MRI Rubies (JRuby, TruffleRuby, ...) there is no C extension to build, and trying to build
7
+ # it breaks `gem install` for anything that depends on smarter_csv. Write a no-op Makefile so install
8
+ # succeeds, then stop. At runtime SmarterCSV falls back to its pure-Ruby parser (it checks whether the
9
+ # C functions actually loaded via respond_to?(:parse_csv_line_c)).
10
+ if RUBY_ENGINE != 'ruby'
11
+ File.write('Makefile', dummy_makefile($srcdir).join)
12
+ exit 0
13
+ end
14
+
6
15
  if RbConfig::MAKEFILE_CONFIG["CFLAGS"].include?("-g -O3")
7
16
  fixed_CFLAGS = RbConfig::MAKEFILE_CONFIG["CFLAGS"].sub("-g -O3", "-O3 $(cflags)")
8
17
  puts("Fix CFLAGS: #{RbConfig::MAKEFILE_CONFIG["CFLAGS"]} -> #{fixed_CFLAGS}")
@@ -304,25 +304,40 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
304
304
  if (!allow_escaped_quotes || backslash_count % 2 == 0) {
305
305
  if (__builtin_expect(quote_boundary_standard, 1)) {
306
306
  if (in_quotes) {
307
- // closing quote: only valid if followed by col_sep, row_sep, or end of line
308
- bool valid_close = (p + 1 >= endP);
309
- if (!valid_close) {
310
- valid_close = true;
311
- for (long j = 0; j < col_sep_len; j++) {
312
- if (*(p + 1 + j) != *(col_sepP + j)) { valid_close = false; break; }
307
+ if (p + 2 < endP && *(p + 1) == quote_char_val) {
308
+ /* RFC doubled quote inside a quoted field ("" → ").
309
+ * Give this precedence over the closing-quote check, but only
310
+ * when another byte follows the doubled pair.
311
+ *
312
+ * Compatibility note: we intentionally do NOT force terminal
313
+ * "" to be consumed here. SmarterCSV has a long-standing lenient
314
+ * behavior for malformed tails like ...\"" in :double_quotes mode:
315
+ * the final quote may still close the field instead of turning the
316
+ * row into an unclosed-quote error. Issue #334 needs doubled-quote
317
+ * precedence for ..."",... (more content follows), but we keep the
318
+ * historical leniency for terminal ..."". */
319
+ p++;
320
+ } else {
321
+ // closing quote: only valid if followed by col_sep, row_sep, or end of line
322
+ bool valid_close = (p + 1 >= endP);
323
+ if (!valid_close) {
324
+ valid_close = true;
325
+ for (long j = 0; j < col_sep_len; j++) {
326
+ if (*(p + 1 + j) != *(col_sepP + j)) { valid_close = false; break; }
327
+ }
313
328
  }
314
- }
315
- if (!valid_close && row_sep_len > 0) {
316
- valid_close = true;
317
- for (long j = 0; j < row_sep_len; j++) {
318
- if (*(p + 1 + j) != *(row_sepP + j)) { valid_close = false; break; }
329
+ if (!valid_close && row_sep_len > 0) {
330
+ valid_close = true;
331
+ for (long j = 0; j < row_sep_len; j++) {
332
+ if (*(p + 1 + j) != *(row_sepP + j)) { valid_close = false; break; }
333
+ }
319
334
  }
335
+ if (valid_close) {
336
+ in_quotes = false;
337
+ field_started = true;
338
+ }
339
+ // else: quote inside quoted field → literal
320
340
  }
321
- if (valid_close) {
322
- in_quotes = false;
323
- field_started = true;
324
- }
325
- // else: quote inside quoted field → literal (handles "" doubling)
326
341
  } else if (!field_started) {
327
342
  in_quotes = true; // opening quote at field boundary
328
343
  field_started = true;
@@ -763,6 +778,11 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash(VALUE self, VALUE line,
763
778
  * the frame stays well below 4 KB and ___chkstk_darwin never fires on ARM64 macOS.
764
779
  */
765
780
  bool *keep_bitmap = NULL;
781
+ /* In THIS (non-ctx) function the bitmap is alloca'd to headers_len on every call (see the alloca
782
+ * sites below), so keep_bitmap[] is exactly headers_len long and headers_len is the correct bound
783
+ * at all access sites. Do NOT mirror rb_parse_line_to_hash_ctx's keep_bitmap_len here: that variant
784
+ * caches its bitmap across rows (where @headers can grow), so it must use the captured length; this
785
+ * one rebuilds per call and does not. */
766
786
  bool keep_extra_columns = true; /* extra cols (> headers_len): keep by default */
767
787
  bool has_only = false; /* true when only_headers: filtering is active */
768
788
  long early_exit_after = -1; /* column index after which we stop; -1 = no early exit */
@@ -1081,25 +1101,40 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash(VALUE self, VALUE line,
1081
1101
  if (!allow_escaped_quotes || backslash_count % 2 == 0) {
1082
1102
  if (__builtin_expect(quote_boundary_standard, 1)) {
1083
1103
  if (in_quotes) {
1084
- // closing quote: only valid if followed by col_sep, row_sep, or end of line
1085
- bool valid_close = (p + 1 >= endP);
1086
- if (!valid_close) {
1087
- valid_close = true;
1088
- for (long j = 0; j < col_sep_len; j++) {
1089
- if (*(p + 1 + j) != *(col_sepP + j)) { valid_close = false; break; }
1104
+ if (p + 2 < endP && *(p + 1) == quote_char_val) {
1105
+ /* RFC doubled quote inside a quoted field ("" → ").
1106
+ * Give this precedence over the closing-quote check, but only
1107
+ * when another byte follows the doubled pair.
1108
+ *
1109
+ * Compatibility note: we intentionally do NOT force terminal
1110
+ * "" to be consumed here. SmarterCSV has a long-standing lenient
1111
+ * behavior for malformed tails like ...\"" in :double_quotes mode:
1112
+ * the final quote may still close the field instead of turning the
1113
+ * row into an unclosed-quote error. Issue #334 needs doubled-quote
1114
+ * precedence for ..."",... (more content follows), but we keep the
1115
+ * historical leniency for terminal ..."". */
1116
+ p++;
1117
+ } else {
1118
+ // closing quote: only valid if followed by col_sep, row_sep, or end of line
1119
+ bool valid_close = (p + 1 >= endP);
1120
+ if (!valid_close) {
1121
+ valid_close = true;
1122
+ for (long j = 0; j < col_sep_len; j++) {
1123
+ if (*(p + 1 + j) != *(col_sepP + j)) { valid_close = false; break; }
1124
+ }
1090
1125
  }
1091
- }
1092
- if (!valid_close && row_sep_len2 > 0) {
1093
- valid_close = true;
1094
- for (long j = 0; j < row_sep_len2; j++) {
1095
- if (*(p + 1 + j) != *(row_sepP2 + j)) { valid_close = false; break; }
1126
+ if (!valid_close && row_sep_len2 > 0) {
1127
+ valid_close = true;
1128
+ for (long j = 0; j < row_sep_len2; j++) {
1129
+ if (*(p + 1 + j) != *(row_sepP2 + j)) { valid_close = false; break; }
1130
+ }
1096
1131
  }
1132
+ if (valid_close) {
1133
+ in_quotes = false;
1134
+ field_started = true;
1135
+ }
1136
+ // else: quote inside quoted field → literal
1097
1137
  }
1098
- if (valid_close) {
1099
- in_quotes = false;
1100
- field_started = true;
1101
- }
1102
- // else: quote inside quoted field → literal (handles "" doubling)
1103
1138
  } else if (!field_started) {
1104
1139
  in_quotes = true; // opening quote at field boundary
1105
1140
  field_started = true;
@@ -1176,12 +1211,20 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash(VALUE self, VALUE line,
1176
1211
  * return nil instead of the hash so the row can be skipped.
1177
1212
  * With lazy allocation, if all_blank is true, xform.hash is still Qnil —
1178
1213
  * no hash was ever allocated.
1214
+ *
1215
+ * If remove_empty_hashes is disabled, preserve the row as an empty hash.
1216
+ * This keeps parity with the Ruby path without adding any cost to the
1217
+ * normal non-blank hot path.
1179
1218
  */
1180
- if (remove_empty && all_blank) {
1181
- VALUE result = rb_ary_new_capa(2);
1182
- rb_ary_push(result, Qnil);
1183
- rb_ary_push(result, LONG2FIX(element_count));
1184
- return result;
1219
+ if (all_blank) {
1220
+ if (remove_empty) {
1221
+ VALUE result = rb_ary_new_capa(2);
1222
+ rb_ary_push(result, Qnil);
1223
+ rb_ary_push(result, LONG2FIX(element_count));
1224
+ return result;
1225
+ }
1226
+
1227
+ ensure_hash_allocated(&xform);
1185
1228
  }
1186
1229
 
1187
1230
  /* ----------------------------------------
@@ -1421,6 +1464,14 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
1421
1464
  int numeric_mode = ctx->numeric_mode;
1422
1465
  VALUE numeric_keys = ctx->numeric_keys;
1423
1466
  bool *keep_bitmap = ctx->keep_bitmap;
1467
+ /* keep_bitmap is cached in the context (xmalloc'd once at construction, sized to the header count
1468
+ * THEN). @headers can grow in place as undeclared extra columns appear, so the live headers_len
1469
+ * (re-read each call below) may exceed the bitmap's length. Every keep_bitmap[] access in this
1470
+ * function MUST be bounded by keep_bitmap_len, never headers_len — indices past the bitmap are
1471
+ * extra columns and follow keep_extra_columns. Bounding by the grown headers_len was an
1472
+ * out-of-bounds heap read (the bug). The sibling rb_parse_line_to_hash safely uses headers_len
1473
+ * because it re-allocs its bitmap to headers_len on every call. */
1474
+ long keep_bitmap_len = ctx->keep_bitmap_len;
1424
1475
  bool keep_extra_columns = ctx->keep_extra_columns;
1425
1476
  long early_exit_after = ctx->early_exit_after;
1426
1477
 
@@ -1522,7 +1573,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
1522
1573
  while (trim_end >= trim_start && (*trim_end == ' ' || *trim_end == '\t')) trim_end--;
1523
1574
  }
1524
1575
  long trimmed_len = (trim_end >= trim_start) ? (trim_end - trim_start + 1) : 0;
1525
- if (!keep_bitmap || (element_count < headers_len ? keep_bitmap[element_count] : keep_extra_columns)) {
1576
+ if (!keep_bitmap || (element_count < keep_bitmap_len ? keep_bitmap[element_count] : keep_extra_columns)) {
1526
1577
  if (insert_field_into_hash(&xform, trim_start, trimmed_len, element_count, false, quote_char_val, encoding))
1527
1578
  all_blank = false;
1528
1579
  }
@@ -1543,7 +1594,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
1543
1594
  while (trim_end >= trim_start && (*trim_end == ' ' || *trim_end == '\t')) trim_end--;
1544
1595
  }
1545
1596
  long trimmed_len = (trim_end >= trim_start) ? (trim_end - trim_start + 1) : 0;
1546
- if (!keep_bitmap || (element_count < headers_len ? keep_bitmap[element_count] : keep_extra_columns)) {
1597
+ if (!keep_bitmap || (element_count < keep_bitmap_len ? keep_bitmap[element_count] : keep_extra_columns)) {
1547
1598
  if (insert_field_into_hash(&xform, trim_start, trimmed_len, element_count, false, quote_char_val, encoding))
1548
1599
  all_blank = false;
1549
1600
  }
@@ -1606,7 +1657,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
1606
1657
 
1607
1658
  bool has_embedded_quotes = quoted || (trimmed_len > 0 && memchr(trim_start, quote_char_val, trimmed_len));
1608
1659
 
1609
- if (!keep_bitmap || (element_count < headers_len ? keep_bitmap[element_count] : keep_extra_columns)) {
1660
+ if (!keep_bitmap || (element_count < keep_bitmap_len ? keep_bitmap[element_count] : keep_extra_columns)) {
1610
1661
  if (insert_field_into_hash(&xform, trim_start, trimmed_len, element_count, has_embedded_quotes, quote_char_val, encoding))
1611
1662
  all_blank = false;
1612
1663
  }
@@ -1640,25 +1691,40 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
1640
1691
  if (!allow_escaped_quotes || backslash_count % 2 == 0) {
1641
1692
  if (__builtin_expect(quote_boundary_standard, 1)) {
1642
1693
  if (in_quotes) {
1643
- /* closing quote: only valid if followed by col_sep, row_sep, or end */
1644
- bool valid_close = (p + 1 >= endP);
1645
- if (!valid_close) {
1646
- valid_close = true;
1647
- for (long j = 0; j < col_sep_len; j++) {
1648
- if (*(p + 1 + j) != *(col_sepP + j)) { valid_close = false; break; }
1694
+ if (p + 2 < endP && *(p + 1) == quote_char_val) {
1695
+ /* RFC doubled quote inside a quoted field ("" → ").
1696
+ * Give this precedence over the closing-quote check, but only
1697
+ * when another byte follows the doubled pair.
1698
+ *
1699
+ * Compatibility note: we intentionally do NOT force terminal
1700
+ * "" to be consumed here. SmarterCSV has a long-standing lenient
1701
+ * behavior for malformed tails like ...\"" in :double_quotes mode:
1702
+ * the final quote may still close the field instead of turning the
1703
+ * row into an unclosed-quote error. Issue #334 needs doubled-quote
1704
+ * precedence for ..."",... (more content follows), but we keep the
1705
+ * historical leniency for terminal ..."". */
1706
+ p++;
1707
+ } else {
1708
+ /* closing quote: only valid if followed by col_sep, row_sep, or end */
1709
+ bool valid_close = (p + 1 >= endP);
1710
+ if (!valid_close) {
1711
+ valid_close = true;
1712
+ for (long j = 0; j < col_sep_len; j++) {
1713
+ if (*(p + 1 + j) != *(col_sepP + j)) { valid_close = false; break; }
1714
+ }
1649
1715
  }
1650
- }
1651
- if (!valid_close && row_sep_len2 > 0) {
1652
- valid_close = true;
1653
- for (long j = 0; j < row_sep_len2; j++) {
1654
- if (*(p + 1 + j) != *(row_sepP2 + j)) { valid_close = false; break; }
1716
+ if (!valid_close && row_sep_len2 > 0) {
1717
+ valid_close = true;
1718
+ for (long j = 0; j < row_sep_len2; j++) {
1719
+ if (*(p + 1 + j) != *(row_sepP2 + j)) { valid_close = false; break; }
1720
+ }
1655
1721
  }
1722
+ if (valid_close) {
1723
+ in_quotes = false;
1724
+ field_started = true;
1725
+ }
1726
+ /* else: quote inside quoted field → literal */
1656
1727
  }
1657
- if (valid_close) {
1658
- in_quotes = false;
1659
- field_started = true;
1660
- }
1661
- /* else: quote inside quoted field → literal (handles "" doubling) */
1662
1728
  } else if (!field_started) {
1663
1729
  in_quotes = true; /* opening quote at field boundary */
1664
1730
  field_started = true;
@@ -1717,7 +1783,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
1717
1783
 
1718
1784
  bool has_embedded_quotes = quoted || (trimmed_len > 0 && memchr(trim_start, quote_char_val, trimmed_len));
1719
1785
 
1720
- if (!keep_bitmap || (element_count < headers_len ? keep_bitmap[element_count] : keep_extra_columns)) {
1786
+ if (!keep_bitmap || (element_count < keep_bitmap_len ? keep_bitmap[element_count] : keep_extra_columns)) {
1721
1787
  if (insert_field_into_hash(&xform, trim_start, trimmed_len, element_count, has_embedded_quotes, quote_char_val, encoding))
1722
1788
  all_blank = false;
1723
1789
  }
@@ -1728,11 +1794,15 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
1728
1794
  /* ----------------------------------------
1729
1795
  * SECTION 6: Handle blank rows
1730
1796
  * ---------------------------------------- */
1731
- if (remove_empty && all_blank) {
1732
- VALUE result = rb_ary_new_capa(2);
1733
- rb_ary_push(result, Qnil);
1734
- rb_ary_push(result, LONG2FIX(element_count));
1735
- return result;
1797
+ if (all_blank) {
1798
+ if (remove_empty) {
1799
+ VALUE result = rb_ary_new_capa(2);
1800
+ rb_ary_push(result, Qnil);
1801
+ rb_ary_push(result, LONG2FIX(element_count));
1802
+ return result;
1803
+ }
1804
+
1805
+ ensure_hash_allocated(&xform);
1736
1806
  }
1737
1807
 
1738
1808
  /* ----------------------------------------
@@ -1741,7 +1811,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
1741
1811
  if (!remove_empty_values) {
1742
1812
  ensure_hash_allocated(&xform);
1743
1813
  for (long i = element_count; i < headers_len; i++) {
1744
- if (!keep_bitmap || keep_bitmap[i]) {
1814
+ if (!keep_bitmap || (i < keep_bitmap_len ? keep_bitmap[i] : keep_extra_columns)) {
1745
1815
  rb_hash_aset(xform.hash, rb_ary_entry(headers, i), Qnil);
1746
1816
  }
1747
1817
  }
@@ -405,15 +405,28 @@ module SmarterCSV
405
405
  if !allow_escaped_quotes || backslash_count % 2 == 0
406
406
  if quote_boundary_standard
407
407
  if in_quotes
408
- # closing quote: only valid if followed by col_sep, row_sep, or end of line
409
408
  next_i = i + 1
410
- if next_i >= bytesize ||
411
- line.getbyte(next_i) == col_sep_byte ||
412
- (row_sep_bytesize > 0 && line.byteslice(next_i, row_sep_bytesize) == row_sep)
409
+ if next_i + 1 < bytesize && line.getbyte(next_i) == quote_byte
410
+ # RFC doubled quote inside a quoted field ("" → ").
411
+ # Give this precedence over the closing-quote check, but only
412
+ # when another byte follows the doubled pair.
413
+ #
414
+ # Compatibility note: we intentionally do NOT force terminal
415
+ # "" to be consumed here. SmarterCSV has a long-standing lenient
416
+ # behavior for malformed tails like ...\"" in :double_quotes mode:
417
+ # the final quote may still close the field instead of turning the
418
+ # row into an unclosed-quote error. Issue #334 needs doubled-quote
419
+ # precedence for ..."",... (more content follows), but we keep the
420
+ # historical leniency for terminal ..."".
421
+ i = next_i
422
+ # closing quote: only valid if followed by col_sep, row_sep, or end of line
423
+ elsif next_i >= bytesize ||
424
+ line.getbyte(next_i) == col_sep_byte ||
425
+ (row_sep_bytesize > 0 && line.byteslice(next_i, row_sep_bytesize) == row_sep)
413
426
  in_quotes = false
414
427
  field_started = true
415
428
  end
416
- # else: quote inside quoted field → literal (handles "" doubling)
429
+ # else: quote inside quoted field → literal
417
430
  elsif !field_started # at field boundary: open quoted field
418
431
  in_quotes = true
419
432
  field_started = true
@@ -510,15 +523,28 @@ module SmarterCSV
510
523
  if !allow_escaped_quotes || backslash_count % 2 == 0
511
524
  if quote_boundary_standard
512
525
  if in_quotes
513
- # closing quote: only valid if followed by col_sep, row_sep, or end of line
514
526
  next_i = i + 1
515
- if next_i >= line_size ||
516
- line[next_i...next_i + col_sep_size] == col_sep ||
517
- (row_sep_size > 0 && line[next_i...next_i + row_sep_size] == row_sep)
527
+ if next_i + 1 < line_size && line[next_i] == quote
528
+ # RFC doubled quote inside a quoted field ("" → ").
529
+ # Give this precedence over the closing-quote check, but only
530
+ # when another character follows the doubled pair.
531
+ #
532
+ # Compatibility note: we intentionally do NOT force terminal
533
+ # "" to be consumed here. SmarterCSV has a long-standing lenient
534
+ # behavior for malformed tails like ...\"" in :double_quotes mode:
535
+ # the final quote may still close the field instead of turning the
536
+ # row into an unclosed-quote error. Issue #334 needs doubled-quote
537
+ # precedence for ..."",... (more content follows), but we keep the
538
+ # historical leniency for terminal ..."".
539
+ i = next_i
540
+ # closing quote: only valid if followed by col_sep, row_sep, or end of line
541
+ elsif next_i >= line_size ||
542
+ line[next_i...next_i + col_sep_size] == col_sep ||
543
+ (row_sep_size > 0 && line[next_i...next_i + row_sep_size] == row_sep)
518
544
  in_quotes = false
519
545
  field_started = true
520
546
  end
521
- # else: quote inside quoted field → literal (handles "" doubling)
547
+ # else: quote inside quoted field → literal
522
548
  elsif !field_started # at field boundary: open quoted field
523
549
  in_quotes = true
524
550
  field_started = true
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SmarterCSV
4
- VERSION = "1.16.4"
4
+ VERSION = "1.16.6"
5
5
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: smarter_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.16.4
4
+ version: 1.16.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tilo Sloboda
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2026-04-21 00:00:00.000000000 Z
10
+ date: 2026-05-21 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: |
13
13
  SmarterCSV is a high-performance CSV reader and writer for Ruby focused on
@@ -62,7 +62,6 @@ files:
62
62
  - docs/row_col_sep.md
63
63
  - docs/ruby_csv_pitfalls.md
64
64
  - docs/value_converters.md
65
- - ext/smarter_csv/Makefile
66
65
  - ext/smarter_csv/extconf.rb
67
66
  - ext/smarter_csv/smarter_csv.c
68
67
  - images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.png
@@ -110,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
110
109
  - !ruby/object:Gem::Version
111
110
  version: '0'
112
111
  requirements: []
113
- rubygems_version: 4.0.6
112
+ rubygems_version: 4.0.11
114
113
  specification_version: 4
115
114
  summary: Fastest end-to-end CSV ingestion for Ruby with smart defaults and Rails-ready
116
115
  hash output
@@ -1,270 +0,0 @@
1
-
2
- SHELL = /bin/sh
3
-
4
- # V=0 quiet, V=1 verbose. other values don't work.
5
- V = 0
6
- V0 = $(V:0=)
7
- Q1 = $(V:1=)
8
- Q = $(Q1:0=@)
9
- ECHO1 = $(V:1=@ :)
10
- ECHO = $(ECHO1:0=@ echo)
11
- NULLCMD = :
12
-
13
- #### Start of system configuration section. ####
14
-
15
- srcdir = .
16
- topdir = /Users/tilo/.rvm/rubies/ruby-3.2.2/include/ruby-3.2.0
17
- hdrdir = $(topdir)
18
- arch_hdrdir = /Users/tilo/.rvm/rubies/ruby-3.2.2/include/ruby-3.2.0/arm64-darwin23
19
- PATH_SEPARATOR = :
20
- VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
21
- prefix = $(DESTDIR)/Users/tilo/.rvm/rubies/ruby-3.2.2
22
- rubysitearchprefix = $(rubylibprefix)/$(sitearch)
23
- rubyarchprefix = $(rubylibprefix)/$(arch)
24
- rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
25
- exec_prefix = $(prefix)
26
- vendorarchhdrdir = $(vendorhdrdir)/$(sitearch)
27
- sitearchhdrdir = $(sitehdrdir)/$(sitearch)
28
- rubyarchhdrdir = $(rubyhdrdir)/$(arch)
29
- vendorhdrdir = $(rubyhdrdir)/vendor_ruby
30
- sitehdrdir = $(rubyhdrdir)/site_ruby
31
- rubyhdrdir = $(includedir)/$(RUBY_VERSION_NAME)
32
- vendorarchdir = $(vendorlibdir)/$(sitearch)
33
- vendorlibdir = $(vendordir)/$(ruby_version)
34
- vendordir = $(rubylibprefix)/vendor_ruby
35
- sitearchdir = $(sitelibdir)/$(sitearch)
36
- sitelibdir = $(sitedir)/$(ruby_version)
37
- sitedir = $(rubylibprefix)/site_ruby
38
- rubyarchdir = $(rubylibdir)/$(arch)
39
- rubylibdir = $(rubylibprefix)/$(ruby_version)
40
- sitearchincludedir = $(includedir)/$(sitearch)
41
- archincludedir = $(includedir)/$(arch)
42
- sitearchlibdir = $(libdir)/$(sitearch)
43
- archlibdir = $(libdir)/$(arch)
44
- ridir = $(datarootdir)/$(RI_BASE_NAME)
45
- mandir = $(datarootdir)/man
46
- localedir = $(datarootdir)/locale
47
- libdir = $(exec_prefix)/lib
48
- psdir = $(docdir)
49
- pdfdir = $(docdir)
50
- dvidir = $(docdir)
51
- htmldir = $(docdir)
52
- infodir = $(datarootdir)/info
53
- docdir = $(datarootdir)/doc/$(PACKAGE)
54
- oldincludedir = $(DESTDIR)/usr/include
55
- includedir = $(SDKROOT)$(prefix)/include
56
- runstatedir = $(localstatedir)/run
57
- localstatedir = $(prefix)/var
58
- sharedstatedir = $(prefix)/com
59
- sysconfdir = $(prefix)/etc
60
- datadir = $(datarootdir)
61
- datarootdir = $(prefix)/share
62
- libexecdir = $(exec_prefix)/libexec
63
- sbindir = $(exec_prefix)/sbin
64
- bindir = $(exec_prefix)/bin
65
- archdir = $(rubyarchdir)
66
-
67
-
68
- CC_WRAPPER =
69
- CC = gcc
70
- CXX = g++
71
- LIBRUBY = $(LIBRUBY_SO)
72
- LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
73
- LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
74
- LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static -framework CoreFoundation $(MAINLIBS)
75
- empty =
76
- OUTFLAG = -o $(empty)
77
- COUTFLAG = -o $(empty)
78
- CSRCFLAG = $(empty)
79
-
80
- RUBY_EXTCONF_H =
81
- cflags = -fdeclspec $(optflags) $(debugflags) $(warnflags)
82
- cxxflags =
83
- optflags = -O3
84
- debugflags = -ggdb3
85
- warnflags = -Wall -Wextra -Wextra-tokens -Wdeprecated-declarations -Wdivision-by-zero -Wdiv-by-zero -Wimplicit-function-declaration -Wimplicit-int -Wmisleading-indentation -Wpointer-arith -Wshorten-64-to-32 -Wwrite-strings -Wold-style-definition -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wunused-variable -Wundef
86
- cppflags =
87
- CCDLFLAGS = -fno-common
88
- CFLAGS = $(CCDLFLAGS) -O3 -I/opt/homebrew/opt/libyaml/include -I/opt/homebrew/opt/libksba/include -I/opt/homebrew/opt/readline/include -I/opt/homebrew/opt/zlib/include -I/opt/homebrew/opt/openssl@1.1/include $(cflags) -fno-common -pipe $(ARCH_FLAG)
89
- INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
90
- DEFS =
91
- CPPFLAGS = -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -D_DARWIN_UNLIMITED_SELECT -D_REENTRANT $(DEFS) $(cppflags)
92
- CXXFLAGS = $(CCDLFLAGS) -fdeclspec $(ARCH_FLAG)
93
- ldflags = -L. -L/opt/homebrew/opt/libyaml/lib -L/opt/homebrew/opt/libksba/lib -L/opt/homebrew/opt/readline/lib -L/opt/homebrew/opt/zlib/lib -L/opt/homebrew/opt/openssl@1.1/lib -fstack-protector-strong
94
- dldflags = -L/opt/homebrew/opt/libyaml/lib -L/opt/homebrew/opt/libksba/lib -L/opt/homebrew/opt/readline/lib -L/opt/homebrew/opt/zlib/lib -L/opt/homebrew/opt/openssl@1.1/lib -Wl,-undefined,dynamic_lookup $(LIBRUBYARG_SHARED)
95
- ARCH_FLAG =
96
- DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
97
- LDSHARED = $(CC) -dynamic -bundle
98
- LDSHAREDXX = $(CXX) -dynamic -bundle
99
- AR = ar
100
- EXEEXT =
101
-
102
- RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)
103
- RUBY_SO_NAME = ruby.3.2
104
- RUBYW_INSTALL_NAME =
105
- RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version)
106
- RUBYW_BASE_NAME = rubyw
107
- RUBY_BASE_NAME = ruby
108
-
109
- arch = arm64-darwin23
110
- sitearch = $(arch)
111
- ruby_version = 3.2.0
112
- ruby = $(bindir)/$(RUBY_BASE_NAME)
113
- RUBY = $(ruby)
114
- BUILTRUBY = $(bindir)/$(RUBY_BASE_NAME)
115
- ruby_headers = $(hdrdir)/ruby.h $(hdrdir)/ruby/backward.h $(hdrdir)/ruby/ruby.h $(hdrdir)/ruby/defines.h $(hdrdir)/ruby/missing.h $(hdrdir)/ruby/intern.h $(hdrdir)/ruby/st.h $(hdrdir)/ruby/subst.h $(arch_hdrdir)/ruby/config.h
116
-
117
- RM = rm -f
118
- RM_RF = rm -fr
119
- RMDIRS = rmdir -p
120
- MAKEDIRS = /opt/homebrew/opt/coreutils/bin/gmkdir -p
121
- INSTALL = /opt/homebrew/opt/coreutils/bin/ginstall -c
122
- INSTALL_PROG = $(INSTALL) -m 0755
123
- INSTALL_DATA = $(INSTALL) -m 644
124
- COPY = cp
125
- TOUCH = exit >
126
-
127
- #### End of system configuration section. ####
128
-
129
- preload =
130
- libpath = . $(libdir)
131
- LIBPATH = -L. -L$(libdir)
132
- DEFFILE =
133
-
134
- CLEANFILES = mkmf.log
135
- DISTCLEANFILES =
136
- DISTCLEANDIRS =
137
-
138
- extout =
139
- extout_prefix =
140
- target_prefix = /smarter_csv
141
- LOCAL_LIBS =
142
- LIBS = $(LIBRUBYARG_SHARED) -lpthread
143
- ORIG_SRCS = smarter_csv.c
144
- SRCS = $(ORIG_SRCS)
145
- OBJS = smarter_csv.o
146
- HDRS =
147
- LOCAL_HDRS =
148
- TARGET = smarter_csv
149
- TARGET_NAME = smarter_csv
150
- TARGET_ENTRY = Init_$(TARGET_NAME)
151
- DLLIB = $(TARGET).bundle
152
- EXTSTATIC =
153
- STATIC_LIB =
154
-
155
- TIMESTAMP_DIR = .
156
- BINDIR = $(bindir)
157
- RUBYCOMMONDIR = $(sitedir)$(target_prefix)
158
- RUBYLIBDIR = $(sitelibdir)$(target_prefix)
159
- RUBYARCHDIR = $(sitearchdir)$(target_prefix)
160
- HDRDIR = $(sitehdrdir)$(target_prefix)
161
- ARCHHDRDIR = $(sitearchhdrdir)$(target_prefix)
162
- TARGET_SO_DIR =
163
- TARGET_SO = $(TARGET_SO_DIR)$(DLLIB)
164
- CLEANLIBS = $(TARGET_SO) $(TARGET_SO).dSYM
165
- CLEANOBJS = $(OBJS) *.bak
166
- TARGET_SO_DIR_TIMESTAMP = $(TIMESTAMP_DIR)/.sitearchdir.-.smarter_csv.time
167
-
168
- all: $(DLLIB)
169
- static: $(STATIC_LIB)
170
- .PHONY: all install static install-so install-rb
171
- .PHONY: clean clean-so clean-static clean-rb
172
-
173
- clean-static::
174
- clean-rb-default::
175
- clean-rb::
176
- clean-so::
177
- clean: clean-so clean-static clean-rb-default clean-rb
178
- -$(Q)$(RM_RF) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) .*.time
179
-
180
- distclean-rb-default::
181
- distclean-rb::
182
- distclean-so::
183
- distclean-static::
184
- distclean: clean distclean-so distclean-static distclean-rb-default distclean-rb
185
- -$(Q)$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
186
- -$(Q)$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
187
- -$(Q)$(RMDIRS) $(DISTCLEANDIRS) 2> /dev/null || true
188
-
189
- realclean: distclean
190
- install: install-so install-rb
191
-
192
- install-so: $(DLLIB) $(TARGET_SO_DIR_TIMESTAMP)
193
- $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
194
- clean-static::
195
- -$(Q)$(RM) $(STATIC_LIB)
196
- install-rb: pre-install-rb do-install-rb install-rb-default
197
- install-rb-default: pre-install-rb-default do-install-rb-default
198
- pre-install-rb: Makefile
199
- pre-install-rb-default: Makefile
200
- do-install-rb:
201
- do-install-rb-default:
202
- pre-install-rb-default:
203
- @$(NULLCMD)
204
- $(TARGET_SO_DIR_TIMESTAMP):
205
- $(Q) $(MAKEDIRS) $(@D) $(RUBYARCHDIR)
206
- $(Q) $(TOUCH) $@
207
-
208
- site-install: site-install-so site-install-rb
209
- site-install-so: install-so
210
- site-install-rb: install-rb
211
-
212
- .SUFFIXES: .c .m .cc .mm .cxx .cpp .o .S
213
-
214
- .cc.o:
215
- $(ECHO) compiling $(<)
216
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
217
-
218
- .cc.S:
219
- $(ECHO) translating $(<)
220
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
221
-
222
- .mm.o:
223
- $(ECHO) compiling $(<)
224
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
225
-
226
- .mm.S:
227
- $(ECHO) translating $(<)
228
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
229
-
230
- .cxx.o:
231
- $(ECHO) compiling $(<)
232
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
233
-
234
- .cxx.S:
235
- $(ECHO) translating $(<)
236
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
237
-
238
- .cpp.o:
239
- $(ECHO) compiling $(<)
240
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
241
-
242
- .cpp.S:
243
- $(ECHO) translating $(<)
244
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
245
-
246
- .c.o:
247
- $(ECHO) compiling $(<)
248
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
249
-
250
- .c.S:
251
- $(ECHO) translating $(<)
252
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
253
-
254
- .m.o:
255
- $(ECHO) compiling $(<)
256
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
257
-
258
- .m.S:
259
- $(ECHO) translating $(<)
260
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
261
-
262
- $(TARGET_SO): $(OBJS) Makefile
263
- $(ECHO) linking shared-object smarter_csv/$(DLLIB)
264
- -$(Q)$(RM) $(@)
265
- $(Q) $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
266
- $(Q) $(POSTLINK)
267
-
268
-
269
-
270
- $(OBJS): $(HDRS) $(ruby_headers)