smarter_csv 1.16.5 → 1.16.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/CONTRIBUTORS.md +2 -1
- data/README.md +1 -1
- data/Rakefile +7 -2
- data/ext/smarter_csv/extconf.rb +9 -0
- data/ext/smarter_csv/smarter_csv.c +111 -53
- data/lib/smarter_csv/parser.rb +36 -10
- data/lib/smarter_csv/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6e900772903374e904003b0e2219542eed5889619682133731ac15c1392dfcd2
|
|
4
|
+
data.tar.gz: '03039f42cd4bd03bc8353047a888ef8ff510216de887fa62b0c2bcd8614c2a52'
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: af60ffbef48ddaeb0a508624572be722d5ec8c748cbf3519fd277161287ffe406e8fa0209f1b9d21eedad36aa9cf7587dc8dbf6939cf346bce54dfa188c19756
|
|
7
|
+
data.tar.gz: 798f10a02f1add2f42e4a14d5e6abb60c13cdbfe1ade2e3b44f95068503c3827faaf5aa29fcf38eb6031f9ce3f0e68db3cb26b5df8a4afeb50a1a020917b6338
|
data/CHANGELOG.md
CHANGED
|
@@ -1,6 +1,16 @@
|
|
|
1
1
|
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
|
3
3
|
|
|
4
|
+
## 1.16.6 (2026-05-21)
|
|
5
|
+
|
|
6
|
+
RSpec tests: **1,467 → 1,591** (+124 tests)
|
|
7
|
+
|
|
8
|
+
### Bug Fix
|
|
9
|
+
|
|
10
|
+
- fixed [Issue #334](https://github.com/tilo/smarter_csv/issues/334) with escaped double quote followed by comma. Thanks to [conorg](https://github.com/conorg)
|
|
11
|
+
- fixed bug when using `headers: { except: }`
|
|
12
|
+
- added more tests
|
|
13
|
+
|
|
4
14
|
## 1.16.5 (2026-05-18)
|
|
5
15
|
|
|
6
16
|
### Bug Fix
|
data/CONTRIBUTORS.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# A Big Thank You to all
|
|
1
|
+
# A Big Thank You to all 64 Contributors!!
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
A Big Thank you to everyone who filed issues, sent comments, and who contributed with pull requests:
|
|
@@ -66,3 +66,4 @@ A Big Thank you to everyone who filed issues, sent comments, and who contributed
|
|
|
66
66
|
* [Dom Lebron](https://github.com/biglebronski)
|
|
67
67
|
* [Paho Lurie-Gregg](https://github.com/paholg)
|
|
68
68
|
* [Jonas Staškevičius](https://github.com/pirminis)
|
|
69
|
+
* [conorg](https://github.com/conorg)
|
data/README.md
CHANGED
|
@@ -249,7 +249,7 @@ For reporting issues, please:
|
|
|
249
249
|
* open a pull-request adding a test that demonstrates the issue
|
|
250
250
|
* mention your version of SmarterCSV, Ruby, Rails
|
|
251
251
|
|
|
252
|
-
# [A Special Thanks to all
|
|
252
|
+
# [A Special Thanks to all 64 Contributors!](CONTRIBUTORS.md) 🎉🎉🎉
|
|
253
253
|
|
|
254
254
|
|
|
255
255
|
## Contributing
|
data/Rakefile
CHANGED
|
@@ -21,8 +21,13 @@ RuboCop::RakeTask.new
|
|
|
21
21
|
|
|
22
22
|
require "rake/extensiontask"
|
|
23
23
|
|
|
24
|
-
if RUBY_ENGINE
|
|
25
|
-
|
|
24
|
+
if RUBY_ENGINE != 'ruby'
|
|
25
|
+
# Non-MRI (JRuby, TruffleRuby, ...): there is no C extension to build. Define the
|
|
26
|
+
# native-build tasks as no-ops so the same default task list works on every engine
|
|
27
|
+
# and any caller (CI, rake-compiler, downstream) succeeds without trying to build or
|
|
28
|
+
# copy a .so. Runtime uses the pure-Ruby parser.
|
|
29
|
+
task :compile # no-op
|
|
30
|
+
task :clobber # no-op
|
|
26
31
|
task default: %i[spec]
|
|
27
32
|
|
|
28
33
|
else
|
data/ext/smarter_csv/extconf.rb
CHANGED
|
@@ -3,6 +3,15 @@
|
|
|
3
3
|
require 'mkmf'
|
|
4
4
|
require "rbconfig"
|
|
5
5
|
|
|
6
|
+
# On non-MRI Rubies (JRuby, TruffleRuby, ...) there is no C extension to build, and trying to build
|
|
7
|
+
# it breaks `gem install` for anything that depends on smarter_csv. Write a no-op Makefile so install
|
|
8
|
+
# succeeds, then stop. At runtime SmarterCSV falls back to its pure-Ruby parser (it checks whether the
|
|
9
|
+
# C functions actually loaded via respond_to?(:parse_csv_line_c)).
|
|
10
|
+
if RUBY_ENGINE != 'ruby'
|
|
11
|
+
File.write('Makefile', dummy_makefile($srcdir).join)
|
|
12
|
+
exit 0
|
|
13
|
+
end
|
|
14
|
+
|
|
6
15
|
if RbConfig::MAKEFILE_CONFIG["CFLAGS"].include?("-g -O3")
|
|
7
16
|
fixed_CFLAGS = RbConfig::MAKEFILE_CONFIG["CFLAGS"].sub("-g -O3", "-O3 $(cflags)")
|
|
8
17
|
puts("Fix CFLAGS: #{RbConfig::MAKEFILE_CONFIG["CFLAGS"]} -> #{fixed_CFLAGS}")
|
|
@@ -304,25 +304,40 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
|
|
|
304
304
|
if (!allow_escaped_quotes || backslash_count % 2 == 0) {
|
|
305
305
|
if (__builtin_expect(quote_boundary_standard, 1)) {
|
|
306
306
|
if (in_quotes) {
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
307
|
+
if (p + 2 < endP && *(p + 1) == quote_char_val) {
|
|
308
|
+
/* RFC doubled quote inside a quoted field ("" → ").
|
|
309
|
+
* Give this precedence over the closing-quote check, but only
|
|
310
|
+
* when another byte follows the doubled pair.
|
|
311
|
+
*
|
|
312
|
+
* Compatibility note: we intentionally do NOT force terminal
|
|
313
|
+
* "" to be consumed here. SmarterCSV has a long-standing lenient
|
|
314
|
+
* behavior for malformed tails like ...\"" in :double_quotes mode:
|
|
315
|
+
* the final quote may still close the field instead of turning the
|
|
316
|
+
* row into an unclosed-quote error. Issue #334 needs doubled-quote
|
|
317
|
+
* precedence for ..."",... (more content follows), but we keep the
|
|
318
|
+
* historical leniency for terminal ..."". */
|
|
319
|
+
p++;
|
|
320
|
+
} else {
|
|
321
|
+
// closing quote: only valid if followed by col_sep, row_sep, or end of line
|
|
322
|
+
bool valid_close = (p + 1 >= endP);
|
|
323
|
+
if (!valid_close) {
|
|
324
|
+
valid_close = true;
|
|
325
|
+
for (long j = 0; j < col_sep_len; j++) {
|
|
326
|
+
if (*(p + 1 + j) != *(col_sepP + j)) { valid_close = false; break; }
|
|
327
|
+
}
|
|
313
328
|
}
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
329
|
+
if (!valid_close && row_sep_len > 0) {
|
|
330
|
+
valid_close = true;
|
|
331
|
+
for (long j = 0; j < row_sep_len; j++) {
|
|
332
|
+
if (*(p + 1 + j) != *(row_sepP + j)) { valid_close = false; break; }
|
|
333
|
+
}
|
|
319
334
|
}
|
|
335
|
+
if (valid_close) {
|
|
336
|
+
in_quotes = false;
|
|
337
|
+
field_started = true;
|
|
338
|
+
}
|
|
339
|
+
// else: quote inside quoted field → literal
|
|
320
340
|
}
|
|
321
|
-
if (valid_close) {
|
|
322
|
-
in_quotes = false;
|
|
323
|
-
field_started = true;
|
|
324
|
-
}
|
|
325
|
-
// else: quote inside quoted field → literal (handles "" doubling)
|
|
326
341
|
} else if (!field_started) {
|
|
327
342
|
in_quotes = true; // opening quote at field boundary
|
|
328
343
|
field_started = true;
|
|
@@ -763,6 +778,11 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash(VALUE self, VALUE line,
|
|
|
763
778
|
* the frame stays well below 4 KB and ___chkstk_darwin never fires on ARM64 macOS.
|
|
764
779
|
*/
|
|
765
780
|
bool *keep_bitmap = NULL;
|
|
781
|
+
/* In THIS (non-ctx) function the bitmap is alloca'd to headers_len on every call (see the alloca
|
|
782
|
+
* sites below), so keep_bitmap[] is exactly headers_len long and headers_len is the correct bound
|
|
783
|
+
* at all access sites. Do NOT mirror rb_parse_line_to_hash_ctx's keep_bitmap_len here: that variant
|
|
784
|
+
* caches its bitmap across rows (where @headers can grow), so it must use the captured length; this
|
|
785
|
+
* one rebuilds per call and does not. */
|
|
766
786
|
bool keep_extra_columns = true; /* extra cols (> headers_len): keep by default */
|
|
767
787
|
bool has_only = false; /* true when only_headers: filtering is active */
|
|
768
788
|
long early_exit_after = -1; /* column index after which we stop; -1 = no early exit */
|
|
@@ -1081,25 +1101,40 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash(VALUE self, VALUE line,
|
|
|
1081
1101
|
if (!allow_escaped_quotes || backslash_count % 2 == 0) {
|
|
1082
1102
|
if (__builtin_expect(quote_boundary_standard, 1)) {
|
|
1083
1103
|
if (in_quotes) {
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1104
|
+
if (p + 2 < endP && *(p + 1) == quote_char_val) {
|
|
1105
|
+
/* RFC doubled quote inside a quoted field ("" → ").
|
|
1106
|
+
* Give this precedence over the closing-quote check, but only
|
|
1107
|
+
* when another byte follows the doubled pair.
|
|
1108
|
+
*
|
|
1109
|
+
* Compatibility note: we intentionally do NOT force terminal
|
|
1110
|
+
* "" to be consumed here. SmarterCSV has a long-standing lenient
|
|
1111
|
+
* behavior for malformed tails like ...\"" in :double_quotes mode:
|
|
1112
|
+
* the final quote may still close the field instead of turning the
|
|
1113
|
+
* row into an unclosed-quote error. Issue #334 needs doubled-quote
|
|
1114
|
+
* precedence for ..."",... (more content follows), but we keep the
|
|
1115
|
+
* historical leniency for terminal ..."". */
|
|
1116
|
+
p++;
|
|
1117
|
+
} else {
|
|
1118
|
+
// closing quote: only valid if followed by col_sep, row_sep, or end of line
|
|
1119
|
+
bool valid_close = (p + 1 >= endP);
|
|
1120
|
+
if (!valid_close) {
|
|
1121
|
+
valid_close = true;
|
|
1122
|
+
for (long j = 0; j < col_sep_len; j++) {
|
|
1123
|
+
if (*(p + 1 + j) != *(col_sepP + j)) { valid_close = false; break; }
|
|
1124
|
+
}
|
|
1090
1125
|
}
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1126
|
+
if (!valid_close && row_sep_len2 > 0) {
|
|
1127
|
+
valid_close = true;
|
|
1128
|
+
for (long j = 0; j < row_sep_len2; j++) {
|
|
1129
|
+
if (*(p + 1 + j) != *(row_sepP2 + j)) { valid_close = false; break; }
|
|
1130
|
+
}
|
|
1096
1131
|
}
|
|
1132
|
+
if (valid_close) {
|
|
1133
|
+
in_quotes = false;
|
|
1134
|
+
field_started = true;
|
|
1135
|
+
}
|
|
1136
|
+
// else: quote inside quoted field → literal
|
|
1097
1137
|
}
|
|
1098
|
-
if (valid_close) {
|
|
1099
|
-
in_quotes = false;
|
|
1100
|
-
field_started = true;
|
|
1101
|
-
}
|
|
1102
|
-
// else: quote inside quoted field → literal (handles "" doubling)
|
|
1103
1138
|
} else if (!field_started) {
|
|
1104
1139
|
in_quotes = true; // opening quote at field boundary
|
|
1105
1140
|
field_started = true;
|
|
@@ -1429,6 +1464,14 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
|
|
|
1429
1464
|
int numeric_mode = ctx->numeric_mode;
|
|
1430
1465
|
VALUE numeric_keys = ctx->numeric_keys;
|
|
1431
1466
|
bool *keep_bitmap = ctx->keep_bitmap;
|
|
1467
|
+
/* keep_bitmap is cached in the context (xmalloc'd once at construction, sized to the header count
|
|
1468
|
+
* THEN). @headers can grow in place as undeclared extra columns appear, so the live headers_len
|
|
1469
|
+
* (re-read each call below) may exceed the bitmap's length. Every keep_bitmap[] access in this
|
|
1470
|
+
* function MUST be bounded by keep_bitmap_len, never headers_len — indices past the bitmap are
|
|
1471
|
+
* extra columns and follow keep_extra_columns. Bounding by the grown headers_len was an
|
|
1472
|
+
* out-of-bounds heap read (the bug). The sibling rb_parse_line_to_hash safely uses headers_len
|
|
1473
|
+
* because it re-allocs its bitmap to headers_len on every call. */
|
|
1474
|
+
long keep_bitmap_len = ctx->keep_bitmap_len;
|
|
1432
1475
|
bool keep_extra_columns = ctx->keep_extra_columns;
|
|
1433
1476
|
long early_exit_after = ctx->early_exit_after;
|
|
1434
1477
|
|
|
@@ -1530,7 +1573,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
|
|
|
1530
1573
|
while (trim_end >= trim_start && (*trim_end == ' ' || *trim_end == '\t')) trim_end--;
|
|
1531
1574
|
}
|
|
1532
1575
|
long trimmed_len = (trim_end >= trim_start) ? (trim_end - trim_start + 1) : 0;
|
|
1533
|
-
if (!keep_bitmap || (element_count <
|
|
1576
|
+
if (!keep_bitmap || (element_count < keep_bitmap_len ? keep_bitmap[element_count] : keep_extra_columns)) {
|
|
1534
1577
|
if (insert_field_into_hash(&xform, trim_start, trimmed_len, element_count, false, quote_char_val, encoding))
|
|
1535
1578
|
all_blank = false;
|
|
1536
1579
|
}
|
|
@@ -1551,7 +1594,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
|
|
|
1551
1594
|
while (trim_end >= trim_start && (*trim_end == ' ' || *trim_end == '\t')) trim_end--;
|
|
1552
1595
|
}
|
|
1553
1596
|
long trimmed_len = (trim_end >= trim_start) ? (trim_end - trim_start + 1) : 0;
|
|
1554
|
-
if (!keep_bitmap || (element_count <
|
|
1597
|
+
if (!keep_bitmap || (element_count < keep_bitmap_len ? keep_bitmap[element_count] : keep_extra_columns)) {
|
|
1555
1598
|
if (insert_field_into_hash(&xform, trim_start, trimmed_len, element_count, false, quote_char_val, encoding))
|
|
1556
1599
|
all_blank = false;
|
|
1557
1600
|
}
|
|
@@ -1614,7 +1657,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
|
|
|
1614
1657
|
|
|
1615
1658
|
bool has_embedded_quotes = quoted || (trimmed_len > 0 && memchr(trim_start, quote_char_val, trimmed_len));
|
|
1616
1659
|
|
|
1617
|
-
if (!keep_bitmap || (element_count <
|
|
1660
|
+
if (!keep_bitmap || (element_count < keep_bitmap_len ? keep_bitmap[element_count] : keep_extra_columns)) {
|
|
1618
1661
|
if (insert_field_into_hash(&xform, trim_start, trimmed_len, element_count, has_embedded_quotes, quote_char_val, encoding))
|
|
1619
1662
|
all_blank = false;
|
|
1620
1663
|
}
|
|
@@ -1648,25 +1691,40 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
|
|
|
1648
1691
|
if (!allow_escaped_quotes || backslash_count % 2 == 0) {
|
|
1649
1692
|
if (__builtin_expect(quote_boundary_standard, 1)) {
|
|
1650
1693
|
if (in_quotes) {
|
|
1651
|
-
|
|
1652
|
-
|
|
1653
|
-
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1694
|
+
if (p + 2 < endP && *(p + 1) == quote_char_val) {
|
|
1695
|
+
/* RFC doubled quote inside a quoted field ("" → ").
|
|
1696
|
+
* Give this precedence over the closing-quote check, but only
|
|
1697
|
+
* when another byte follows the doubled pair.
|
|
1698
|
+
*
|
|
1699
|
+
* Compatibility note: we intentionally do NOT force terminal
|
|
1700
|
+
* "" to be consumed here. SmarterCSV has a long-standing lenient
|
|
1701
|
+
* behavior for malformed tails like ...\"" in :double_quotes mode:
|
|
1702
|
+
* the final quote may still close the field instead of turning the
|
|
1703
|
+
* row into an unclosed-quote error. Issue #334 needs doubled-quote
|
|
1704
|
+
* precedence for ..."",... (more content follows), but we keep the
|
|
1705
|
+
* historical leniency for terminal ..."". */
|
|
1706
|
+
p++;
|
|
1707
|
+
} else {
|
|
1708
|
+
/* closing quote: only valid if followed by col_sep, row_sep, or end */
|
|
1709
|
+
bool valid_close = (p + 1 >= endP);
|
|
1710
|
+
if (!valid_close) {
|
|
1711
|
+
valid_close = true;
|
|
1712
|
+
for (long j = 0; j < col_sep_len; j++) {
|
|
1713
|
+
if (*(p + 1 + j) != *(col_sepP + j)) { valid_close = false; break; }
|
|
1714
|
+
}
|
|
1657
1715
|
}
|
|
1658
|
-
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
|
|
1662
|
-
|
|
1716
|
+
if (!valid_close && row_sep_len2 > 0) {
|
|
1717
|
+
valid_close = true;
|
|
1718
|
+
for (long j = 0; j < row_sep_len2; j++) {
|
|
1719
|
+
if (*(p + 1 + j) != *(row_sepP2 + j)) { valid_close = false; break; }
|
|
1720
|
+
}
|
|
1663
1721
|
}
|
|
1722
|
+
if (valid_close) {
|
|
1723
|
+
in_quotes = false;
|
|
1724
|
+
field_started = true;
|
|
1725
|
+
}
|
|
1726
|
+
/* else: quote inside quoted field → literal */
|
|
1664
1727
|
}
|
|
1665
|
-
if (valid_close) {
|
|
1666
|
-
in_quotes = false;
|
|
1667
|
-
field_started = true;
|
|
1668
|
-
}
|
|
1669
|
-
/* else: quote inside quoted field → literal (handles "" doubling) */
|
|
1670
1728
|
} else if (!field_started) {
|
|
1671
1729
|
in_quotes = true; /* opening quote at field boundary */
|
|
1672
1730
|
field_started = true;
|
|
@@ -1725,7 +1783,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
|
|
|
1725
1783
|
|
|
1726
1784
|
bool has_embedded_quotes = quoted || (trimmed_len > 0 && memchr(trim_start, quote_char_val, trimmed_len));
|
|
1727
1785
|
|
|
1728
|
-
if (!keep_bitmap || (element_count <
|
|
1786
|
+
if (!keep_bitmap || (element_count < keep_bitmap_len ? keep_bitmap[element_count] : keep_extra_columns)) {
|
|
1729
1787
|
if (insert_field_into_hash(&xform, trim_start, trimmed_len, element_count, has_embedded_quotes, quote_char_val, encoding))
|
|
1730
1788
|
all_blank = false;
|
|
1731
1789
|
}
|
|
@@ -1753,7 +1811,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
|
|
|
1753
1811
|
if (!remove_empty_values) {
|
|
1754
1812
|
ensure_hash_allocated(&xform);
|
|
1755
1813
|
for (long i = element_count; i < headers_len; i++) {
|
|
1756
|
-
if (!keep_bitmap || keep_bitmap[i]) {
|
|
1814
|
+
if (!keep_bitmap || (i < keep_bitmap_len ? keep_bitmap[i] : keep_extra_columns)) {
|
|
1757
1815
|
rb_hash_aset(xform.hash, rb_ary_entry(headers, i), Qnil);
|
|
1758
1816
|
}
|
|
1759
1817
|
}
|
data/lib/smarter_csv/parser.rb
CHANGED
|
@@ -405,15 +405,28 @@ module SmarterCSV
|
|
|
405
405
|
if !allow_escaped_quotes || backslash_count % 2 == 0
|
|
406
406
|
if quote_boundary_standard
|
|
407
407
|
if in_quotes
|
|
408
|
-
# closing quote: only valid if followed by col_sep, row_sep, or end of line
|
|
409
408
|
next_i = i + 1
|
|
410
|
-
if next_i
|
|
411
|
-
|
|
412
|
-
|
|
409
|
+
if next_i + 1 < bytesize && line.getbyte(next_i) == quote_byte
|
|
410
|
+
# RFC doubled quote inside a quoted field ("" → ").
|
|
411
|
+
# Give this precedence over the closing-quote check, but only
|
|
412
|
+
# when another byte follows the doubled pair.
|
|
413
|
+
#
|
|
414
|
+
# Compatibility note: we intentionally do NOT force terminal
|
|
415
|
+
# "" to be consumed here. SmarterCSV has a long-standing lenient
|
|
416
|
+
# behavior for malformed tails like ...\"" in :double_quotes mode:
|
|
417
|
+
# the final quote may still close the field instead of turning the
|
|
418
|
+
# row into an unclosed-quote error. Issue #334 needs doubled-quote
|
|
419
|
+
# precedence for ..."",... (more content follows), but we keep the
|
|
420
|
+
# historical leniency for terminal ..."".
|
|
421
|
+
i = next_i
|
|
422
|
+
# closing quote: only valid if followed by col_sep, row_sep, or end of line
|
|
423
|
+
elsif next_i >= bytesize ||
|
|
424
|
+
line.getbyte(next_i) == col_sep_byte ||
|
|
425
|
+
(row_sep_bytesize > 0 && line.byteslice(next_i, row_sep_bytesize) == row_sep)
|
|
413
426
|
in_quotes = false
|
|
414
427
|
field_started = true
|
|
415
428
|
end
|
|
416
|
-
# else: quote inside quoted field → literal
|
|
429
|
+
# else: quote inside quoted field → literal
|
|
417
430
|
elsif !field_started # at field boundary: open quoted field
|
|
418
431
|
in_quotes = true
|
|
419
432
|
field_started = true
|
|
@@ -510,15 +523,28 @@ module SmarterCSV
|
|
|
510
523
|
if !allow_escaped_quotes || backslash_count % 2 == 0
|
|
511
524
|
if quote_boundary_standard
|
|
512
525
|
if in_quotes
|
|
513
|
-
# closing quote: only valid if followed by col_sep, row_sep, or end of line
|
|
514
526
|
next_i = i + 1
|
|
515
|
-
if next_i
|
|
516
|
-
|
|
517
|
-
|
|
527
|
+
if next_i + 1 < line_size && line[next_i] == quote
|
|
528
|
+
# RFC doubled quote inside a quoted field ("" → ").
|
|
529
|
+
# Give this precedence over the closing-quote check, but only
|
|
530
|
+
# when another character follows the doubled pair.
|
|
531
|
+
#
|
|
532
|
+
# Compatibility note: we intentionally do NOT force terminal
|
|
533
|
+
# "" to be consumed here. SmarterCSV has a long-standing lenient
|
|
534
|
+
# behavior for malformed tails like ...\"" in :double_quotes mode:
|
|
535
|
+
# the final quote may still close the field instead of turning the
|
|
536
|
+
# row into an unclosed-quote error. Issue #334 needs doubled-quote
|
|
537
|
+
# precedence for ..."",... (more content follows), but we keep the
|
|
538
|
+
# historical leniency for terminal ..."".
|
|
539
|
+
i = next_i
|
|
540
|
+
# closing quote: only valid if followed by col_sep, row_sep, or end of line
|
|
541
|
+
elsif next_i >= line_size ||
|
|
542
|
+
line[next_i...next_i + col_sep_size] == col_sep ||
|
|
543
|
+
(row_sep_size > 0 && line[next_i...next_i + row_sep_size] == row_sep)
|
|
518
544
|
in_quotes = false
|
|
519
545
|
field_started = true
|
|
520
546
|
end
|
|
521
|
-
# else: quote inside quoted field → literal
|
|
547
|
+
# else: quote inside quoted field → literal
|
|
522
548
|
elsif !field_started # at field boundary: open quoted field
|
|
523
549
|
in_quotes = true
|
|
524
550
|
field_started = true
|
data/lib/smarter_csv/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: smarter_csv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.16.
|
|
4
|
+
version: 1.16.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Tilo Sloboda
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2026-05-
|
|
10
|
+
date: 2026-05-21 00:00:00.000000000 Z
|
|
11
11
|
dependencies: []
|
|
12
12
|
description: |
|
|
13
13
|
SmarterCSV is a high-performance CSV reader and writer for Ruby focused on
|