smarter_csv 1.16.6 → 1.17.0.pre5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,270 @@
1
+
2
+ SHELL = /bin/sh
3
+
4
+ # V=0 quiet, V=1 verbose. other values don't work.
5
+ V = 0
6
+ V0 = $(V:0=)
7
+ Q1 = $(V:1=)
8
+ Q = $(Q1:0=@)
9
+ ECHO1 = $(V:1=@ :)
10
+ ECHO = $(ECHO1:0=@ echo)
11
+ NULLCMD = :
12
+
13
+ #### Start of system configuration section. ####
14
+
15
+ srcdir = .
16
+ topdir = /Users/tilo/.rvm/rubies/ruby-3.2.2/include/ruby-3.2.0
17
+ hdrdir = $(topdir)
18
+ arch_hdrdir = /Users/tilo/.rvm/rubies/ruby-3.2.2/include/ruby-3.2.0/arm64-darwin23
19
+ PATH_SEPARATOR = :
20
+ VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
21
+ prefix = $(DESTDIR)/Users/tilo/.rvm/rubies/ruby-3.2.2
22
+ rubysitearchprefix = $(rubylibprefix)/$(sitearch)
23
+ rubyarchprefix = $(rubylibprefix)/$(arch)
24
+ rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
25
+ exec_prefix = $(prefix)
26
+ vendorarchhdrdir = $(vendorhdrdir)/$(sitearch)
27
+ sitearchhdrdir = $(sitehdrdir)/$(sitearch)
28
+ rubyarchhdrdir = $(rubyhdrdir)/$(arch)
29
+ vendorhdrdir = $(rubyhdrdir)/vendor_ruby
30
+ sitehdrdir = $(rubyhdrdir)/site_ruby
31
+ rubyhdrdir = $(includedir)/$(RUBY_VERSION_NAME)
32
+ vendorarchdir = $(vendorlibdir)/$(sitearch)
33
+ vendorlibdir = $(vendordir)/$(ruby_version)
34
+ vendordir = $(rubylibprefix)/vendor_ruby
35
+ sitearchdir = $(sitelibdir)/$(sitearch)
36
+ sitelibdir = $(sitedir)/$(ruby_version)
37
+ sitedir = $(rubylibprefix)/site_ruby
38
+ rubyarchdir = $(rubylibdir)/$(arch)
39
+ rubylibdir = $(rubylibprefix)/$(ruby_version)
40
+ sitearchincludedir = $(includedir)/$(sitearch)
41
+ archincludedir = $(includedir)/$(arch)
42
+ sitearchlibdir = $(libdir)/$(sitearch)
43
+ archlibdir = $(libdir)/$(arch)
44
+ ridir = $(datarootdir)/$(RI_BASE_NAME)
45
+ mandir = $(datarootdir)/man
46
+ localedir = $(datarootdir)/locale
47
+ libdir = $(exec_prefix)/lib
48
+ psdir = $(docdir)
49
+ pdfdir = $(docdir)
50
+ dvidir = $(docdir)
51
+ htmldir = $(docdir)
52
+ infodir = $(datarootdir)/info
53
+ docdir = $(datarootdir)/doc/$(PACKAGE)
54
+ oldincludedir = $(DESTDIR)/usr/include
55
+ includedir = $(SDKROOT)$(prefix)/include
56
+ runstatedir = $(localstatedir)/run
57
+ localstatedir = $(prefix)/var
58
+ sharedstatedir = $(prefix)/com
59
+ sysconfdir = $(prefix)/etc
60
+ datadir = $(datarootdir)
61
+ datarootdir = $(prefix)/share
62
+ libexecdir = $(exec_prefix)/libexec
63
+ sbindir = $(exec_prefix)/sbin
64
+ bindir = $(exec_prefix)/bin
65
+ archdir = $(rubyarchdir)
66
+
67
+
68
+ CC_WRAPPER =
69
+ CC = gcc
70
+ CXX = g++
71
+ LIBRUBY = $(LIBRUBY_SO)
72
+ LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
73
+ LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
74
+ LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static -framework CoreFoundation $(MAINLIBS)
75
+ empty =
76
+ OUTFLAG = -o $(empty)
77
+ COUTFLAG = -o $(empty)
78
+ CSRCFLAG = $(empty)
79
+
80
+ RUBY_EXTCONF_H =
81
+ cflags = -fdeclspec $(optflags) $(debugflags) $(warnflags)
82
+ cxxflags =
83
+ optflags = -O3
84
+ debugflags = -ggdb3
85
+ warnflags = -Wall -Wextra -Wextra-tokens -Wdeprecated-declarations -Wdivision-by-zero -Wdiv-by-zero -Wimplicit-function-declaration -Wimplicit-int -Wmisleading-indentation -Wpointer-arith -Wshorten-64-to-32 -Wwrite-strings -Wold-style-definition -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wunused-variable -Wundef
86
+ cppflags =
87
+ CCDLFLAGS = -fno-common
88
+ CFLAGS = $(CCDLFLAGS) -O3 -I/opt/homebrew/opt/libyaml/include -I/opt/homebrew/opt/libksba/include -I/opt/homebrew/opt/readline/include -I/opt/homebrew/opt/zlib/include -I/opt/homebrew/opt/openssl@1.1/include $(cflags) -fno-common -pipe $(ARCH_FLAG)
89
+ INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
90
+ DEFS =
91
+ CPPFLAGS = -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -D_DARWIN_UNLIMITED_SELECT -D_REENTRANT $(DEFS) $(cppflags)
92
+ CXXFLAGS = $(CCDLFLAGS) -fdeclspec $(ARCH_FLAG)
93
+ ldflags = -L. -L/opt/homebrew/opt/libyaml/lib -L/opt/homebrew/opt/libksba/lib -L/opt/homebrew/opt/readline/lib -L/opt/homebrew/opt/zlib/lib -L/opt/homebrew/opt/openssl@1.1/lib -fstack-protector-strong
94
+ dldflags = -L/opt/homebrew/opt/libyaml/lib -L/opt/homebrew/opt/libksba/lib -L/opt/homebrew/opt/readline/lib -L/opt/homebrew/opt/zlib/lib -L/opt/homebrew/opt/openssl@1.1/lib -Wl,-undefined,dynamic_lookup $(LIBRUBYARG_SHARED)
95
+ ARCH_FLAG =
96
+ DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
97
+ LDSHARED = $(CC) -dynamic -bundle
98
+ LDSHAREDXX = $(CXX) -dynamic -bundle
99
+ AR = ar
100
+ EXEEXT =
101
+
102
+ RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)
103
+ RUBY_SO_NAME = ruby.3.2
104
+ RUBYW_INSTALL_NAME =
105
+ RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version)
106
+ RUBYW_BASE_NAME = rubyw
107
+ RUBY_BASE_NAME = ruby
108
+
109
+ arch = arm64-darwin23
110
+ sitearch = $(arch)
111
+ ruby_version = 3.2.0
112
+ ruby = $(bindir)/$(RUBY_BASE_NAME)
113
+ RUBY = $(ruby)
114
+ BUILTRUBY = $(bindir)/$(RUBY_BASE_NAME)
115
+ ruby_headers = $(hdrdir)/ruby.h $(hdrdir)/ruby/backward.h $(hdrdir)/ruby/ruby.h $(hdrdir)/ruby/defines.h $(hdrdir)/ruby/missing.h $(hdrdir)/ruby/intern.h $(hdrdir)/ruby/st.h $(hdrdir)/ruby/subst.h $(arch_hdrdir)/ruby/config.h
116
+
117
+ RM = rm -f
118
+ RM_RF = rm -fr
119
+ RMDIRS = rmdir -p
120
+ MAKEDIRS = /opt/homebrew/opt/coreutils/bin/gmkdir -p
121
+ INSTALL = /opt/homebrew/opt/coreutils/bin/ginstall -c
122
+ INSTALL_PROG = $(INSTALL) -m 0755
123
+ INSTALL_DATA = $(INSTALL) -m 644
124
+ COPY = cp
125
+ TOUCH = exit >
126
+
127
+ #### End of system configuration section. ####
128
+
129
+ preload =
130
+ libpath = . $(libdir)
131
+ LIBPATH = -L. -L$(libdir)
132
+ DEFFILE =
133
+
134
+ CLEANFILES = mkmf.log
135
+ DISTCLEANFILES =
136
+ DISTCLEANDIRS =
137
+
138
+ extout =
139
+ extout_prefix =
140
+ target_prefix = /smarter_csv
141
+ LOCAL_LIBS =
142
+ LIBS = $(LIBRUBYARG_SHARED) -lpthread
143
+ ORIG_SRCS = smarter_csv.c
144
+ SRCS = $(ORIG_SRCS)
145
+ OBJS = smarter_csv.o
146
+ HDRS =
147
+ LOCAL_HDRS =
148
+ TARGET = smarter_csv
149
+ TARGET_NAME = smarter_csv
150
+ TARGET_ENTRY = Init_$(TARGET_NAME)
151
+ DLLIB = $(TARGET).bundle
152
+ EXTSTATIC =
153
+ STATIC_LIB =
154
+
155
+ TIMESTAMP_DIR = .
156
+ BINDIR = $(bindir)
157
+ RUBYCOMMONDIR = $(sitedir)$(target_prefix)
158
+ RUBYLIBDIR = $(sitelibdir)$(target_prefix)
159
+ RUBYARCHDIR = $(sitearchdir)$(target_prefix)
160
+ HDRDIR = $(sitehdrdir)$(target_prefix)
161
+ ARCHHDRDIR = $(sitearchhdrdir)$(target_prefix)
162
+ TARGET_SO_DIR =
163
+ TARGET_SO = $(TARGET_SO_DIR)$(DLLIB)
164
+ CLEANLIBS = $(TARGET_SO) $(TARGET_SO).dSYM
165
+ CLEANOBJS = $(OBJS) *.bak
166
+ TARGET_SO_DIR_TIMESTAMP = $(TIMESTAMP_DIR)/.sitearchdir.-.smarter_csv.time
167
+
168
+ all: $(DLLIB)
169
+ static: $(STATIC_LIB)
170
+ .PHONY: all install static install-so install-rb
171
+ .PHONY: clean clean-so clean-static clean-rb
172
+
173
+ clean-static::
174
+ clean-rb-default::
175
+ clean-rb::
176
+ clean-so::
177
+ clean: clean-so clean-static clean-rb-default clean-rb
178
+ -$(Q)$(RM_RF) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) .*.time
179
+
180
+ distclean-rb-default::
181
+ distclean-rb::
182
+ distclean-so::
183
+ distclean-static::
184
+ distclean: clean distclean-so distclean-static distclean-rb-default distclean-rb
185
+ -$(Q)$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
186
+ -$(Q)$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
187
+ -$(Q)$(RMDIRS) $(DISTCLEANDIRS) 2> /dev/null || true
188
+
189
+ realclean: distclean
190
+ install: install-so install-rb
191
+
192
+ install-so: $(DLLIB) $(TARGET_SO_DIR_TIMESTAMP)
193
+ $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
194
+ clean-static::
195
+ -$(Q)$(RM) $(STATIC_LIB)
196
+ install-rb: pre-install-rb do-install-rb install-rb-default
197
+ install-rb-default: pre-install-rb-default do-install-rb-default
198
+ pre-install-rb: Makefile
199
+ pre-install-rb-default: Makefile
200
+ do-install-rb:
201
+ do-install-rb-default:
202
+ pre-install-rb-default:
203
+ @$(NULLCMD)
204
+ $(TARGET_SO_DIR_TIMESTAMP):
205
+ $(Q) $(MAKEDIRS) $(@D) $(RUBYARCHDIR)
206
+ $(Q) $(TOUCH) $@
207
+
208
+ site-install: site-install-so site-install-rb
209
+ site-install-so: install-so
210
+ site-install-rb: install-rb
211
+
212
+ .SUFFIXES: .c .m .cc .mm .cxx .cpp .o .S
213
+
214
+ .cc.o:
215
+ $(ECHO) compiling $(<)
216
+ $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
217
+
218
+ .cc.S:
219
+ $(ECHO) translating $(<)
220
+ $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
221
+
222
+ .mm.o:
223
+ $(ECHO) compiling $(<)
224
+ $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
225
+
226
+ .mm.S:
227
+ $(ECHO) translating $(<)
228
+ $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
229
+
230
+ .cxx.o:
231
+ $(ECHO) compiling $(<)
232
+ $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
233
+
234
+ .cxx.S:
235
+ $(ECHO) translating $(<)
236
+ $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
237
+
238
+ .cpp.o:
239
+ $(ECHO) compiling $(<)
240
+ $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
241
+
242
+ .cpp.S:
243
+ $(ECHO) translating $(<)
244
+ $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
245
+
246
+ .c.o:
247
+ $(ECHO) compiling $(<)
248
+ $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
249
+
250
+ .c.S:
251
+ $(ECHO) translating $(<)
252
+ $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
253
+
254
+ .m.o:
255
+ $(ECHO) compiling $(<)
256
+ $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
257
+
258
+ .m.S:
259
+ $(ECHO) translating $(<)
260
+ $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
261
+
262
+ $(TARGET_SO): $(OBJS) Makefile
263
+ $(ECHO) linking shared-object smarter_csv/$(DLLIB)
264
+ -$(Q)$(RM) $(@)
265
+ $(Q) $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
266
+ $(Q) $(POSTLINK)
267
+
268
+
269
+
270
+ $(OBJS): $(HDRS) $(ruby_headers)
@@ -3,15 +3,6 @@
3
3
  require 'mkmf'
4
4
  require "rbconfig"
5
5
 
6
- # On non-MRI Rubies (JRuby, TruffleRuby, ...) there is no C extension to build, and trying to build
7
- # it breaks `gem install` for anything that depends on smarter_csv. Write a no-op Makefile so install
8
- # succeeds, then stop. At runtime SmarterCSV falls back to its pure-Ruby parser (it checks whether the
9
- # C functions actually loaded via respond_to?(:parse_csv_line_c)).
10
- if RUBY_ENGINE != 'ruby'
11
- File.write('Makefile', dummy_makefile($srcdir).join)
12
- exit 0
13
- end
14
-
15
6
  if RbConfig::MAKEFILE_CONFIG["CFLAGS"].include?("-g -O3")
16
7
  fixed_CFLAGS = RbConfig::MAKEFILE_CONFIG["CFLAGS"].sub("-g -O3", "-O3 $(cflags)")
17
8
  puts("Fix CFLAGS: #{RbConfig::MAKEFILE_CONFIG["CFLAGS"]} -> #{fixed_CFLAGS}")
@@ -304,40 +304,25 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
304
304
  if (!allow_escaped_quotes || backslash_count % 2 == 0) {
305
305
  if (__builtin_expect(quote_boundary_standard, 1)) {
306
306
  if (in_quotes) {
307
- if (p + 2 < endP && *(p + 1) == quote_char_val) {
308
- /* RFC doubled quote inside a quoted field ("" → ").
309
- * Give this precedence over the closing-quote check, but only
310
- * when another byte follows the doubled pair.
311
- *
312
- * Compatibility note: we intentionally do NOT force terminal
313
- * "" to be consumed here. SmarterCSV has a long-standing lenient
314
- * behavior for malformed tails like ...\"" in :double_quotes mode:
315
- * the final quote may still close the field instead of turning the
316
- * row into an unclosed-quote error. Issue #334 needs doubled-quote
317
- * precedence for ..."",... (more content follows), but we keep the
318
- * historical leniency for terminal ..."". */
319
- p++;
320
- } else {
321
- // closing quote: only valid if followed by col_sep, row_sep, or end of line
322
- bool valid_close = (p + 1 >= endP);
323
- if (!valid_close) {
324
- valid_close = true;
325
- for (long j = 0; j < col_sep_len; j++) {
326
- if (*(p + 1 + j) != *(col_sepP + j)) { valid_close = false; break; }
327
- }
328
- }
329
- if (!valid_close && row_sep_len > 0) {
330
- valid_close = true;
331
- for (long j = 0; j < row_sep_len; j++) {
332
- if (*(p + 1 + j) != *(row_sepP + j)) { valid_close = false; break; }
333
- }
307
+ // closing quote: only valid if followed by col_sep, row_sep, or end of line
308
+ bool valid_close = (p + 1 >= endP);
309
+ if (!valid_close) {
310
+ valid_close = true;
311
+ for (long j = 0; j < col_sep_len; j++) {
312
+ if (*(p + 1 + j) != *(col_sepP + j)) { valid_close = false; break; }
334
313
  }
335
- if (valid_close) {
336
- in_quotes = false;
337
- field_started = true;
314
+ }
315
+ if (!valid_close && row_sep_len > 0) {
316
+ valid_close = true;
317
+ for (long j = 0; j < row_sep_len; j++) {
318
+ if (*(p + 1 + j) != *(row_sepP + j)) { valid_close = false; break; }
338
319
  }
339
- // else: quote inside quoted field → literal
340
320
  }
321
+ if (valid_close) {
322
+ in_quotes = false;
323
+ field_started = true;
324
+ }
325
+ // else: quote inside quoted field → literal (handles "" doubling)
341
326
  } else if (!field_started) {
342
327
  in_quotes = true; // opening quote at field boundary
343
328
  field_started = true;
@@ -778,11 +763,6 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash(VALUE self, VALUE line,
778
763
  * the frame stays well below 4 KB and ___chkstk_darwin never fires on ARM64 macOS.
779
764
  */
780
765
  bool *keep_bitmap = NULL;
781
- /* In THIS (non-ctx) function the bitmap is alloca'd to headers_len on every call (see the alloca
782
- * sites below), so keep_bitmap[] is exactly headers_len long and headers_len is the correct bound
783
- * at all access sites. Do NOT mirror rb_parse_line_to_hash_ctx's keep_bitmap_len here: that variant
784
- * caches its bitmap across rows (where @headers can grow), so it must use the captured length; this
785
- * one rebuilds per call and does not. */
786
766
  bool keep_extra_columns = true; /* extra cols (> headers_len): keep by default */
787
767
  bool has_only = false; /* true when only_headers: filtering is active */
788
768
  long early_exit_after = -1; /* column index after which we stop; -1 = no early exit */
@@ -1101,40 +1081,25 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash(VALUE self, VALUE line,
1101
1081
  if (!allow_escaped_quotes || backslash_count % 2 == 0) {
1102
1082
  if (__builtin_expect(quote_boundary_standard, 1)) {
1103
1083
  if (in_quotes) {
1104
- if (p + 2 < endP && *(p + 1) == quote_char_val) {
1105
- /* RFC doubled quote inside a quoted field ("" → ").
1106
- * Give this precedence over the closing-quote check, but only
1107
- * when another byte follows the doubled pair.
1108
- *
1109
- * Compatibility note: we intentionally do NOT force terminal
1110
- * "" to be consumed here. SmarterCSV has a long-standing lenient
1111
- * behavior for malformed tails like ...\"" in :double_quotes mode:
1112
- * the final quote may still close the field instead of turning the
1113
- * row into an unclosed-quote error. Issue #334 needs doubled-quote
1114
- * precedence for ..."",... (more content follows), but we keep the
1115
- * historical leniency for terminal ..."". */
1116
- p++;
1117
- } else {
1118
- // closing quote: only valid if followed by col_sep, row_sep, or end of line
1119
- bool valid_close = (p + 1 >= endP);
1120
- if (!valid_close) {
1121
- valid_close = true;
1122
- for (long j = 0; j < col_sep_len; j++) {
1123
- if (*(p + 1 + j) != *(col_sepP + j)) { valid_close = false; break; }
1124
- }
1125
- }
1126
- if (!valid_close && row_sep_len2 > 0) {
1127
- valid_close = true;
1128
- for (long j = 0; j < row_sep_len2; j++) {
1129
- if (*(p + 1 + j) != *(row_sepP2 + j)) { valid_close = false; break; }
1130
- }
1084
+ // closing quote: only valid if followed by col_sep, row_sep, or end of line
1085
+ bool valid_close = (p + 1 >= endP);
1086
+ if (!valid_close) {
1087
+ valid_close = true;
1088
+ for (long j = 0; j < col_sep_len; j++) {
1089
+ if (*(p + 1 + j) != *(col_sepP + j)) { valid_close = false; break; }
1131
1090
  }
1132
- if (valid_close) {
1133
- in_quotes = false;
1134
- field_started = true;
1091
+ }
1092
+ if (!valid_close && row_sep_len2 > 0) {
1093
+ valid_close = true;
1094
+ for (long j = 0; j < row_sep_len2; j++) {
1095
+ if (*(p + 1 + j) != *(row_sepP2 + j)) { valid_close = false; break; }
1135
1096
  }
1136
- // else: quote inside quoted field → literal
1137
1097
  }
1098
+ if (valid_close) {
1099
+ in_quotes = false;
1100
+ field_started = true;
1101
+ }
1102
+ // else: quote inside quoted field → literal (handles "" doubling)
1138
1103
  } else if (!field_started) {
1139
1104
  in_quotes = true; // opening quote at field boundary
1140
1105
  field_started = true;
@@ -1211,20 +1176,12 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash(VALUE self, VALUE line,
1211
1176
  * return nil instead of the hash so the row can be skipped.
1212
1177
  * With lazy allocation, if all_blank is true, xform.hash is still Qnil —
1213
1178
  * no hash was ever allocated.
1214
- *
1215
- * If remove_empty_hashes is disabled, preserve the row as an empty hash.
1216
- * This keeps parity with the Ruby path without adding any cost to the
1217
- * normal non-blank hot path.
1218
1179
  */
1219
- if (all_blank) {
1220
- if (remove_empty) {
1221
- VALUE result = rb_ary_new_capa(2);
1222
- rb_ary_push(result, Qnil);
1223
- rb_ary_push(result, LONG2FIX(element_count));
1224
- return result;
1225
- }
1226
-
1227
- ensure_hash_allocated(&xform);
1180
+ if (remove_empty && all_blank) {
1181
+ VALUE result = rb_ary_new_capa(2);
1182
+ rb_ary_push(result, Qnil);
1183
+ rb_ary_push(result, LONG2FIX(element_count));
1184
+ return result;
1228
1185
  }
1229
1186
 
1230
1187
  /* ----------------------------------------
@@ -1464,14 +1421,6 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
1464
1421
  int numeric_mode = ctx->numeric_mode;
1465
1422
  VALUE numeric_keys = ctx->numeric_keys;
1466
1423
  bool *keep_bitmap = ctx->keep_bitmap;
1467
- /* keep_bitmap is cached in the context (xmalloc'd once at construction, sized to the header count
1468
- * THEN). @headers can grow in place as undeclared extra columns appear, so the live headers_len
1469
- * (re-read each call below) may exceed the bitmap's length. Every keep_bitmap[] access in this
1470
- * function MUST be bounded by keep_bitmap_len, never headers_len — indices past the bitmap are
1471
- * extra columns and follow keep_extra_columns. Bounding by the grown headers_len was an
1472
- * out-of-bounds heap read (the bug). The sibling rb_parse_line_to_hash safely uses headers_len
1473
- * because it re-allocs its bitmap to headers_len on every call. */
1474
- long keep_bitmap_len = ctx->keep_bitmap_len;
1475
1424
  bool keep_extra_columns = ctx->keep_extra_columns;
1476
1425
  long early_exit_after = ctx->early_exit_after;
1477
1426
 
@@ -1573,7 +1522,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
1573
1522
  while (trim_end >= trim_start && (*trim_end == ' ' || *trim_end == '\t')) trim_end--;
1574
1523
  }
1575
1524
  long trimmed_len = (trim_end >= trim_start) ? (trim_end - trim_start + 1) : 0;
1576
- if (!keep_bitmap || (element_count < keep_bitmap_len ? keep_bitmap[element_count] : keep_extra_columns)) {
1525
+ if (!keep_bitmap || (element_count < headers_len ? keep_bitmap[element_count] : keep_extra_columns)) {
1577
1526
  if (insert_field_into_hash(&xform, trim_start, trimmed_len, element_count, false, quote_char_val, encoding))
1578
1527
  all_blank = false;
1579
1528
  }
@@ -1594,7 +1543,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
1594
1543
  while (trim_end >= trim_start && (*trim_end == ' ' || *trim_end == '\t')) trim_end--;
1595
1544
  }
1596
1545
  long trimmed_len = (trim_end >= trim_start) ? (trim_end - trim_start + 1) : 0;
1597
- if (!keep_bitmap || (element_count < keep_bitmap_len ? keep_bitmap[element_count] : keep_extra_columns)) {
1546
+ if (!keep_bitmap || (element_count < headers_len ? keep_bitmap[element_count] : keep_extra_columns)) {
1598
1547
  if (insert_field_into_hash(&xform, trim_start, trimmed_len, element_count, false, quote_char_val, encoding))
1599
1548
  all_blank = false;
1600
1549
  }
@@ -1657,7 +1606,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
1657
1606
 
1658
1607
  bool has_embedded_quotes = quoted || (trimmed_len > 0 && memchr(trim_start, quote_char_val, trimmed_len));
1659
1608
 
1660
- if (!keep_bitmap || (element_count < keep_bitmap_len ? keep_bitmap[element_count] : keep_extra_columns)) {
1609
+ if (!keep_bitmap || (element_count < headers_len ? keep_bitmap[element_count] : keep_extra_columns)) {
1661
1610
  if (insert_field_into_hash(&xform, trim_start, trimmed_len, element_count, has_embedded_quotes, quote_char_val, encoding))
1662
1611
  all_blank = false;
1663
1612
  }
@@ -1691,40 +1640,25 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
1691
1640
  if (!allow_escaped_quotes || backslash_count % 2 == 0) {
1692
1641
  if (__builtin_expect(quote_boundary_standard, 1)) {
1693
1642
  if (in_quotes) {
1694
- if (p + 2 < endP && *(p + 1) == quote_char_val) {
1695
- /* RFC doubled quote inside a quoted field ("" → ").
1696
- * Give this precedence over the closing-quote check, but only
1697
- * when another byte follows the doubled pair.
1698
- *
1699
- * Compatibility note: we intentionally do NOT force terminal
1700
- * "" to be consumed here. SmarterCSV has a long-standing lenient
1701
- * behavior for malformed tails like ...\"" in :double_quotes mode:
1702
- * the final quote may still close the field instead of turning the
1703
- * row into an unclosed-quote error. Issue #334 needs doubled-quote
1704
- * precedence for ..."",... (more content follows), but we keep the
1705
- * historical leniency for terminal ..."". */
1706
- p++;
1707
- } else {
1708
- /* closing quote: only valid if followed by col_sep, row_sep, or end */
1709
- bool valid_close = (p + 1 >= endP);
1710
- if (!valid_close) {
1711
- valid_close = true;
1712
- for (long j = 0; j < col_sep_len; j++) {
1713
- if (*(p + 1 + j) != *(col_sepP + j)) { valid_close = false; break; }
1714
- }
1715
- }
1716
- if (!valid_close && row_sep_len2 > 0) {
1717
- valid_close = true;
1718
- for (long j = 0; j < row_sep_len2; j++) {
1719
- if (*(p + 1 + j) != *(row_sepP2 + j)) { valid_close = false; break; }
1720
- }
1643
+ /* closing quote: only valid if followed by col_sep, row_sep, or end */
1644
+ bool valid_close = (p + 1 >= endP);
1645
+ if (!valid_close) {
1646
+ valid_close = true;
1647
+ for (long j = 0; j < col_sep_len; j++) {
1648
+ if (*(p + 1 + j) != *(col_sepP + j)) { valid_close = false; break; }
1721
1649
  }
1722
- if (valid_close) {
1723
- in_quotes = false;
1724
- field_started = true;
1650
+ }
1651
+ if (!valid_close && row_sep_len2 > 0) {
1652
+ valid_close = true;
1653
+ for (long j = 0; j < row_sep_len2; j++) {
1654
+ if (*(p + 1 + j) != *(row_sepP2 + j)) { valid_close = false; break; }
1725
1655
  }
1726
- /* else: quote inside quoted field → literal */
1727
1656
  }
1657
+ if (valid_close) {
1658
+ in_quotes = false;
1659
+ field_started = true;
1660
+ }
1661
+ /* else: quote inside quoted field → literal (handles "" doubling) */
1728
1662
  } else if (!field_started) {
1729
1663
  in_quotes = true; /* opening quote at field boundary */
1730
1664
  field_started = true;
@@ -1783,7 +1717,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
1783
1717
 
1784
1718
  bool has_embedded_quotes = quoted || (trimmed_len > 0 && memchr(trim_start, quote_char_val, trimmed_len));
1785
1719
 
1786
- if (!keep_bitmap || (element_count < keep_bitmap_len ? keep_bitmap[element_count] : keep_extra_columns)) {
1720
+ if (!keep_bitmap || (element_count < headers_len ? keep_bitmap[element_count] : keep_extra_columns)) {
1787
1721
  if (insert_field_into_hash(&xform, trim_start, trimmed_len, element_count, has_embedded_quotes, quote_char_val, encoding))
1788
1722
  all_blank = false;
1789
1723
  }
@@ -1794,15 +1728,11 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
1794
1728
  /* ----------------------------------------
1795
1729
  * SECTION 6: Handle blank rows
1796
1730
  * ---------------------------------------- */
1797
- if (all_blank) {
1798
- if (remove_empty) {
1799
- VALUE result = rb_ary_new_capa(2);
1800
- rb_ary_push(result, Qnil);
1801
- rb_ary_push(result, LONG2FIX(element_count));
1802
- return result;
1803
- }
1804
-
1805
- ensure_hash_allocated(&xform);
1731
+ if (remove_empty && all_blank) {
1732
+ VALUE result = rb_ary_new_capa(2);
1733
+ rb_ary_push(result, Qnil);
1734
+ rb_ary_push(result, LONG2FIX(element_count));
1735
+ return result;
1806
1736
  }
1807
1737
 
1808
1738
  /* ----------------------------------------
@@ -1811,7 +1741,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
1811
1741
  if (!remove_empty_values) {
1812
1742
  ensure_hash_allocated(&xform);
1813
1743
  for (long i = element_count; i < headers_len; i++) {
1814
- if (!keep_bitmap || (i < keep_bitmap_len ? keep_bitmap[i] : keep_extra_columns)) {
1744
+ if (!keep_bitmap || keep_bitmap[i]) {
1815
1745
  rb_hash_aset(xform.hash, rb_ary_entry(headers, i), Qnil);
1816
1746
  }
1817
1747
  }