smarter_csv 1.16.4 → 1.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +10 -1
  3. data/CHANGELOG.md +54 -0
  4. data/Gemfile +10 -5
  5. data/README.md +98 -14
  6. data/TO_DO.md +109 -0
  7. data/docs/_introduction.md +1 -0
  8. data/docs/bad_row_quarantine.md +2 -1
  9. data/docs/basic_read_api.md +6 -1
  10. data/docs/basic_write_api.md +30 -0
  11. data/docs/batch_processing.md +25 -0
  12. data/docs/column_selection.md +1 -0
  13. data/docs/data_transformations.md +1 -0
  14. data/docs/examples.md +126 -0
  15. data/docs/header_transformations.md +23 -0
  16. data/docs/header_validations.md +1 -0
  17. data/docs/history.md +1 -0
  18. data/docs/instrumentation.md +2 -1
  19. data/docs/migrating_from_csv.md +1 -0
  20. data/docs/options.md +20 -18
  21. data/docs/parsing_strategy.md +1 -0
  22. data/docs/real_world_csv.md +51 -1
  23. data/docs/releases/1.16.0/performance_notes.md +15 -15
  24. data/docs/releases/1.17.0/benchmarks.md +121 -0
  25. data/docs/releases/1.17.0/changes.md +161 -0
  26. data/docs/releases/1.17.0/performance_notes.md +126 -0
  27. data/docs/row_col_sep.md +21 -1
  28. data/docs/ruby_csv_pitfalls.md +1 -0
  29. data/docs/value_converters.md +24 -0
  30. data/docs/warnings.md +141 -0
  31. data/ext/smarter_csv/smarter_csv.c +98 -32
  32. data/images/SmarterCSV_1.17.0_vs_RubyCSV_3.3.5_speedup.svg +106 -0
  33. data/images/SmarterCSV_1.17.0_vs_previous_C-speedup.svg +181 -0
  34. data/images/SmarterCSV_1.17.0_vs_previous_Rb-speedup.svg +179 -0
  35. data/lib/smarter_csv/auto_detection.rb +215 -30
  36. data/lib/smarter_csv/file_io.rb +2 -2
  37. data/lib/smarter_csv/hash_transformations.rb +29 -13
  38. data/lib/smarter_csv/parser.rb +42 -33
  39. data/lib/smarter_csv/peekable_io.rb +453 -0
  40. data/lib/smarter_csv/reader.rb +119 -23
  41. data/lib/smarter_csv/reader_options.rb +61 -1
  42. data/lib/smarter_csv/version.rb +1 -1
  43. data/lib/smarter_csv.rb +40 -12
  44. metadata +12 -5
  45. data/TO_DO_v2.md +0 -14
  46. data/ext/smarter_csv/Makefile +0 -270
@@ -134,24 +134,51 @@ static const rb_data_type_t parse_context_type = {
134
134
  };
135
135
 
136
136
  static VALUE unescape_quotes(char *str, long len, char quote_char, rb_encoding *encoding) {
137
- char *buf = ALLOC_N(char, len);
138
- long j = 0;
139
- for (long i = 0; i < len; i++) {
140
- if (str[i] == quote_char && i + 1 < len && str[i + 1] == quote_char) {
141
- buf[j++] = quote_char;
142
- i++; // skip second quote
143
- } else {
144
- buf[j++] = str[i];
137
+ // Fast path: scan for any doubled quote pair. If none present, the field has
138
+ // nothing to unescape — emit it directly via rb_enc_str_new and skip the
139
+ // temp buffer + byte-by-byte copy. memchr is SIMD-optimized; the scan cost
140
+ // is far less than the malloc/free pair this avoids.
141
+ char *p = str;
142
+ char *end = str + len;
143
+ while ((p = memchr(p, quote_char, end - p))) {
144
+ if (p + 1 < end && *(p + 1) == quote_char) goto needs_unescape;
145
+ p++;
146
+ }
147
+ return rb_enc_str_new(str, len, encoding);
148
+
149
+ needs_unescape:
150
+ // Slow path: at least one doubled quote pair was found. Allocate a temp
151
+ // buffer and walk byte-by-byte, collapsing "" → ".
152
+ {
153
+ char *buf = ALLOC_N(char, len);
154
+ long j = 0;
155
+ for (long i = 0; i < len; i++) {
156
+ if (str[i] == quote_char && i + 1 < len && str[i + 1] == quote_char) {
157
+ buf[j++] = quote_char;
158
+ i++; // skip second quote
159
+ } else {
160
+ buf[j++] = str[i];
161
+ }
145
162
  }
163
+ VALUE out = rb_enc_str_new(buf, j, encoding);
164
+ xfree(buf);
165
+ return out;
146
166
  }
147
- VALUE out = rb_enc_str_new(buf, j, encoding);
148
- xfree(buf);
149
- return out;
167
+ }
168
+
169
+ /* Helper: build the 2-element [elements, data_size] tuple returned by rb_parse_csv_line.
170
+ * Aligns this function's return shape with parse_csv_line_ruby and rb_parse_line_to_hash_ctx:
171
+ * data_size = -1 signals "unclosed quoted field — needs more data". */
172
+ static inline VALUE make_parse_result(VALUE elements, long data_size) {
173
+ VALUE result = rb_ary_new_capa(2);
174
+ rb_ary_push(result, elements);
175
+ rb_ary_push(result, LONG2FIX(data_size));
176
+ return result;
150
177
  }
151
178
 
152
179
  static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quote_char, VALUE max_size, VALUE has_quotes_val, VALUE strip_ws_val, VALUE allow_escaped_quotes_val, VALUE quote_boundary_standard_val, VALUE row_sep_val) {
153
180
  if (RB_TYPE_P(line, T_NIL) == 1) {
154
- return rb_ary_new();
181
+ return make_parse_result(rb_ary_new(), 0);
155
182
  }
156
183
 
157
184
  if (RB_TYPE_P(line, T_STRING) != 1) {
@@ -178,7 +205,7 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
178
205
  if (max_size != Qnil) {
179
206
  max_fields = NUM2INT(max_size);
180
207
  if (max_fields < 0) {
181
- return rb_ary_new();
208
+ return make_parse_result(rb_ary_new(), 0);
182
209
  }
183
210
  }
184
211
 
@@ -237,7 +264,7 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
237
264
  rb_ary_push(elements, field);
238
265
  }
239
266
 
240
- return elements;
267
+ return make_parse_result(elements, RARRAY_LEN(elements));
241
268
  }
242
269
 
243
270
  // === SLOW PATH: Quoted fields or multi-char separator ===
@@ -350,7 +377,13 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
350
377
  }
351
378
 
352
379
  if (in_quotes) {
353
- rb_raise(eMalformedCSVError, "Unclosed quoted field detected in line: %s", StringValueCStr(line));
380
+ /* Unclosed quoted field at EOL: signal "needs more data" rather than raising.
381
+ * Aligns with parse_csv_line_ruby and rb_parse_line_to_hash_ctx, which both
382
+ * return data_size = -1 on this condition. The Reader's stitch loop consumes
383
+ * the signal: append the next physical line and re-parse, or raise MalformedCSV
384
+ * at EOF if the field never closes. The parser does not decide "ultimately
385
+ * malformed"; the caller does. */
386
+ return make_parse_result(rb_ary_new(), -1);
354
387
  }
355
388
 
356
389
  if ((max_fields < 0) || (element_count < max_fields)) {
@@ -384,7 +417,7 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
384
417
  rb_ary_push(elements, field);
385
418
  }
386
419
 
387
- return elements;
420
+ return make_parse_result(elements, RARRAY_LEN(elements));
388
421
  }
389
422
 
390
423
  // Efficiently combine two arrays into a hash (replaces headers.zip(values).to_h)
@@ -484,6 +517,37 @@ static inline VALUE try_numeric_conversion(char *trim_start, long trimmed_len) {
484
517
  return Qundef; // not numeric
485
518
  }
486
519
 
520
+ /*
521
+ * leading_whitespace_len - byte length (1, 2, or 3) of the whitespace character at the start of
522
+ * `s` (with `len` bytes available), or 0 if `s` does not start with whitespace.
523
+ *
524
+ * "Whitespace" here matches Ruby's [[:space:]] / Rails' String#blank? — the Unicode White_Space
525
+ * set — so the C blank check stays consistent with the Ruby fallback path (hash_transformations).
526
+ * ASCII bytes are handled with a single comparison; the multibyte arms are only reached when a
527
+ * byte >= 0x80 appears, so all-ASCII fields pay nothing extra.
528
+ */
529
+ static inline int leading_whitespace_len(const char *s, long len) {
530
+ if (len < 1) return 0;
531
+ unsigned char b0 = (unsigned char)s[0];
532
+ if (b0 == 0x20 || (b0 >= 0x09 && b0 <= 0x0D)) return 1; // space (most common) then \t \n \v \f \r
533
+ if (b0 < 0x80) return 0; // any other ASCII byte: not whitespace
534
+ if (len < 2) return 0;
535
+ unsigned char b1 = (unsigned char)s[1];
536
+ if (b0 == 0xC2 && (b1 == 0x85 || b1 == 0xA0)) return 2; // U+0085 NEL, U+00A0 NBSP
537
+ if (len < 3) return 0;
538
+ unsigned char b2 = (unsigned char)s[2];
539
+ if (b0 == 0xE1 && b1 == 0x9A && b2 == 0x80) return 3; // U+1680 OGHAM SPACE MARK
540
+ if (b0 == 0xE2) {
541
+ // U+2000..U+200A (E2 80 80..8A) — note: 0x8B is U+200B ZERO WIDTH SPACE, NOT whitespace.
542
+ // U+2028 LINE SEP (A8), U+2029 PARA SEP (A9), U+202F NARROW NBSP (AF), U+205F MMSP (E2 81 9F).
543
+ if (b1 == 0x80 && ((b2 >= 0x80 && b2 <= 0x8A) || b2 == 0xA8 || b2 == 0xA9 || b2 == 0xAF)) return 3;
544
+ if (b1 == 0x81 && b2 == 0x9F) return 3;
545
+ return 0;
546
+ }
547
+ if (b0 == 0xE3 && b1 == 0x80 && b2 == 0x80) return 3; // U+3000 IDEOGRAPHIC SPACE
548
+ return 0;
549
+ }
550
+
487
551
  /*
488
552
  * ================================================================================
489
553
  * Transformation options struct - passed to insert_field_into_hash to avoid
@@ -541,17 +605,16 @@ static inline __attribute__((always_inline)) bool insert_field_into_hash(
541
605
  VALUE key = get_key_for_index(element_count, opts->headers, opts->headers_len, opts->prefix_str);
542
606
 
543
607
  // 1. Empty/blank field handling
544
- // Check if field is blank: either zero-length, or all whitespace characters.
545
- // This matches Ruby's blank? behavior (BLANK_RE = /\A\s*\z/) which considers
546
- // spaces, tabs, \r, \n, \v, \f as whitespace.
608
+ // A field is blank if it is zero-length or consists entirely of whitespace characters.
609
+ // "Whitespace" matches Ruby's BLANK_RE = /\A[[:space:]]*\z/ (and Rails' String#blank?) — the
610
+ // Unicode White_Space set — so this stays consistent with the Ruby fallback path.
547
611
  if (opts->remove_empty_values) {
548
612
  bool is_blank = true;
549
- for (long i = 0; i < trimmed_len; i++) {
550
- char c = trim_start[i];
551
- if (c != ' ' && c != '\t' && c != '\r' && c != '\n' && c != '\v' && c != '\f') {
552
- is_blank = false;
553
- break;
554
- }
613
+ long i = 0;
614
+ while (i < trimmed_len) {
615
+ int w = leading_whitespace_len(trim_start + i, trimmed_len - i);
616
+ if (w == 0) { is_blank = false; break; }
617
+ i += w;
555
618
  }
556
619
  if (is_blank) return false; // skip blank value
557
620
  }
@@ -562,22 +625,25 @@ static inline __attribute__((always_inline)) bool insert_field_into_hash(
562
625
  return false; // not a non-blank value
563
626
  }
564
627
 
565
- // 2. String-based zero check — matches /\A0+(?:\.0+)?\z/
566
- // Works independently of numeric conversion: "0", "00", "0.0", "00.00" etc.
628
+ // 2. String-based zero check — matches /\A[+-]?0+(?:\.0+)?\z/
629
+ // Works independently of numeric conversion: "0", "00", "0.0", "00.00", "+0", "-0.00" etc.
567
630
  // Outer quotes are stripped before this call, so the check applies equally
568
631
  // to quoted ("0") and unquoted (0) fields.
569
632
  if (opts->remove_zero_values) {
570
- long i = 0;
571
- // Must start with at least one '0'
572
- if (trimmed_len > 0 && trim_start[0] == '0') {
633
+ char c0 = trim_start[0]; // trimmed_len > 0 guaranteed (zero-length handled above)
634
+ // Index i skips an optional leading sign; bail right away if the first byte can't begin a zero.
635
+ long i = (c0 == '0') ? 0
636
+ : (c0 == '+' || c0 == '-') ? 1
637
+ : trimmed_len;
638
+ if (i < trimmed_len && trim_start[i] == '0') {
573
639
  while (i < trimmed_len && trim_start[i] == '0') i++;
574
- if (i == trimmed_len) return false; // all zeros, e.g. "0", "00"
640
+ if (i == trimmed_len) return false; // all zeros, e.g. "0", "00", "+0", "-00"
575
641
  if (trim_start[i] == '.') {
576
642
  i++;
577
643
  long dot_pos = i;
578
644
  while (i < trimmed_len && trim_start[i] == '0') i++;
579
645
  // Valid if we consumed everything AND had at least one zero after dot
580
- if (i == trimmed_len && i > dot_pos) return false; // e.g. "0.0", "00.00"
646
+ if (i == trimmed_len && i > dot_pos) return false; // e.g. "0.0", "00.00", "+0.0", "-0.00"
581
647
  }
582
648
  }
583
649
  }
@@ -0,0 +1,106 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" width="764" height="632"
2
+ font-family="ui-monospace, 'Cascadia Code', 'Courier New', monospace" font-size="12">
3
+ <rect width="764" height="632" fill="#ffffff"/>
4
+ <text x="382" y="20" text-anchor="middle" font-size="14" font-weight="bold" fill="#212121">SmarterCSV 1.17.0 vs Ruby CSV.read 3.3.5</text>
5
+ <text x="382" y="36" text-anchor="middle" font-size="10" fill="#9e9e9e">Speedup = CSV.read time ÷ SmarterCSV time (higher = SmarterCSV is faster) · Ruby 3.4.7 · best of 40</text>
6
+ <text x="490" y="620" text-anchor="middle" font-size="11" fill="#616161">Speedup (CSV.read ÷ SmarterCSV 1.17.0 C)</text>
7
+ <line x1="240" y1="62" x2="240" y2="594" stroke="#e0e0e0" stroke-width="1"/>
8
+ <text x="240" y="606" text-anchor="middle" font-size="11" fill="#757575">0×</text>
9
+ <line x1="296" y1="62" x2="296" y2="594" stroke="#e0e0e0" stroke-width="1"/>
10
+ <text x="296" y="606" text-anchor="middle" font-size="11" fill="#757575">1×</text>
11
+ <line x1="351" y1="62" x2="351" y2="594" stroke="#e0e0e0" stroke-width="1"/>
12
+ <text x="351" y="606" text-anchor="middle" font-size="11" fill="#757575">2×</text>
13
+ <line x1="407" y1="62" x2="407" y2="594" stroke="#e0e0e0" stroke-width="1"/>
14
+ <text x="407" y="606" text-anchor="middle" font-size="11" fill="#757575">3×</text>
15
+ <line x1="462" y1="62" x2="462" y2="594" stroke="#e0e0e0" stroke-width="1"/>
16
+ <text x="462" y="606" text-anchor="middle" font-size="11" fill="#757575">4×</text>
17
+ <line x1="518" y1="62" x2="518" y2="594" stroke="#e0e0e0" stroke-width="1"/>
18
+ <text x="518" y="606" text-anchor="middle" font-size="11" fill="#757575">5×</text>
19
+ <line x1="573" y1="62" x2="573" y2="594" stroke="#e0e0e0" stroke-width="1"/>
20
+ <text x="573" y="606" text-anchor="middle" font-size="11" fill="#757575">6×</text>
21
+ <line x1="629" y1="62" x2="629" y2="594" stroke="#e0e0e0" stroke-width="1"/>
22
+ <text x="629" y="606" text-anchor="middle" font-size="11" fill="#757575">7×</text>
23
+ <line x1="684" y1="62" x2="684" y2="594" stroke="#e0e0e0" stroke-width="1"/>
24
+ <text x="684" y="606" text-anchor="middle" font-size="11" fill="#757575">8×</text>
25
+ <line x1="740" y1="62" x2="740" y2="594" stroke="#e0e0e0" stroke-width="1"/>
26
+ <text x="740" y="606" text-anchor="middle" font-size="11" fill="#757575">9×</text>
27
+ <line x1="296" y1="62" x2="296" y2="594" stroke="#9e9e9e" stroke-width="1.5" stroke-dasharray="4,3"/>
28
+ <line x1="240" y1="594" x2="740" y2="594" stroke="#bdbdbd" stroke-width="1"/>
29
+ <line x1="240" y1="62" x2="240" y2="594" stroke="#bdbdbd" stroke-width="1"/>
30
+ <rect x="0" y="62" width="764" height="28" fill="#f5f5f5"/>
31
+ <text x="232" y="80" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_C.csv</text>
32
+ <rect x="240" y="67" width="437" height="18" fill="#1565C0" rx="2"/>
33
+ <text x="673" y="80" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">7.87×</text>
34
+ <rect x="0" y="90" width="764" height="28" fill="#ffffff"/>
35
+ <text x="232" y="108" text-anchor="end" font-size="11" fill="#424242">uszips.csv</text>
36
+ <rect x="240" y="95" width="435" height="18" fill="#1565C0" rx="2"/>
37
+ <text x="671" y="108" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">7.83×</text>
38
+ <rect x="0" y="118" width="764" height="28" fill="#f5f5f5"/>
39
+ <text x="232" y="136" text-anchor="end" font-size="11" fill="#424242">long_fields_40k.csv</text>
40
+ <rect x="240" y="123" width="380" height="18" fill="#1565C0" rx="2"/>
41
+ <text x="616" y="136" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">6.83×</text>
42
+ <rect x="0" y="146" width="764" height="28" fill="#ffffff"/>
43
+ <text x="232" y="164" text-anchor="end" font-size="11" fill="#424242">worldcities.csv</text>
44
+ <rect x="240" y="151" width="378" height="18" fill="#1565C0" rx="2"/>
45
+ <text x="614" y="164" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">6.80×</text>
46
+ <rect x="0" y="174" width="764" height="28" fill="#f5f5f5"/>
47
+ <text x="232" y="192" text-anchor="end" font-size="11" fill="#424242">uscities.csv</text>
48
+ <rect x="240" y="179" width="371" height="18" fill="#1565C0" rx="2"/>
49
+ <text x="607" y="192" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">6.68×</text>
50
+ <rect x="0" y="202" width="764" height="28" fill="#ffffff"/>
51
+ <text x="232" y="220" text-anchor="end" font-size="11" fill="#424242">embedded_separators_60k.csv</text>
52
+ <rect x="240" y="207" width="307" height="18" fill="#1565C0" rx="2"/>
53
+ <text x="543" y="220" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">5.53×</text>
54
+ <rect x="0" y="230" width="764" height="28" fill="#f5f5f5"/>
55
+ <text x="232" y="248" text-anchor="end" font-size="11" fill="#424242">utf8_multibyte_60k.csv</text>
56
+ <rect x="240" y="235" width="216" height="18" fill="#1565C0" rx="2"/>
57
+ <text x="452" y="248" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.89×</text>
58
+ <rect x="0" y="258" width="764" height="28" fill="#ffffff"/>
59
+ <text x="232" y="276" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_NC.csv</text>
60
+ <rect x="240" y="263" width="212" height="18" fill="#1565C0" rx="2"/>
61
+ <text x="448" y="276" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.81×</text>
62
+ <rect x="0" y="286" width="764" height="28" fill="#f5f5f5"/>
63
+ <text x="232" y="304" text-anchor="end" font-size="11" fill="#424242">heavy_quoting_60k.csv</text>
64
+ <rect x="240" y="291" width="195" height="18" fill="#1565C0" rx="2"/>
65
+ <text x="431" y="304" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.51×</text>
66
+ <rect x="0" y="314" width="764" height="28" fill="#ffffff"/>
67
+ <text x="232" y="332" text-anchor="end" font-size="11" fill="#424242">many_empty_fields_60k.csv</text>
68
+ <rect x="240" y="319" width="194" height="18" fill="#1565C0" rx="2"/>
69
+ <text x="430" y="332" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.49×</text>
70
+ <rect x="0" y="342" width="764" height="28" fill="#f5f5f5"/>
71
+ <text x="232" y="360" text-anchor="end" font-size="11" fill="#424242">sample_100k.csv</text>
72
+ <rect x="240" y="347" width="191" height="18" fill="#1565C0" rx="2"/>
73
+ <text x="427" y="360" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.44×</text>
74
+ <rect x="0" y="370" width="764" height="28" fill="#ffffff"/>
75
+ <text x="232" y="388" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_NB.csv</text>
76
+ <rect x="240" y="375" width="175" height="18" fill="#1565C0" rx="2"/>
77
+ <text x="411" y="388" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.14×</text>
78
+ <rect x="0" y="398" width="764" height="28" fill="#f5f5f5"/>
79
+ <text x="232" y="416" text-anchor="end" font-size="11" fill="#424242">whitespace_heavy_60k.csv</text>
80
+ <rect x="240" y="403" width="166" height="18" fill="#1565C0" rx="2"/>
81
+ <text x="402" y="416" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">2.98×</text>
82
+ <rect x="0" y="426" width="764" height="28" fill="#ffffff"/>
83
+ <text x="232" y="444" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_B.csv</text>
84
+ <rect x="240" y="431" width="164" height="18" fill="#1565C0" rx="2"/>
85
+ <text x="400" y="444" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">2.95×</text>
86
+ <rect x="0" y="454" width="764" height="28" fill="#f5f5f5"/>
87
+ <text x="232" y="472" text-anchor="end" font-size="11" fill="#424242">embedded_newlines_60k.csv</text>
88
+ <rect x="240" y="459" width="162" height="18" fill="#1565C0" rx="2"/>
89
+ <text x="398" y="472" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">2.91×</text>
90
+ <rect x="0" y="482" width="764" height="28" fill="#ffffff"/>
91
+ <text x="232" y="500" text-anchor="end" font-size="11" fill="#424242">sensor_data_50krows_50cols.csv</text>
92
+ <rect x="240" y="487" width="141" height="18" fill="#1565C0" rx="2"/>
93
+ <text x="377" y="500" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">2.54×</text>
94
+ <rect x="0" y="510" width="764" height="28" fill="#f5f5f5"/>
95
+ <text x="232" y="528" text-anchor="end" font-size="11" fill="#424242">wide_500_cols_20k.csv</text>
96
+ <rect x="240" y="515" width="94" height="18" fill="#1565C0" rx="2"/>
97
+ <text x="330" y="528" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">1.69×</text>
98
+ <rect x="0" y="538" width="764" height="28" fill="#ffffff"/>
99
+ <text x="232" y="556" text-anchor="end" font-size="11" fill="#424242">tab_separated_60k.tsv</text>
100
+ <rect x="240" y="543" width="87" height="18" fill="#1565C0" rx="2"/>
101
+ <text x="323" y="556" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">1.56×</text>
102
+ <rect x="0" y="566" width="764" height="28" fill="#f5f5f5"/>
103
+ <text x="232" y="584" text-anchor="end" font-size="11" fill="#424242">multi_char_separator_60k.csv</text>
104
+ <rect x="240" y="571" width="75" height="18" fill="#1565C0" rx="2"/>
105
+ <text x="311" y="584" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">1.35×</text>
106
+ </svg>
@@ -0,0 +1,181 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" width="820" height="668"
2
+ font-family="ui-monospace, 'Cascadia Code', 'Courier New', monospace" font-size="12">
3
+ <rect width="820" height="668" fill="#ffffff"/>
4
+ <text x="410" y="18" text-anchor="middle" font-size="13" font-weight="bold" fill="#212121">SmarterCSV improvements 1.15.2, 1.16.4, 1.17.0 vs 1.14.4 — C accelerated</text>
5
+ <text x="410" y="32" text-anchor="middle" font-size="10" fill="#9e9e9e">Speedup ratio = baseline version time ÷ newer version time (higher = newer version is faster)</text>
6
+ <text x="410" y="48" text-anchor="middle" font-size="11" fill="#616161">Ruby 3.4.7 [log scale, best of 40]</text>
7
+ <line x1="220" y1="68" x2="220" y2="580" stroke="#e0e0e0" stroke-width="1"/>
8
+ <text x="220" y="64" text-anchor="middle" font-size="11" fill="#757575">1×</text>
9
+ <line x1="307" y1="68" x2="307" y2="580" stroke="#e0e0e0" stroke-width="1"/>
10
+ <text x="307" y="64" text-anchor="middle" font-size="11" fill="#757575">2×</text>
11
+ <line x1="423" y1="68" x2="423" y2="580" stroke="#e0e0e0" stroke-width="1"/>
12
+ <text x="423" y="64" text-anchor="middle" font-size="11" fill="#757575">5×</text>
13
+ <line x1="510" y1="68" x2="510" y2="580" stroke="#e0e0e0" stroke-width="1"/>
14
+ <text x="510" y="64" text-anchor="middle" font-size="11" fill="#757575">10×</text>
15
+ <line x1="597" y1="68" x2="597" y2="580" stroke="#e0e0e0" stroke-width="1"/>
16
+ <text x="597" y="64" text-anchor="middle" font-size="11" fill="#757575">20×</text>
17
+ <line x1="713" y1="68" x2="713" y2="580" stroke="#e0e0e0" stroke-width="1"/>
18
+ <text x="713" y="64" text-anchor="middle" font-size="11" fill="#757575">50×</text>
19
+ <line x1="800" y1="68" x2="800" y2="580" stroke="#e0e0e0" stroke-width="1"/>
20
+ <text x="800" y="64" text-anchor="middle" font-size="11" fill="#757575">100×</text>
21
+ <line x1="220" y1="68" x2="220" y2="580" stroke="#9e9e9e" stroke-width="1.5"/>
22
+ <line x1="220" y1="68" x2="800" y2="68" stroke="#bdbdbd" stroke-width="1"/>
23
+ <rect x="0" y="86" width="820" height="26" fill="#f5f5f5"/>
24
+ <text x="212" y="103" text-anchor="end" font-size="11" fill="#424242">long_fields_40k</text>
25
+ <circle cx="645" cy="99" r="5" fill="#1565C0"/>
26
+ <text x="616" y="103" font-size="10" fill="#1565C0">29×</text>
27
+ <circle cx="744" cy="99" r="5" fill="#BF360C"/>
28
+ <text x="715" y="103" font-size="10" fill="#BF360C">64×</text>
29
+ <circle cx="775" cy="99" r="5" fill="#2E7D32"/>
30
+ <text x="783" y="103" font-size="10" fill="#2E7D32">82×</text>
31
+ <rect x="0" y="112" width="820" height="26" fill="#ffffff"/>
32
+ <text x="212" y="129" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_C</text>
33
+ <circle cx="674" cy="125" r="5" fill="#1565C0"/>
34
+ <circle cx="691" cy="125" r="5" fill="#2E7D32"/>
35
+ <circle cx="693" cy="125" r="5" fill="#BF360C"/>
36
+ <text x="645" y="129" font-size="10" fill="#1565C0">37×</text>
37
+ <text x="701" y="129" font-size="10" fill="#2E7D32">42×</text>
38
+ <text x="728" y="129" font-size="10" fill="#BF360C">43×</text>
39
+ <rect x="0" y="138" width="820" height="26" fill="#f5f5f5"/>
40
+ <text x="212" y="155" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_NC</text>
41
+ <circle cx="568" cy="151" r="5" fill="#1565C0"/>
42
+ <circle cx="592" cy="151" r="5" fill="#2E7D32"/>
43
+ <circle cx="596" cy="151" r="5" fill="#BF360C"/>
44
+ <text x="539" y="155" font-size="10" fill="#1565C0">16×</text>
45
+ <text x="604" y="155" font-size="10" fill="#2E7D32">19×</text>
46
+ <text x="631" y="155" font-size="10" fill="#BF360C">20×</text>
47
+ <rect x="0" y="164" width="820" height="26" fill="#ffffff"/>
48
+ <text x="212" y="181" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_NB</text>
49
+ <circle cx="566" cy="177" r="5" fill="#1565C0"/>
50
+ <circle cx="589" cy="177" r="5" fill="#2E7D32"/>
51
+ <circle cx="591" cy="177" r="5" fill="#BF360C"/>
52
+ <text x="537" y="181" font-size="10" fill="#1565C0">16×</text>
53
+ <text x="599" y="181" font-size="10" fill="#2E7D32">19×</text>
54
+ <text x="626" y="181" font-size="10" fill="#BF360C">19×</text>
55
+ <rect x="0" y="190" width="820" height="26" fill="#f5f5f5"/>
56
+ <text x="212" y="207" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_B</text>
57
+ <circle cx="565" cy="203" r="5" fill="#1565C0"/>
58
+ <circle cx="587" cy="203" r="5" fill="#2E7D32"/>
59
+ <circle cx="588" cy="203" r="5" fill="#BF360C"/>
60
+ <text x="536" y="207" font-size="10" fill="#1565C0">15×</text>
61
+ <text x="596" y="207" font-size="10" fill="#2E7D32">18×</text>
62
+ <text x="623" y="207" font-size="10" fill="#BF360C">19×</text>
63
+ <rect x="0" y="216" width="820" height="26" fill="#ffffff"/>
64
+ <text x="212" y="233" text-anchor="end" font-size="11" fill="#424242">whitespace_heavy_60k</text>
65
+ <circle cx="551" cy="229" r="5" fill="#1565C0"/>
66
+ <circle cx="574" cy="229" r="5" fill="#2E7D32"/>
67
+ <circle cx="576" cy="229" r="5" fill="#BF360C"/>
68
+ <text x="522" y="233" font-size="10" fill="#1565C0">14×</text>
69
+ <text x="584" y="233" font-size="10" fill="#2E7D32">17×</text>
70
+ <text x="611" y="233" font-size="10" fill="#BF360C">17×</text>
71
+ <rect x="0" y="242" width="820" height="26" fill="#f5f5f5"/>
72
+ <text x="212" y="259" text-anchor="end" font-size="11" fill="#424242">sensor_data_50krows_50cols</text>
73
+ <circle cx="560" cy="255" r="5" fill="#1565C0"/>
74
+ <circle cx="571" cy="255" r="5" fill="#BF360C"/>
75
+ <circle cx="571" cy="255" r="5" fill="#2E7D32"/>
76
+ <text x="531" y="259" font-size="10" fill="#1565C0">15×</text>
77
+ <text x="579" y="259" font-size="10" fill="#BF360C">16×</text>
78
+ <text x="606" y="259" font-size="10" fill="#2E7D32">16×</text>
79
+ <rect x="0" y="268" width="820" height="26" fill="#ffffff"/>
80
+ <text x="212" y="285" text-anchor="end" font-size="11" fill="#424242">tab_separated_60k</text>
81
+ <circle cx="540" cy="281" r="5" fill="#1565C0"/>
82
+ <circle cx="563" cy="281" r="5" fill="#2E7D32"/>
83
+ <circle cx="565" cy="281" r="5" fill="#BF360C"/>
84
+ <text x="511" y="285" font-size="10" fill="#1565C0">13×</text>
85
+ <text x="573" y="285" font-size="10" fill="#2E7D32">15×</text>
86
+ <text x="600" y="285" font-size="10" fill="#BF360C">15×</text>
87
+ <rect x="0" y="294" width="820" height="26" fill="#f5f5f5"/>
88
+ <text x="212" y="311" text-anchor="end" font-size="11" fill="#424242">multi_char_separator_60k</text>
89
+ <circle cx="538" cy="307" r="5" fill="#1565C0"/>
90
+ <circle cx="561" cy="307" r="5" fill="#2E7D32"/>
91
+ <circle cx="563" cy="307" r="5" fill="#BF360C"/>
92
+ <text x="509" y="311" font-size="10" fill="#1565C0">13×</text>
93
+ <text x="571" y="311" font-size="10" fill="#2E7D32">15×</text>
94
+ <text x="598" y="311" font-size="10" fill="#BF360C">15×</text>
95
+ <rect x="0" y="320" width="820" height="26" fill="#ffffff"/>
96
+ <text x="212" y="337" text-anchor="end" font-size="11" fill="#424242">heavy_quoting_60k</text>
97
+ <circle cx="503" cy="333" r="5" fill="#1565C0"/>
98
+ <text x="467" y="337" font-size="10" fill="#1565C0">9.4×</text>
99
+ <circle cx="538" cy="333" r="5" fill="#BF360C"/>
100
+ <circle cx="560" cy="333" r="5" fill="#2E7D32"/>
101
+ <text x="568" y="337" font-size="10" fill="#BF360C">13×</text>
102
+ <text x="595" y="337" font-size="10" fill="#2E7D32">15×</text>
103
+ <rect x="0" y="346" width="820" height="26" fill="#f5f5f5"/>
104
+ <text x="212" y="363" text-anchor="end" font-size="11" fill="#424242">uszips</text>
105
+ <circle cx="508" cy="359" r="5" fill="#1565C0"/>
106
+ <text x="472" y="363" font-size="10" fill="#1565C0">9.8×</text>
107
+ <circle cx="542" cy="359" r="5" fill="#BF360C"/>
108
+ <circle cx="555" cy="359" r="5" fill="#2E7D32"/>
109
+ <text x="563" y="363" font-size="10" fill="#BF360C">13×</text>
110
+ <text x="590" y="363" font-size="10" fill="#2E7D32">14×</text>
111
+ <rect x="0" y="372" width="820" height="26" fill="#ffffff"/>
112
+ <text x="212" y="389" text-anchor="end" font-size="11" fill="#424242">uscities</text>
113
+ <circle cx="495" cy="385" r="5" fill="#1565C0"/>
114
+ <text x="459" y="389" font-size="10" fill="#1565C0">8.9×</text>
115
+ <circle cx="531" cy="385" r="5" fill="#BF360C"/>
116
+ <circle cx="548" cy="385" r="5" fill="#2E7D32"/>
117
+ <text x="556" y="389" font-size="10" fill="#BF360C">12×</text>
118
+ <text x="583" y="389" font-size="10" fill="#2E7D32">13×</text>
119
+ <rect x="0" y="398" width="820" height="26" fill="#f5f5f5"/>
120
+ <text x="212" y="415" text-anchor="end" font-size="11" fill="#424242">embedded_separators_60k</text>
121
+ <circle cx="482" cy="411" r="5" fill="#1565C0"/>
122
+ <text x="446" y="415" font-size="10" fill="#1565C0">8.0×</text>
123
+ <circle cx="530" cy="411" r="5" fill="#BF360C"/>
124
+ <circle cx="543" cy="411" r="5" fill="#2E7D32"/>
125
+ <text x="551" y="415" font-size="10" fill="#BF360C">12×</text>
126
+ <text x="578" y="415" font-size="10" fill="#2E7D32">13×</text>
127
+ <rect x="0" y="424" width="820" height="26" fill="#ffffff"/>
128
+ <text x="212" y="441" text-anchor="end" font-size="11" fill="#424242">many_empty_fields_60k</text>
129
+ <circle cx="514" cy="437" r="5" fill="#1565C0"/>
130
+ <circle cx="540" cy="437" r="5" fill="#2E7D32"/>
131
+ <circle cx="541" cy="437" r="5" fill="#BF360C"/>
132
+ <text x="485" y="441" font-size="10" fill="#1565C0">10×</text>
133
+ <text x="549" y="441" font-size="10" fill="#2E7D32">13×</text>
134
+ <text x="576" y="441" font-size="10" fill="#BF360C">13×</text>
135
+ <rect x="0" y="450" width="820" height="26" fill="#f5f5f5"/>
136
+ <text x="212" y="467" text-anchor="end" font-size="11" fill="#424242">worldcities</text>
137
+ <circle cx="484" cy="463" r="5" fill="#1565C0"/>
138
+ <text x="448" y="467" font-size="10" fill="#1565C0">8.1×</text>
139
+ <circle cx="523" cy="463" r="5" fill="#BF360C"/>
140
+ <circle cx="536" cy="463" r="5" fill="#2E7D32"/>
141
+ <text x="544" y="467" font-size="10" fill="#BF360C">11×</text>
142
+ <text x="571" y="467" font-size="10" fill="#2E7D32">12×</text>
143
+ <rect x="0" y="476" width="820" height="26" fill="#ffffff"/>
144
+ <text x="212" y="493" text-anchor="end" font-size="11" fill="#424242">wide_500_cols_20k</text>
145
+ <circle cx="525" cy="489" r="5" fill="#1565C0"/>
146
+ <circle cx="527" cy="489" r="5" fill="#BF360C"/>
147
+ <circle cx="527" cy="489" r="5" fill="#2E7D32"/>
148
+ <text x="496" y="493" font-size="10" fill="#1565C0">11×</text>
149
+ <text x="535" y="493" font-size="10" fill="#BF360C">11×</text>
150
+ <text x="562" y="493" font-size="10" fill="#2E7D32">11×</text>
151
+ <rect x="0" y="502" width="820" height="26" fill="#f5f5f5"/>
152
+ <text x="212" y="519" text-anchor="end" font-size="11" fill="#424242">utf8_multibyte_60k</text>
153
+ <circle cx="494" cy="515" r="5" fill="#1565C0"/>
154
+ <text x="458" y="519" font-size="10" fill="#1565C0">8.8×</text>
155
+ <circle cx="525" cy="515" r="5" fill="#BF360C"/>
156
+ <circle cx="525" cy="515" r="5" fill="#2E7D32"/>
157
+ <text x="533" y="519" font-size="10" fill="#BF360C">11×</text>
158
+ <text x="560" y="519" font-size="10" fill="#2E7D32">11×</text>
159
+ <rect x="0" y="528" width="820" height="26" fill="#ffffff"/>
160
+ <text x="212" y="545" text-anchor="end" font-size="11" fill="#424242">embedded_newlines_60k</text>
161
+ <circle cx="479" cy="541" r="5" fill="#1565C0"/>
162
+ <circle cx="495" cy="541" r="5" fill="#BF360C"/>
163
+ <circle cx="503" cy="541" r="5" fill="#2E7D32"/>
164
+ <text x="443" y="545" font-size="10" fill="#1565C0">7.8×</text>
165
+ <text x="511" y="545" font-size="10" fill="#BF360C">8.9×</text>
166
+ <text x="545" y="545" font-size="10" fill="#2E7D32">9.5×</text>
167
+ <rect x="0" y="554" width="820" height="26" fill="#f5f5f5"/>
168
+ <text x="212" y="571" text-anchor="end" font-size="11" fill="#424242">sample_100k</text>
169
+ <circle cx="470" cy="567" r="5" fill="#1565C0"/>
170
+ <circle cx="500" cy="567" r="5" fill="#2E7D32"/>
171
+ <circle cx="502" cy="567" r="5" fill="#BF360C"/>
172
+ <text x="434" y="571" font-size="10" fill="#1565C0">7.3×</text>
173
+ <text x="510" y="571" font-size="10" fill="#2E7D32">9.2×</text>
174
+ <text x="544" y="571" font-size="10" fill="#BF360C">9.4×</text>
175
+ <circle cx="228" cy="594" r="5" fill="#1565C0"/>
176
+ <text x="240" y="598" font-size="11" fill="#1565C0">C accelerated (v1.15.2)</text>
177
+ <circle cx="228" cy="614" r="5" fill="#BF360C"/>
178
+ <text x="240" y="618" font-size="11" fill="#BF360C">C accelerated (v1.16.4)</text>
179
+ <circle cx="228" cy="634" r="5" fill="#2E7D32"/>
180
+ <text x="240" y="638" font-size="11" fill="#2E7D32">C accelerated (v1.17.0)</text>
181
+ </svg>