smarter_csv 1.16.4 → 1.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +10 -1
- data/CHANGELOG.md +54 -0
- data/Gemfile +10 -5
- data/README.md +98 -14
- data/TO_DO.md +109 -0
- data/docs/_introduction.md +1 -0
- data/docs/bad_row_quarantine.md +2 -1
- data/docs/basic_read_api.md +6 -1
- data/docs/basic_write_api.md +30 -0
- data/docs/batch_processing.md +25 -0
- data/docs/column_selection.md +1 -0
- data/docs/data_transformations.md +1 -0
- data/docs/examples.md +126 -0
- data/docs/header_transformations.md +23 -0
- data/docs/header_validations.md +1 -0
- data/docs/history.md +1 -0
- data/docs/instrumentation.md +2 -1
- data/docs/migrating_from_csv.md +1 -0
- data/docs/options.md +20 -18
- data/docs/parsing_strategy.md +1 -0
- data/docs/real_world_csv.md +51 -1
- data/docs/releases/1.16.0/performance_notes.md +15 -15
- data/docs/releases/1.17.0/benchmarks.md +121 -0
- data/docs/releases/1.17.0/changes.md +161 -0
- data/docs/releases/1.17.0/performance_notes.md +126 -0
- data/docs/row_col_sep.md +21 -1
- data/docs/ruby_csv_pitfalls.md +1 -0
- data/docs/value_converters.md +24 -0
- data/docs/warnings.md +141 -0
- data/ext/smarter_csv/smarter_csv.c +98 -32
- data/images/SmarterCSV_1.17.0_vs_RubyCSV_3.3.5_speedup.svg +106 -0
- data/images/SmarterCSV_1.17.0_vs_previous_C-speedup.svg +181 -0
- data/images/SmarterCSV_1.17.0_vs_previous_Rb-speedup.svg +179 -0
- data/lib/smarter_csv/auto_detection.rb +215 -30
- data/lib/smarter_csv/file_io.rb +2 -2
- data/lib/smarter_csv/hash_transformations.rb +29 -13
- data/lib/smarter_csv/parser.rb +42 -33
- data/lib/smarter_csv/peekable_io.rb +453 -0
- data/lib/smarter_csv/reader.rb +119 -23
- data/lib/smarter_csv/reader_options.rb +61 -1
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv.rb +40 -12
- metadata +12 -5
- data/TO_DO_v2.md +0 -14
- data/ext/smarter_csv/Makefile +0 -270
|
@@ -134,24 +134,51 @@ static const rb_data_type_t parse_context_type = {
|
|
|
134
134
|
};
|
|
135
135
|
|
|
136
136
|
static VALUE unescape_quotes(char *str, long len, char quote_char, rb_encoding *encoding) {
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
137
|
+
// Fast path: scan for any doubled quote pair. If none present, the field has
|
|
138
|
+
// nothing to unescape — emit it directly via rb_enc_str_new and skip the
|
|
139
|
+
// temp buffer + byte-by-byte copy. memchr is SIMD-optimized; the scan cost
|
|
140
|
+
// is far less than the malloc/free pair this avoids.
|
|
141
|
+
char *p = str;
|
|
142
|
+
char *end = str + len;
|
|
143
|
+
while ((p = memchr(p, quote_char, end - p))) {
|
|
144
|
+
if (p + 1 < end && *(p + 1) == quote_char) goto needs_unescape;
|
|
145
|
+
p++;
|
|
146
|
+
}
|
|
147
|
+
return rb_enc_str_new(str, len, encoding);
|
|
148
|
+
|
|
149
|
+
needs_unescape:
|
|
150
|
+
// Slow path: at least one doubled quote pair was found. Allocate a temp
|
|
151
|
+
// buffer and walk byte-by-byte, collapsing "" → ".
|
|
152
|
+
{
|
|
153
|
+
char *buf = ALLOC_N(char, len);
|
|
154
|
+
long j = 0;
|
|
155
|
+
for (long i = 0; i < len; i++) {
|
|
156
|
+
if (str[i] == quote_char && i + 1 < len && str[i + 1] == quote_char) {
|
|
157
|
+
buf[j++] = quote_char;
|
|
158
|
+
i++; // skip second quote
|
|
159
|
+
} else {
|
|
160
|
+
buf[j++] = str[i];
|
|
161
|
+
}
|
|
145
162
|
}
|
|
163
|
+
VALUE out = rb_enc_str_new(buf, j, encoding);
|
|
164
|
+
xfree(buf);
|
|
165
|
+
return out;
|
|
146
166
|
}
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
/* Helper: build the 2-element [elements, data_size] tuple returned by rb_parse_csv_line.
|
|
170
|
+
* Aligns this function's return shape with parse_csv_line_ruby and rb_parse_line_to_hash_ctx:
|
|
171
|
+
* data_size = -1 signals "unclosed quoted field — needs more data". */
|
|
172
|
+
static inline VALUE make_parse_result(VALUE elements, long data_size) {
|
|
173
|
+
VALUE result = rb_ary_new_capa(2);
|
|
174
|
+
rb_ary_push(result, elements);
|
|
175
|
+
rb_ary_push(result, LONG2FIX(data_size));
|
|
176
|
+
return result;
|
|
150
177
|
}
|
|
151
178
|
|
|
152
179
|
static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quote_char, VALUE max_size, VALUE has_quotes_val, VALUE strip_ws_val, VALUE allow_escaped_quotes_val, VALUE quote_boundary_standard_val, VALUE row_sep_val) {
|
|
153
180
|
if (RB_TYPE_P(line, T_NIL) == 1) {
|
|
154
|
-
return rb_ary_new();
|
|
181
|
+
return make_parse_result(rb_ary_new(), 0);
|
|
155
182
|
}
|
|
156
183
|
|
|
157
184
|
if (RB_TYPE_P(line, T_STRING) != 1) {
|
|
@@ -178,7 +205,7 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
|
|
|
178
205
|
if (max_size != Qnil) {
|
|
179
206
|
max_fields = NUM2INT(max_size);
|
|
180
207
|
if (max_fields < 0) {
|
|
181
|
-
return rb_ary_new();
|
|
208
|
+
return make_parse_result(rb_ary_new(), 0);
|
|
182
209
|
}
|
|
183
210
|
}
|
|
184
211
|
|
|
@@ -237,7 +264,7 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
|
|
|
237
264
|
rb_ary_push(elements, field);
|
|
238
265
|
}
|
|
239
266
|
|
|
240
|
-
return elements;
|
|
267
|
+
return make_parse_result(elements, RARRAY_LEN(elements));
|
|
241
268
|
}
|
|
242
269
|
|
|
243
270
|
// === SLOW PATH: Quoted fields or multi-char separator ===
|
|
@@ -350,7 +377,13 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
|
|
|
350
377
|
}
|
|
351
378
|
|
|
352
379
|
if (in_quotes) {
|
|
353
|
-
|
|
380
|
+
/* Unclosed quoted field at EOL: signal "needs more data" rather than raising.
|
|
381
|
+
* Aligns with parse_csv_line_ruby and rb_parse_line_to_hash_ctx, which both
|
|
382
|
+
* return data_size = -1 on this condition. The Reader's stitch loop consumes
|
|
383
|
+
* the signal: append the next physical line and re-parse, or raise MalformedCSV
|
|
384
|
+
* at EOF if the field never closes. The parser does not decide "ultimately
|
|
385
|
+
* malformed"; the caller does. */
|
|
386
|
+
return make_parse_result(rb_ary_new(), -1);
|
|
354
387
|
}
|
|
355
388
|
|
|
356
389
|
if ((max_fields < 0) || (element_count < max_fields)) {
|
|
@@ -384,7 +417,7 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
|
|
|
384
417
|
rb_ary_push(elements, field);
|
|
385
418
|
}
|
|
386
419
|
|
|
387
|
-
return elements;
|
|
420
|
+
return make_parse_result(elements, RARRAY_LEN(elements));
|
|
388
421
|
}
|
|
389
422
|
|
|
390
423
|
// Efficiently combine two arrays into a hash (replaces headers.zip(values).to_h)
|
|
@@ -484,6 +517,37 @@ static inline VALUE try_numeric_conversion(char *trim_start, long trimmed_len) {
|
|
|
484
517
|
return Qundef; // not numeric
|
|
485
518
|
}
|
|
486
519
|
|
|
520
|
+
/*
|
|
521
|
+
* leading_whitespace_len - byte length (1, 2, or 3) of the whitespace character at the start of
|
|
522
|
+
* `s` (with `len` bytes available), or 0 if `s` does not start with whitespace.
|
|
523
|
+
*
|
|
524
|
+
* "Whitespace" here matches Ruby's [[:space:]] / Rails' String#blank? — the Unicode White_Space
|
|
525
|
+
* set — so the C blank check stays consistent with the Ruby fallback path (hash_transformations).
|
|
526
|
+
* ASCII bytes are handled with a single comparison; the multibyte arms are only reached when a
|
|
527
|
+
* byte >= 0x80 appears, so all-ASCII fields pay nothing extra.
|
|
528
|
+
*/
|
|
529
|
+
static inline int leading_whitespace_len(const char *s, long len) {
|
|
530
|
+
if (len < 1) return 0;
|
|
531
|
+
unsigned char b0 = (unsigned char)s[0];
|
|
532
|
+
if (b0 == 0x20 || (b0 >= 0x09 && b0 <= 0x0D)) return 1; // space (most common) then \t \n \v \f \r
|
|
533
|
+
if (b0 < 0x80) return 0; // any other ASCII byte: not whitespace
|
|
534
|
+
if (len < 2) return 0;
|
|
535
|
+
unsigned char b1 = (unsigned char)s[1];
|
|
536
|
+
if (b0 == 0xC2 && (b1 == 0x85 || b1 == 0xA0)) return 2; // U+0085 NEL, U+00A0 NBSP
|
|
537
|
+
if (len < 3) return 0;
|
|
538
|
+
unsigned char b2 = (unsigned char)s[2];
|
|
539
|
+
if (b0 == 0xE1 && b1 == 0x9A && b2 == 0x80) return 3; // U+1680 OGHAM SPACE MARK
|
|
540
|
+
if (b0 == 0xE2) {
|
|
541
|
+
// U+2000..U+200A (E2 80 80..8A) — note: 0x8B is U+200B ZERO WIDTH SPACE, NOT whitespace.
|
|
542
|
+
// U+2028 LINE SEP (A8), U+2029 PARA SEP (A9), U+202F NARROW NBSP (AF), U+205F MMSP (E2 81 9F).
|
|
543
|
+
if (b1 == 0x80 && ((b2 >= 0x80 && b2 <= 0x8A) || b2 == 0xA8 || b2 == 0xA9 || b2 == 0xAF)) return 3;
|
|
544
|
+
if (b1 == 0x81 && b2 == 0x9F) return 3;
|
|
545
|
+
return 0;
|
|
546
|
+
}
|
|
547
|
+
if (b0 == 0xE3 && b1 == 0x80 && b2 == 0x80) return 3; // U+3000 IDEOGRAPHIC SPACE
|
|
548
|
+
return 0;
|
|
549
|
+
}
|
|
550
|
+
|
|
487
551
|
/*
|
|
488
552
|
* ================================================================================
|
|
489
553
|
* Transformation options struct - passed to insert_field_into_hash to avoid
|
|
@@ -541,17 +605,16 @@ static inline __attribute__((always_inline)) bool insert_field_into_hash(
|
|
|
541
605
|
VALUE key = get_key_for_index(element_count, opts->headers, opts->headers_len, opts->prefix_str);
|
|
542
606
|
|
|
543
607
|
// 1. Empty/blank field handling
|
|
544
|
-
//
|
|
545
|
-
//
|
|
546
|
-
//
|
|
608
|
+
// A field is blank if it is zero-length or consists entirely of whitespace characters.
|
|
609
|
+
// "Whitespace" matches Ruby's BLANK_RE = /\A[[:space:]]*\z/ (and Rails' String#blank?) — the
|
|
610
|
+
// Unicode White_Space set — so this stays consistent with the Ruby fallback path.
|
|
547
611
|
if (opts->remove_empty_values) {
|
|
548
612
|
bool is_blank = true;
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
}
|
|
613
|
+
long i = 0;
|
|
614
|
+
while (i < trimmed_len) {
|
|
615
|
+
int w = leading_whitespace_len(trim_start + i, trimmed_len - i);
|
|
616
|
+
if (w == 0) { is_blank = false; break; }
|
|
617
|
+
i += w;
|
|
555
618
|
}
|
|
556
619
|
if (is_blank) return false; // skip blank value
|
|
557
620
|
}
|
|
@@ -562,22 +625,25 @@ static inline __attribute__((always_inline)) bool insert_field_into_hash(
|
|
|
562
625
|
return false; // not a non-blank value
|
|
563
626
|
}
|
|
564
627
|
|
|
565
|
-
// 2. String-based zero check — matches /\
|
|
566
|
-
// Works independently of numeric conversion: "0", "00", "0.0", "00.00" etc.
|
|
628
|
+
// 2. String-based zero check — matches /\A[+-]?0+(?:\.0+)?\z/
|
|
629
|
+
// Works independently of numeric conversion: "0", "00", "0.0", "00.00", "+0", "-0.00" etc.
|
|
567
630
|
// Outer quotes are stripped before this call, so the check applies equally
|
|
568
631
|
// to quoted ("0") and unquoted (0) fields.
|
|
569
632
|
if (opts->remove_zero_values) {
|
|
570
|
-
|
|
571
|
-
//
|
|
572
|
-
|
|
633
|
+
char c0 = trim_start[0]; // trimmed_len > 0 guaranteed (zero-length handled above)
|
|
634
|
+
// Index i skips an optional leading sign; bail right away if the first byte can't begin a zero.
|
|
635
|
+
long i = (c0 == '0') ? 0
|
|
636
|
+
: (c0 == '+' || c0 == '-') ? 1
|
|
637
|
+
: trimmed_len;
|
|
638
|
+
if (i < trimmed_len && trim_start[i] == '0') {
|
|
573
639
|
while (i < trimmed_len && trim_start[i] == '0') i++;
|
|
574
|
-
if (i == trimmed_len) return false; // all zeros, e.g. "0", "00"
|
|
640
|
+
if (i == trimmed_len) return false; // all zeros, e.g. "0", "00", "+0", "-00"
|
|
575
641
|
if (trim_start[i] == '.') {
|
|
576
642
|
i++;
|
|
577
643
|
long dot_pos = i;
|
|
578
644
|
while (i < trimmed_len && trim_start[i] == '0') i++;
|
|
579
645
|
// Valid if we consumed everything AND had at least one zero after dot
|
|
580
|
-
if (i == trimmed_len && i > dot_pos) return false; // e.g. "0.0", "00.00"
|
|
646
|
+
if (i == trimmed_len && i > dot_pos) return false; // e.g. "0.0", "00.00", "+0.0", "-0.00"
|
|
581
647
|
}
|
|
582
648
|
}
|
|
583
649
|
}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="764" height="632"
|
|
2
|
+
font-family="ui-monospace, 'Cascadia Code', 'Courier New', monospace" font-size="12">
|
|
3
|
+
<rect width="764" height="632" fill="#ffffff"/>
|
|
4
|
+
<text x="382" y="20" text-anchor="middle" font-size="14" font-weight="bold" fill="#212121">SmarterCSV 1.17.0 vs Ruby CSV.read 3.3.5</text>
|
|
5
|
+
<text x="382" y="36" text-anchor="middle" font-size="10" fill="#9e9e9e">Speedup = CSV.read time ÷ SmarterCSV time (higher = SmarterCSV is faster) · Ruby 3.4.7 · best of 40</text>
|
|
6
|
+
<text x="490" y="620" text-anchor="middle" font-size="11" fill="#616161">Speedup (CSV.read ÷ SmarterCSV 1.17.0 C)</text>
|
|
7
|
+
<line x1="240" y1="62" x2="240" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
8
|
+
<text x="240" y="606" text-anchor="middle" font-size="11" fill="#757575">0×</text>
|
|
9
|
+
<line x1="240" y1="62" x2="240" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
10
|
+
<text x="240" y="606" text-anchor="middle" font-size="11" fill="#757575">1×</text>
|
|
11
|
+
<line x1="240" y1="62" x2="240" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
12
|
+
<text x="240" y="606" text-anchor="middle" font-size="11" fill="#757575">2×</text>
|
|
13
|
+
<line x1="240" y1="62" x2="240" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
14
|
+
<text x="240" y="606" text-anchor="middle" font-size="11" fill="#757575">3×</text>
|
|
15
|
+
<line x1="240" y1="62" x2="240" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
16
|
+
<text x="240" y="606" text-anchor="middle" font-size="11" fill="#757575">4×</text>
|
|
17
|
+
<line x1="240" y1="62" x2="240" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
18
|
+
<text x="240" y="606" text-anchor="middle" font-size="11" fill="#757575">5×</text>
|
|
19
|
+
<line x1="240" y1="62" x2="240" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
20
|
+
<text x="240" y="606" text-anchor="middle" font-size="11" fill="#757575">6×</text>
|
|
21
|
+
<line x1="240" y1="62" x2="240" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
22
|
+
<text x="240" y="606" text-anchor="middle" font-size="11" fill="#757575">7×</text>
|
|
23
|
+
<line x1="240" y1="62" x2="240" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
24
|
+
<text x="240" y="606" text-anchor="middle" font-size="11" fill="#757575">8×</text>
|
|
25
|
+
<line x1="740" y1="62" x2="740" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
26
|
+
<text x="740" y="606" text-anchor="middle" font-size="11" fill="#757575">9×</text>
|
|
27
|
+
<line x1="296" y1="62" x2="296" y2="594" stroke="#9e9e9e" stroke-width="1.5" stroke-dasharray="4,3"/>
|
|
28
|
+
<line x1="240" y1="594" x2="740" y2="594" stroke="#bdbdbd" stroke-width="1"/>
|
|
29
|
+
<line x1="240" y1="62" x2="240" y2="594" stroke="#bdbdbd" stroke-width="1"/>
|
|
30
|
+
<rect x="0" y="62" width="764" height="28" fill="#f5f5f5"/>
|
|
31
|
+
<text x="232" y="80" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_C.csv</text>
|
|
32
|
+
<rect x="240" y="67" width="437" height="18" fill="#1565C0" rx="2"/>
|
|
33
|
+
<text x="673" y="80" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">7.87×</text>
|
|
34
|
+
<rect x="0" y="90" width="764" height="28" fill="#ffffff"/>
|
|
35
|
+
<text x="232" y="108" text-anchor="end" font-size="11" fill="#424242">uszips.csv</text>
|
|
36
|
+
<rect x="240" y="95" width="435" height="18" fill="#1565C0" rx="2"/>
|
|
37
|
+
<text x="671" y="108" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">7.83×</text>
|
|
38
|
+
<rect x="0" y="118" width="764" height="28" fill="#f5f5f5"/>
|
|
39
|
+
<text x="232" y="136" text-anchor="end" font-size="11" fill="#424242">long_fields_40k.csv</text>
|
|
40
|
+
<rect x="240" y="123" width="380" height="18" fill="#1565C0" rx="2"/>
|
|
41
|
+
<text x="616" y="136" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">6.83×</text>
|
|
42
|
+
<rect x="0" y="146" width="764" height="28" fill="#ffffff"/>
|
|
43
|
+
<text x="232" y="164" text-anchor="end" font-size="11" fill="#424242">worldcities.csv</text>
|
|
44
|
+
<rect x="240" y="151" width="378" height="18" fill="#1565C0" rx="2"/>
|
|
45
|
+
<text x="614" y="164" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">6.80×</text>
|
|
46
|
+
<rect x="0" y="174" width="764" height="28" fill="#f5f5f5"/>
|
|
47
|
+
<text x="232" y="192" text-anchor="end" font-size="11" fill="#424242">uscities.csv</text>
|
|
48
|
+
<rect x="240" y="179" width="371" height="18" fill="#1565C0" rx="2"/>
|
|
49
|
+
<text x="607" y="192" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">6.68×</text>
|
|
50
|
+
<rect x="0" y="202" width="764" height="28" fill="#ffffff"/>
|
|
51
|
+
<text x="232" y="220" text-anchor="end" font-size="11" fill="#424242">embedded_separators_60k.csv</text>
|
|
52
|
+
<rect x="240" y="207" width="307" height="18" fill="#1565C0" rx="2"/>
|
|
53
|
+
<text x="543" y="220" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">5.53×</text>
|
|
54
|
+
<rect x="0" y="230" width="764" height="28" fill="#f5f5f5"/>
|
|
55
|
+
<text x="232" y="248" text-anchor="end" font-size="11" fill="#424242">utf8_multibyte_60k.csv</text>
|
|
56
|
+
<rect x="240" y="235" width="216" height="18" fill="#1565C0" rx="2"/>
|
|
57
|
+
<text x="452" y="248" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.89×</text>
|
|
58
|
+
<rect x="0" y="258" width="764" height="28" fill="#ffffff"/>
|
|
59
|
+
<text x="232" y="276" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_NC.csv</text>
|
|
60
|
+
<rect x="240" y="263" width="212" height="18" fill="#1565C0" rx="2"/>
|
|
61
|
+
<text x="448" y="276" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.81×</text>
|
|
62
|
+
<rect x="0" y="286" width="764" height="28" fill="#f5f5f5"/>
|
|
63
|
+
<text x="232" y="304" text-anchor="end" font-size="11" fill="#424242">heavy_quoting_60k.csv</text>
|
|
64
|
+
<rect x="240" y="291" width="195" height="18" fill="#1565C0" rx="2"/>
|
|
65
|
+
<text x="431" y="304" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.51×</text>
|
|
66
|
+
<rect x="0" y="314" width="764" height="28" fill="#ffffff"/>
|
|
67
|
+
<text x="232" y="332" text-anchor="end" font-size="11" fill="#424242">many_empty_fields_60k.csv</text>
|
|
68
|
+
<rect x="240" y="319" width="194" height="18" fill="#1565C0" rx="2"/>
|
|
69
|
+
<text x="430" y="332" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.49×</text>
|
|
70
|
+
<rect x="0" y="342" width="764" height="28" fill="#f5f5f5"/>
|
|
71
|
+
<text x="232" y="360" text-anchor="end" font-size="11" fill="#424242">sample_100k.csv</text>
|
|
72
|
+
<rect x="240" y="347" width="191" height="18" fill="#1565C0" rx="2"/>
|
|
73
|
+
<text x="427" y="360" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.44×</text>
|
|
74
|
+
<rect x="0" y="370" width="764" height="28" fill="#ffffff"/>
|
|
75
|
+
<text x="232" y="388" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_NB.csv</text>
|
|
76
|
+
<rect x="240" y="375" width="175" height="18" fill="#1565C0" rx="2"/>
|
|
77
|
+
<text x="411" y="388" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.14×</text>
|
|
78
|
+
<rect x="0" y="398" width="764" height="28" fill="#f5f5f5"/>
|
|
79
|
+
<text x="232" y="416" text-anchor="end" font-size="11" fill="#424242">whitespace_heavy_60k.csv</text>
|
|
80
|
+
<rect x="240" y="403" width="166" height="18" fill="#1565C0" rx="2"/>
|
|
81
|
+
<text x="402" y="416" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">2.98×</text>
|
|
82
|
+
<rect x="0" y="426" width="764" height="28" fill="#ffffff"/>
|
|
83
|
+
<text x="232" y="444" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_B.csv</text>
|
|
84
|
+
<rect x="240" y="431" width="164" height="18" fill="#1565C0" rx="2"/>
|
|
85
|
+
<text x="400" y="444" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">2.95×</text>
|
|
86
|
+
<rect x="0" y="454" width="764" height="28" fill="#f5f5f5"/>
|
|
87
|
+
<text x="232" y="472" text-anchor="end" font-size="11" fill="#424242">embedded_newlines_60k.csv</text>
|
|
88
|
+
<rect x="240" y="459" width="162" height="18" fill="#1565C0" rx="2"/>
|
|
89
|
+
<text x="398" y="472" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">2.91×</text>
|
|
90
|
+
<rect x="0" y="482" width="764" height="28" fill="#ffffff"/>
|
|
91
|
+
<text x="232" y="500" text-anchor="end" font-size="11" fill="#424242">sensor_data_50krows_50cols.csv</text>
|
|
92
|
+
<rect x="240" y="487" width="141" height="18" fill="#1565C0" rx="2"/>
|
|
93
|
+
<text x="377" y="500" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">2.54×</text>
|
|
94
|
+
<rect x="0" y="510" width="764" height="28" fill="#f5f5f5"/>
|
|
95
|
+
<text x="232" y="528" text-anchor="end" font-size="11" fill="#424242">wide_500_cols_20k.csv</text>
|
|
96
|
+
<rect x="240" y="515" width="94" height="18" fill="#1565C0" rx="2"/>
|
|
97
|
+
<text x="330" y="528" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">1.69×</text>
|
|
98
|
+
<rect x="0" y="538" width="764" height="28" fill="#ffffff"/>
|
|
99
|
+
<text x="232" y="556" text-anchor="end" font-size="11" fill="#424242">tab_separated_60k.tsv</text>
|
|
100
|
+
<rect x="240" y="543" width="87" height="18" fill="#1565C0" rx="2"/>
|
|
101
|
+
<text x="323" y="556" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">1.56×</text>
|
|
102
|
+
<rect x="0" y="566" width="764" height="28" fill="#f5f5f5"/>
|
|
103
|
+
<text x="232" y="584" text-anchor="end" font-size="11" fill="#424242">multi_char_separator_60k.csv</text>
|
|
104
|
+
<rect x="240" y="571" width="75" height="18" fill="#1565C0" rx="2"/>
|
|
105
|
+
<text x="311" y="584" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">1.35×</text>
|
|
106
|
+
</svg>
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="820" height="668"
|
|
2
|
+
font-family="ui-monospace, 'Cascadia Code', 'Courier New', monospace" font-size="12">
|
|
3
|
+
<rect width="820" height="668" fill="#ffffff"/>
|
|
4
|
+
<text x="410" y="18" text-anchor="middle" font-size="13" font-weight="bold" fill="#212121">SmarterCSV improvements 1.15.2, 1.16.4, 1.17.0 vs 1.14.4 — C accelerated</text>
|
|
5
|
+
<text x="410" y="32" text-anchor="middle" font-size="10" fill="#9e9e9e">Speedup ratio = baseline version time ÷ newer version time (higher = newer version is faster)</text>
|
|
6
|
+
<text x="410" y="48" text-anchor="middle" font-size="11" fill="#616161">Ruby 3.4.7 [log scale, best of 40]</text>
|
|
7
|
+
<line x1="220" y1="68" x2="220" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
8
|
+
<text x="220" y="64" text-anchor="middle" font-size="11" fill="#757575">1×</text>
|
|
9
|
+
<line x1="307" y1="68" x2="307" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
10
|
+
<text x="307" y="64" text-anchor="middle" font-size="11" fill="#757575">2×</text>
|
|
11
|
+
<line x1="423" y1="68" x2="423" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
12
|
+
<text x="423" y="64" text-anchor="middle" font-size="11" fill="#757575">5×</text>
|
|
13
|
+
<line x1="510" y1="68" x2="510" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
14
|
+
<text x="510" y="64" text-anchor="middle" font-size="11" fill="#757575">10×</text>
|
|
15
|
+
<line x1="597" y1="68" x2="597" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
16
|
+
<text x="597" y="64" text-anchor="middle" font-size="11" fill="#757575">20×</text>
|
|
17
|
+
<line x1="713" y1="68" x2="713" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
18
|
+
<text x="713" y="64" text-anchor="middle" font-size="11" fill="#757575">50×</text>
|
|
19
|
+
<line x1="800" y1="68" x2="800" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
20
|
+
<text x="800" y="64" text-anchor="middle" font-size="11" fill="#757575">100×</text>
|
|
21
|
+
<line x1="220" y1="68" x2="220" y2="580" stroke="#9e9e9e" stroke-width="1.5"/>
|
|
22
|
+
<line x1="220" y1="68" x2="800" y2="68" stroke="#bdbdbd" stroke-width="1"/>
|
|
23
|
+
<rect x="0" y="86" width="820" height="26" fill="#f5f5f5"/>
|
|
24
|
+
<text x="212" y="103" text-anchor="end" font-size="11" fill="#424242">long_fields_40k</text>
|
|
25
|
+
<circle cx="645" cy="99" r="5" fill="#1565C0"/>
|
|
26
|
+
<text x="616" y="103" font-size="10" fill="#1565C0">29×</text>
|
|
27
|
+
<circle cx="744" cy="99" r="5" fill="#BF360C"/>
|
|
28
|
+
<text x="715" y="103" font-size="10" fill="#BF360C">64×</text>
|
|
29
|
+
<circle cx="775" cy="99" r="5" fill="#2E7D32"/>
|
|
30
|
+
<text x="783" y="103" font-size="10" fill="#2E7D32">82×</text>
|
|
31
|
+
<rect x="0" y="112" width="820" height="26" fill="#ffffff"/>
|
|
32
|
+
<text x="212" y="129" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_C</text>
|
|
33
|
+
<circle cx="674" cy="125" r="5" fill="#1565C0"/>
|
|
34
|
+
<circle cx="691" cy="125" r="5" fill="#2E7D32"/>
|
|
35
|
+
<circle cx="693" cy="125" r="5" fill="#BF360C"/>
|
|
36
|
+
<text x="645" y="129" font-size="10" fill="#1565C0">37×</text>
|
|
37
|
+
<text x="701" y="129" font-size="10" fill="#2E7D32">42×</text>
|
|
38
|
+
<text x="728" y="129" font-size="10" fill="#BF360C">43×</text>
|
|
39
|
+
<rect x="0" y="138" width="820" height="26" fill="#f5f5f5"/>
|
|
40
|
+
<text x="212" y="155" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_NC</text>
|
|
41
|
+
<circle cx="568" cy="151" r="5" fill="#1565C0"/>
|
|
42
|
+
<circle cx="592" cy="151" r="5" fill="#2E7D32"/>
|
|
43
|
+
<circle cx="596" cy="151" r="5" fill="#BF360C"/>
|
|
44
|
+
<text x="539" y="155" font-size="10" fill="#1565C0">16×</text>
|
|
45
|
+
<text x="604" y="155" font-size="10" fill="#2E7D32">19×</text>
|
|
46
|
+
<text x="631" y="155" font-size="10" fill="#BF360C">20×</text>
|
|
47
|
+
<rect x="0" y="164" width="820" height="26" fill="#ffffff"/>
|
|
48
|
+
<text x="212" y="181" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_NB</text>
|
|
49
|
+
<circle cx="566" cy="177" r="5" fill="#1565C0"/>
|
|
50
|
+
<circle cx="589" cy="177" r="5" fill="#2E7D32"/>
|
|
51
|
+
<circle cx="591" cy="177" r="5" fill="#BF360C"/>
|
|
52
|
+
<text x="537" y="181" font-size="10" fill="#1565C0">16×</text>
|
|
53
|
+
<text x="599" y="181" font-size="10" fill="#2E7D32">19×</text>
|
|
54
|
+
<text x="626" y="181" font-size="10" fill="#BF360C">19×</text>
|
|
55
|
+
<rect x="0" y="190" width="820" height="26" fill="#f5f5f5"/>
|
|
56
|
+
<text x="212" y="207" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_B</text>
|
|
57
|
+
<circle cx="565" cy="203" r="5" fill="#1565C0"/>
|
|
58
|
+
<circle cx="587" cy="203" r="5" fill="#2E7D32"/>
|
|
59
|
+
<circle cx="588" cy="203" r="5" fill="#BF360C"/>
|
|
60
|
+
<text x="536" y="207" font-size="10" fill="#1565C0">15×</text>
|
|
61
|
+
<text x="596" y="207" font-size="10" fill="#2E7D32">18×</text>
|
|
62
|
+
<text x="623" y="207" font-size="10" fill="#BF360C">19×</text>
|
|
63
|
+
<rect x="0" y="216" width="820" height="26" fill="#ffffff"/>
|
|
64
|
+
<text x="212" y="233" text-anchor="end" font-size="11" fill="#424242">whitespace_heavy_60k</text>
|
|
65
|
+
<circle cx="551" cy="229" r="5" fill="#1565C0"/>
|
|
66
|
+
<circle cx="574" cy="229" r="5" fill="#2E7D32"/>
|
|
67
|
+
<circle cx="576" cy="229" r="5" fill="#BF360C"/>
|
|
68
|
+
<text x="522" y="233" font-size="10" fill="#1565C0">14×</text>
|
|
69
|
+
<text x="584" y="233" font-size="10" fill="#2E7D32">17×</text>
|
|
70
|
+
<text x="611" y="233" font-size="10" fill="#BF360C">17×</text>
|
|
71
|
+
<rect x="0" y="242" width="820" height="26" fill="#f5f5f5"/>
|
|
72
|
+
<text x="212" y="259" text-anchor="end" font-size="11" fill="#424242">sensor_data_50krows_50cols</text>
|
|
73
|
+
<circle cx="560" cy="255" r="5" fill="#1565C0"/>
|
|
74
|
+
<circle cx="571" cy="255" r="5" fill="#BF360C"/>
|
|
75
|
+
<circle cx="571" cy="255" r="5" fill="#2E7D32"/>
|
|
76
|
+
<text x="531" y="259" font-size="10" fill="#1565C0">15×</text>
|
|
77
|
+
<text x="579" y="259" font-size="10" fill="#BF360C">16×</text>
|
|
78
|
+
<text x="606" y="259" font-size="10" fill="#2E7D32">16×</text>
|
|
79
|
+
<rect x="0" y="268" width="820" height="26" fill="#ffffff"/>
|
|
80
|
+
<text x="212" y="285" text-anchor="end" font-size="11" fill="#424242">tab_separated_60k</text>
|
|
81
|
+
<circle cx="540" cy="281" r="5" fill="#1565C0"/>
|
|
82
|
+
<circle cx="563" cy="281" r="5" fill="#2E7D32"/>
|
|
83
|
+
<circle cx="565" cy="281" r="5" fill="#BF360C"/>
|
|
84
|
+
<text x="511" y="285" font-size="10" fill="#1565C0">13×</text>
|
|
85
|
+
<text x="573" y="285" font-size="10" fill="#2E7D32">15×</text>
|
|
86
|
+
<text x="600" y="285" font-size="10" fill="#BF360C">15×</text>
|
|
87
|
+
<rect x="0" y="294" width="820" height="26" fill="#f5f5f5"/>
|
|
88
|
+
<text x="212" y="311" text-anchor="end" font-size="11" fill="#424242">multi_char_separator_60k</text>
|
|
89
|
+
<circle cx="538" cy="307" r="5" fill="#1565C0"/>
|
|
90
|
+
<circle cx="561" cy="307" r="5" fill="#2E7D32"/>
|
|
91
|
+
<circle cx="563" cy="307" r="5" fill="#BF360C"/>
|
|
92
|
+
<text x="509" y="311" font-size="10" fill="#1565C0">13×</text>
|
|
93
|
+
<text x="571" y="311" font-size="10" fill="#2E7D32">15×</text>
|
|
94
|
+
<text x="598" y="311" font-size="10" fill="#BF360C">15×</text>
|
|
95
|
+
<rect x="0" y="320" width="820" height="26" fill="#ffffff"/>
|
|
96
|
+
<text x="212" y="337" text-anchor="end" font-size="11" fill="#424242">heavy_quoting_60k</text>
|
|
97
|
+
<circle cx="503" cy="333" r="5" fill="#1565C0"/>
|
|
98
|
+
<text x="467" y="337" font-size="10" fill="#1565C0">9.4×</text>
|
|
99
|
+
<circle cx="538" cy="333" r="5" fill="#BF360C"/>
|
|
100
|
+
<circle cx="560" cy="333" r="5" fill="#2E7D32"/>
|
|
101
|
+
<text x="568" y="337" font-size="10" fill="#BF360C">13×</text>
|
|
102
|
+
<text x="595" y="337" font-size="10" fill="#2E7D32">15×</text>
|
|
103
|
+
<rect x="0" y="346" width="820" height="26" fill="#f5f5f5"/>
|
|
104
|
+
<text x="212" y="363" text-anchor="end" font-size="11" fill="#424242">uszips</text>
|
|
105
|
+
<circle cx="508" cy="359" r="5" fill="#1565C0"/>
|
|
106
|
+
<text x="472" y="363" font-size="10" fill="#1565C0">9.8×</text>
|
|
107
|
+
<circle cx="542" cy="359" r="5" fill="#BF360C"/>
|
|
108
|
+
<circle cx="555" cy="359" r="5" fill="#2E7D32"/>
|
|
109
|
+
<text x="563" y="363" font-size="10" fill="#BF360C">13×</text>
|
|
110
|
+
<text x="590" y="363" font-size="10" fill="#2E7D32">14×</text>
|
|
111
|
+
<rect x="0" y="372" width="820" height="26" fill="#ffffff"/>
|
|
112
|
+
<text x="212" y="389" text-anchor="end" font-size="11" fill="#424242">uscities</text>
|
|
113
|
+
<circle cx="495" cy="385" r="5" fill="#1565C0"/>
|
|
114
|
+
<text x="459" y="389" font-size="10" fill="#1565C0">8.9×</text>
|
|
115
|
+
<circle cx="531" cy="385" r="5" fill="#BF360C"/>
|
|
116
|
+
<circle cx="548" cy="385" r="5" fill="#2E7D32"/>
|
|
117
|
+
<text x="556" y="389" font-size="10" fill="#BF360C">12×</text>
|
|
118
|
+
<text x="583" y="389" font-size="10" fill="#2E7D32">13×</text>
|
|
119
|
+
<rect x="0" y="398" width="820" height="26" fill="#f5f5f5"/>
|
|
120
|
+
<text x="212" y="415" text-anchor="end" font-size="11" fill="#424242">embedded_separators_60k</text>
|
|
121
|
+
<circle cx="482" cy="411" r="5" fill="#1565C0"/>
|
|
122
|
+
<text x="446" y="415" font-size="10" fill="#1565C0">8.0×</text>
|
|
123
|
+
<circle cx="530" cy="411" r="5" fill="#BF360C"/>
|
|
124
|
+
<circle cx="543" cy="411" r="5" fill="#2E7D32"/>
|
|
125
|
+
<text x="551" y="415" font-size="10" fill="#BF360C">12×</text>
|
|
126
|
+
<text x="578" y="415" font-size="10" fill="#2E7D32">13×</text>
|
|
127
|
+
<rect x="0" y="424" width="820" height="26" fill="#ffffff"/>
|
|
128
|
+
<text x="212" y="441" text-anchor="end" font-size="11" fill="#424242">many_empty_fields_60k</text>
|
|
129
|
+
<circle cx="514" cy="437" r="5" fill="#1565C0"/>
|
|
130
|
+
<circle cx="540" cy="437" r="5" fill="#2E7D32"/>
|
|
131
|
+
<circle cx="541" cy="437" r="5" fill="#BF360C"/>
|
|
132
|
+
<text x="485" y="441" font-size="10" fill="#1565C0">10×</text>
|
|
133
|
+
<text x="549" y="441" font-size="10" fill="#2E7D32">13×</text>
|
|
134
|
+
<text x="576" y="441" font-size="10" fill="#BF360C">13×</text>
|
|
135
|
+
<rect x="0" y="450" width="820" height="26" fill="#f5f5f5"/>
|
|
136
|
+
<text x="212" y="467" text-anchor="end" font-size="11" fill="#424242">worldcities</text>
|
|
137
|
+
<circle cx="484" cy="463" r="5" fill="#1565C0"/>
|
|
138
|
+
<text x="448" y="467" font-size="10" fill="#1565C0">8.1×</text>
|
|
139
|
+
<circle cx="523" cy="463" r="5" fill="#BF360C"/>
|
|
140
|
+
<circle cx="536" cy="463" r="5" fill="#2E7D32"/>
|
|
141
|
+
<text x="544" y="467" font-size="10" fill="#BF360C">11×</text>
|
|
142
|
+
<text x="571" y="467" font-size="10" fill="#2E7D32">12×</text>
|
|
143
|
+
<rect x="0" y="476" width="820" height="26" fill="#ffffff"/>
|
|
144
|
+
<text x="212" y="493" text-anchor="end" font-size="11" fill="#424242">wide_500_cols_20k</text>
|
|
145
|
+
<circle cx="525" cy="489" r="5" fill="#1565C0"/>
|
|
146
|
+
<circle cx="527" cy="489" r="5" fill="#BF360C"/>
|
|
147
|
+
<circle cx="527" cy="489" r="5" fill="#2E7D32"/>
|
|
148
|
+
<text x="496" y="493" font-size="10" fill="#1565C0">11×</text>
|
|
149
|
+
<text x="535" y="493" font-size="10" fill="#BF360C">11×</text>
|
|
150
|
+
<text x="562" y="493" font-size="10" fill="#2E7D32">11×</text>
|
|
151
|
+
<rect x="0" y="502" width="820" height="26" fill="#f5f5f5"/>
|
|
152
|
+
<text x="212" y="519" text-anchor="end" font-size="11" fill="#424242">utf8_multibyte_60k</text>
|
|
153
|
+
<circle cx="494" cy="515" r="5" fill="#1565C0"/>
|
|
154
|
+
<text x="458" y="519" font-size="10" fill="#1565C0">8.8×</text>
|
|
155
|
+
<circle cx="525" cy="515" r="5" fill="#BF360C"/>
|
|
156
|
+
<circle cx="525" cy="515" r="5" fill="#2E7D32"/>
|
|
157
|
+
<text x="533" y="519" font-size="10" fill="#BF360C">11×</text>
|
|
158
|
+
<text x="560" y="519" font-size="10" fill="#2E7D32">11×</text>
|
|
159
|
+
<rect x="0" y="528" width="820" height="26" fill="#ffffff"/>
|
|
160
|
+
<text x="212" y="545" text-anchor="end" font-size="11" fill="#424242">embedded_newlines_60k</text>
|
|
161
|
+
<circle cx="479" cy="541" r="5" fill="#1565C0"/>
|
|
162
|
+
<circle cx="495" cy="541" r="5" fill="#BF360C"/>
|
|
163
|
+
<circle cx="503" cy="541" r="5" fill="#2E7D32"/>
|
|
164
|
+
<text x="443" y="545" font-size="10" fill="#1565C0">7.8×</text>
|
|
165
|
+
<text x="511" y="545" font-size="10" fill="#BF360C">8.9×</text>
|
|
166
|
+
<text x="545" y="545" font-size="10" fill="#2E7D32">9.5×</text>
|
|
167
|
+
<rect x="0" y="554" width="820" height="26" fill="#f5f5f5"/>
|
|
168
|
+
<text x="212" y="571" text-anchor="end" font-size="11" fill="#424242">sample_100k</text>
|
|
169
|
+
<circle cx="470" cy="567" r="5" fill="#1565C0"/>
|
|
170
|
+
<circle cx="500" cy="567" r="5" fill="#2E7D32"/>
|
|
171
|
+
<circle cx="502" cy="567" r="5" fill="#BF360C"/>
|
|
172
|
+
<text x="434" y="571" font-size="10" fill="#1565C0">7.3×</text>
|
|
173
|
+
<text x="510" y="571" font-size="10" fill="#2E7D32">9.2×</text>
|
|
174
|
+
<text x="544" y="571" font-size="10" fill="#BF360C">9.4×</text>
|
|
175
|
+
<circle cx="228" cy="594" r="5" fill="#1565C0"/>
|
|
176
|
+
<text x="240" y="598" font-size="11" fill="#1565C0">C accelerated (v1.15.2)</text>
|
|
177
|
+
<circle cx="228" cy="614" r="5" fill="#BF360C"/>
|
|
178
|
+
<text x="240" y="618" font-size="11" fill="#BF360C">C accelerated (v1.16.4)</text>
|
|
179
|
+
<circle cx="228" cy="634" r="5" fill="#2E7D32"/>
|
|
180
|
+
<text x="240" y="638" font-size="11" fill="#2E7D32">C accelerated (v1.17.0)</text>
|
|
181
|
+
</svg>
|