smarter_csv 1.15.2 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -0
- data/CHANGELOG.md +68 -1
- data/CONTRIBUTORS.md +3 -1
- data/Gemfile +1 -0
- data/README.md +123 -27
- data/docs/_introduction.md +40 -24
- data/docs/bad_row_quarantine.md +285 -0
- data/docs/basic_read_api.md +151 -9
- data/docs/basic_write_api.md +474 -59
- data/docs/batch_processing.md +161 -4
- data/docs/column_selection.md +183 -0
- data/docs/data_transformations.md +162 -29
- data/docs/examples.md +339 -46
- data/docs/header_transformations.md +93 -12
- data/docs/header_validations.md +56 -18
- data/docs/history.md +117 -0
- data/docs/instrumentation.md +165 -0
- data/docs/migrating_from_csv.md +290 -0
- data/docs/options.md +150 -87
- data/docs/parsing_strategy.md +63 -1
- data/docs/real_world_csv.md +262 -0
- data/docs/releases/1.16.0/benchmarks.md +223 -0
- data/docs/releases/1.16.0/changes.md +272 -0
- data/docs/releases/1.16.0/performance_notes.md +114 -0
- data/docs/row_col_sep.md +14 -5
- data/docs/value_converters.md +193 -57
- data/ext/smarter_csv/extconf.rb +3 -0
- data/ext/smarter_csv/smarter_csv.c +1007 -71
- data/images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.png +0 -0
- data/images/SmarterCSV_1.16.0_vs_RubyCSV_3.3.5_speedup.svg +108 -0
- data/images/SmarterCSV_1.16.0_vs_previous_C-speedup.png +0 -0
- data/images/SmarterCSV_1.16.0_vs_previous_C-speedup.svg +141 -0
- data/images/SmarterCSV_1.16.0_vs_previous_Rb-speedup.png +0 -0
- data/images/SmarterCSV_1.16.0_vs_previous_Rb-speedup.svg +139 -0
- data/lib/smarter_csv/errors.rb +8 -0
- data/lib/smarter_csv/file_io.rb +1 -1
- data/lib/smarter_csv/hash_transformations.rb +14 -13
- data/lib/smarter_csv/header_transformations.rb +21 -2
- data/lib/smarter_csv/headers.rb +2 -1
- data/lib/smarter_csv/options.rb +124 -7
- data/lib/smarter_csv/parser.rb +362 -75
- data/lib/smarter_csv/reader.rb +494 -46
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv/writer.rb +71 -19
- data/lib/smarter_csv.rb +95 -12
- data/smarter_csv.gemspec +20 -10
- metadata +37 -80
|
Binary file
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="764" height="632"
|
|
2
|
+
font-family="ui-monospace, 'Cascadia Code', 'Courier New', monospace" font-size="12">
|
|
3
|
+
<rect width="764" height="632" fill="#ffffff"/>
|
|
4
|
+
<text x="382" y="20" text-anchor="middle" font-size="14" font-weight="bold" fill="#212121">SmarterCSV 1.16.0 vs Ruby CSV.read 3.3.5</text>
|
|
5
|
+
<text x="382" y="36" text-anchor="middle" font-size="10" fill="#9e9e9e">Speedup = CSV.read time ÷ SmarterCSV time (higher = SmarterCSV is faster) · Ruby 3.4.7 · best of 30</text>
|
|
6
|
+
<text x="490" y="620" text-anchor="middle" font-size="11" fill="#616161">Speedup (CSV.read ÷ SmarterCSV 1.16.0 C)</text>
|
|
7
|
+
<line x1="240" y1="62" x2="240" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
8
|
+
<text x="240" y="606" text-anchor="middle" font-size="11" fill="#757575">0×</text>
|
|
9
|
+
<line x1="290" y1="62" x2="290" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
10
|
+
<text x="290" y="606" text-anchor="middle" font-size="11" fill="#757575">1×</text>
|
|
11
|
+
<line x1="340" y1="62" x2="340" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
12
|
+
<text x="340" y="606" text-anchor="middle" font-size="11" fill="#757575">2×</text>
|
|
13
|
+
<line x1="390" y1="62" x2="390" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
14
|
+
<text x="390" y="606" text-anchor="middle" font-size="11" fill="#757575">3×</text>
|
|
15
|
+
<line x1="440" y1="62" x2="440" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
16
|
+
<text x="440" y="606" text-anchor="middle" font-size="11" fill="#757575">4×</text>
|
|
17
|
+
<line x1="490" y1="62" x2="490" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
18
|
+
<text x="490" y="606" text-anchor="middle" font-size="11" fill="#757575">5×</text>
|
|
19
|
+
<line x1="540" y1="62" x2="540" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
20
|
+
<text x="540" y="606" text-anchor="middle" font-size="11" fill="#757575">6×</text>
|
|
21
|
+
<line x1="590" y1="62" x2="590" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
22
|
+
<text x="590" y="606" text-anchor="middle" font-size="11" fill="#757575">7×</text>
|
|
23
|
+
<line x1="640" y1="62" x2="640" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
24
|
+
<text x="640" y="606" text-anchor="middle" font-size="11" fill="#757575">8×</text>
|
|
25
|
+
<line x1="690" y1="62" x2="690" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
26
|
+
<text x="690" y="606" text-anchor="middle" font-size="11" fill="#757575">9×</text>
|
|
27
|
+
<line x1="740" y1="62" x2="740" y2="594" stroke="#e0e0e0" stroke-width="1"/>
|
|
28
|
+
<text x="740" y="606" text-anchor="middle" font-size="11" fill="#757575">10×</text>
|
|
29
|
+
<line x1="290" y1="62" x2="290" y2="594" stroke="#9e9e9e" stroke-width="1.5" stroke-dasharray="4,3"/>
|
|
30
|
+
<line x1="240" y1="594" x2="740" y2="594" stroke="#bdbdbd" stroke-width="1"/>
|
|
31
|
+
<line x1="240" y1="62" x2="240" y2="594" stroke="#bdbdbd" stroke-width="1"/>
|
|
32
|
+
<rect x="0" y="62" width="764" height="28" fill="#f5f5f5"/>
|
|
33
|
+
<text x="232" y="80" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_C.csv</text>
|
|
34
|
+
<rect x="240" y="67" width="425" height="18" fill="#1565C0" rx="2"/>
|
|
35
|
+
<text x="661" y="80" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">8.51×</text>
|
|
36
|
+
<rect x="0" y="90" width="764" height="28" fill="#ffffff"/>
|
|
37
|
+
<text x="232" y="108" text-anchor="end" font-size="11" fill="#424242">uszips.csv</text>
|
|
38
|
+
<rect x="240" y="95" width="348" height="18" fill="#1565C0" rx="2"/>
|
|
39
|
+
<text x="584" y="108" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">6.97×</text>
|
|
40
|
+
<rect x="0" y="118" width="764" height="28" fill="#f5f5f5"/>
|
|
41
|
+
<text x="232" y="136" text-anchor="end" font-size="11" fill="#424242">worldcities.csv</text>
|
|
42
|
+
<rect x="240" y="123" width="311" height="18" fill="#1565C0" rx="2"/>
|
|
43
|
+
<text x="547" y="136" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">6.22×</text>
|
|
44
|
+
<rect x="0" y="146" width="764" height="28" fill="#ffffff"/>
|
|
45
|
+
<text x="232" y="164" text-anchor="end" font-size="11" fill="#424242">long_fields_20k.csv</text>
|
|
46
|
+
<rect x="240" y="151" width="261" height="18" fill="#1565C0" rx="2"/>
|
|
47
|
+
<text x="497" y="164" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">5.22×</text>
|
|
48
|
+
<rect x="0" y="174" width="764" height="28" fill="#f5f5f5"/>
|
|
49
|
+
<text x="232" y="192" text-anchor="end" font-size="11" fill="#424242">uscities.csv</text>
|
|
50
|
+
<rect x="240" y="179" width="248" height="18" fill="#1565C0" rx="2"/>
|
|
51
|
+
<text x="484" y="192" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">4.96×</text>
|
|
52
|
+
<rect x="0" y="202" width="764" height="28" fill="#ffffff"/>
|
|
53
|
+
<text x="232" y="220" text-anchor="end" font-size="11" fill="#424242">embedded_separators_20k.csv</text>
|
|
54
|
+
<rect x="240" y="207" width="241" height="18" fill="#1565C0" rx="2"/>
|
|
55
|
+
<text x="477" y="220" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">4.83×</text>
|
|
56
|
+
<rect x="0" y="230" width="764" height="28" fill="#f5f5f5"/>
|
|
57
|
+
<text x="232" y="248" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_NC.csv</text>
|
|
58
|
+
<rect x="240" y="235" width="226" height="18" fill="#1565C0" rx="2"/>
|
|
59
|
+
<text x="462" y="248" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">4.52×</text>
|
|
60
|
+
<rect x="0" y="258" width="764" height="28" fill="#ffffff"/>
|
|
61
|
+
<text x="232" y="276" text-anchor="end" font-size="11" fill="#424242">many_empty_fields_20k.csv</text>
|
|
62
|
+
<rect x="240" y="263" width="207" height="18" fill="#1565C0" rx="2"/>
|
|
63
|
+
<text x="443" y="276" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">4.15×</text>
|
|
64
|
+
<rect x="0" y="286" width="764" height="28" fill="#f5f5f5"/>
|
|
65
|
+
<text x="232" y="304" text-anchor="end" font-size="11" fill="#424242">utf8_multibyte_20k.csv</text>
|
|
66
|
+
<rect x="240" y="291" width="190" height="18" fill="#1565C0" rx="2"/>
|
|
67
|
+
<text x="426" y="304" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.81×</text>
|
|
68
|
+
<rect x="0" y="314" width="764" height="28" fill="#ffffff"/>
|
|
69
|
+
<text x="232" y="332" text-anchor="end" font-size="11" fill="#424242">tab_separated_20k.tsv</text>
|
|
70
|
+
<rect x="240" y="319" width="165" height="18" fill="#1565C0" rx="2"/>
|
|
71
|
+
<text x="401" y="332" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.30×</text>
|
|
72
|
+
<rect x="0" y="342" width="764" height="28" fill="#f5f5f5"/>
|
|
73
|
+
<text x="232" y="360" text-anchor="end" font-size="11" fill="#424242">sample_10M.csv</text>
|
|
74
|
+
<rect x="240" y="347" width="163" height="18" fill="#1565C0" rx="2"/>
|
|
75
|
+
<text x="399" y="360" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.26×</text>
|
|
76
|
+
<rect x="0" y="370" width="764" height="28" fill="#ffffff"/>
|
|
77
|
+
<text x="232" y="388" text-anchor="end" font-size="11" fill="#424242">whitespace_heavy_20k.csv</text>
|
|
78
|
+
<rect x="240" y="375" width="160" height="18" fill="#1565C0" rx="2"/>
|
|
79
|
+
<text x="396" y="388" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.19×</text>
|
|
80
|
+
<rect x="0" y="398" width="764" height="28" fill="#f5f5f5"/>
|
|
81
|
+
<text x="232" y="416" text-anchor="end" font-size="11" fill="#424242">heavy_quoting_20k.csv</text>
|
|
82
|
+
<rect x="240" y="403" width="157" height="18" fill="#1565C0" rx="2"/>
|
|
83
|
+
<text x="393" y="416" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">3.14×</text>
|
|
84
|
+
<rect x="0" y="426" width="764" height="28" fill="#ffffff"/>
|
|
85
|
+
<text x="232" y="444" text-anchor="end" font-size="11" fill="#424242">embedded_newlines_20k.csv</text>
|
|
86
|
+
<rect x="240" y="431" width="148" height="18" fill="#1565C0" rx="2"/>
|
|
87
|
+
<text x="384" y="444" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">2.96×</text>
|
|
88
|
+
<rect x="0" y="454" width="764" height="28" fill="#f5f5f5"/>
|
|
89
|
+
<text x="232" y="472" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_B.csv</text>
|
|
90
|
+
<rect x="240" y="459" width="147" height="18" fill="#1565C0" rx="2"/>
|
|
91
|
+
<text x="383" y="472" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">2.94×</text>
|
|
92
|
+
<rect x="0" y="482" width="764" height="28" fill="#ffffff"/>
|
|
93
|
+
<text x="232" y="500" text-anchor="end" font-size="11" fill="#424242">multi_char_separator_20k.csv</text>
|
|
94
|
+
<rect x="240" y="487" width="145" height="18" fill="#1565C0" rx="2"/>
|
|
95
|
+
<text x="381" y="500" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">2.91×</text>
|
|
96
|
+
<rect x="0" y="510" width="764" height="28" fill="#f5f5f5"/>
|
|
97
|
+
<text x="232" y="528" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_NB.csv</text>
|
|
98
|
+
<rect x="240" y="515" width="140" height="18" fill="#1565C0" rx="2"/>
|
|
99
|
+
<text x="376" y="528" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">2.81×</text>
|
|
100
|
+
<rect x="0" y="538" width="764" height="28" fill="#ffffff"/>
|
|
101
|
+
<text x="232" y="556" text-anchor="end" font-size="11" fill="#424242">sensor_data_50krows_50cols.csv</text>
|
|
102
|
+
<rect x="240" y="543" width="105" height="18" fill="#1565C0" rx="2"/>
|
|
103
|
+
<text x="341" y="556" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">2.11×</text>
|
|
104
|
+
<rect x="0" y="566" width="764" height="28" fill="#f5f5f5"/>
|
|
105
|
+
<text x="232" y="584" text-anchor="end" font-size="11" fill="#424242">wide_500_cols_20k.csv</text>
|
|
106
|
+
<rect x="240" y="571" width="88" height="18" fill="#1565C0" rx="2"/>
|
|
107
|
+
<text x="324" y="584" text-anchor="end" font-size="10" fill="#ffffff" font-weight="bold">1.75×</text>
|
|
108
|
+
</svg>
|
|
Binary file
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="820" height="648"
|
|
2
|
+
font-family="ui-monospace, 'Cascadia Code', 'Courier New', monospace" font-size="12">
|
|
3
|
+
<rect width="820" height="648" fill="#ffffff"/>
|
|
4
|
+
<text x="410" y="18" text-anchor="middle" font-size="13" font-weight="bold" fill="#212121">SmarterCSV improvements 1.15.2, 1.16.0 vs 1.14.4 — C accelerated</text>
|
|
5
|
+
<text x="410" y="32" text-anchor="middle" font-size="10" fill="#9e9e9e">Speedup ratio = baseline version time ÷ newer version time (higher = newer version is faster)</text>
|
|
6
|
+
<text x="410" y="48" text-anchor="middle" font-size="11" fill="#616161">Ruby 3.4.7 [log scale, best of 30]</text>
|
|
7
|
+
<line x1="220" y1="68" x2="220" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
8
|
+
<text x="220" y="64" text-anchor="middle" font-size="11" fill="#757575">1×</text>
|
|
9
|
+
<line x1="307" y1="68" x2="307" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
10
|
+
<text x="307" y="64" text-anchor="middle" font-size="11" fill="#757575">2×</text>
|
|
11
|
+
<line x1="423" y1="68" x2="423" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
12
|
+
<text x="423" y="64" text-anchor="middle" font-size="11" fill="#757575">5×</text>
|
|
13
|
+
<line x1="510" y1="68" x2="510" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
14
|
+
<text x="510" y="64" text-anchor="middle" font-size="11" fill="#757575">10×</text>
|
|
15
|
+
<line x1="597" y1="68" x2="597" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
16
|
+
<text x="597" y="64" text-anchor="middle" font-size="11" fill="#757575">20×</text>
|
|
17
|
+
<line x1="713" y1="68" x2="713" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
18
|
+
<text x="713" y="64" text-anchor="middle" font-size="11" fill="#757575">50×</text>
|
|
19
|
+
<line x1="800" y1="68" x2="800" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
20
|
+
<text x="800" y="64" text-anchor="middle" font-size="11" fill="#757575">100×</text>
|
|
21
|
+
<line x1="220" y1="68" x2="220" y2="580" stroke="#9e9e9e" stroke-width="1.5"/>
|
|
22
|
+
<line x1="220" y1="68" x2="800" y2="68" stroke="#bdbdbd" stroke-width="1"/>
|
|
23
|
+
<rect x="0" y="86" width="820" height="26" fill="#f5f5f5"/>
|
|
24
|
+
<text x="212" y="103" text-anchor="end" font-size="11" fill="#424242">long_fields_20k</text>
|
|
25
|
+
<circle cx="635" cy="99" r="5" fill="#1565C0"/>
|
|
26
|
+
<text x="606" y="103" font-size="10" fill="#1565C0">27×</text>
|
|
27
|
+
<circle cx="744" cy="99" r="5" fill="#BF360C"/>
|
|
28
|
+
<text x="752" y="103" font-size="10" fill="#BF360C">64×</text>
|
|
29
|
+
<rect x="0" y="112" width="820" height="26" fill="#ffffff"/>
|
|
30
|
+
<text x="212" y="129" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_C</text>
|
|
31
|
+
<circle cx="683" cy="125" r="5" fill="#1565C0"/>
|
|
32
|
+
<text x="654" y="129" font-size="10" fill="#1565C0">40×</text>
|
|
33
|
+
<circle cx="708" cy="125" r="5" fill="#BF360C"/>
|
|
34
|
+
<text x="716" y="132" font-size="10" fill="#BF360C">48×</text>
|
|
35
|
+
<rect x="0" y="138" width="820" height="26" fill="#f5f5f5"/>
|
|
36
|
+
<text x="212" y="155" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_NC</text>
|
|
37
|
+
<circle cx="595" cy="151" r="5" fill="#1565C0"/>
|
|
38
|
+
<text x="566" y="155" font-size="10" fill="#1565C0">20×</text>
|
|
39
|
+
<circle cx="619" cy="151" r="5" fill="#BF360C"/>
|
|
40
|
+
<text x="627" y="158" font-size="10" fill="#BF360C">24×</text>
|
|
41
|
+
<rect x="0" y="164" width="820" height="26" fill="#ffffff"/>
|
|
42
|
+
<text x="212" y="181" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_NB</text>
|
|
43
|
+
<circle cx="589" cy="177" r="5" fill="#1565C0"/>
|
|
44
|
+
<text x="560" y="181" font-size="10" fill="#1565C0">19×</text>
|
|
45
|
+
<circle cx="598" cy="177" r="5" fill="#BF360C"/>
|
|
46
|
+
<text x="606" y="184" font-size="10" fill="#BF360C">20×</text>
|
|
47
|
+
<rect x="0" y="190" width="820" height="26" fill="#f5f5f5"/>
|
|
48
|
+
<text x="212" y="207" text-anchor="end" font-size="11" fill="#424242">multi_char_separator_20k</text>
|
|
49
|
+
<circle cx="573" cy="203" r="5" fill="#1565C0"/>
|
|
50
|
+
<text x="544" y="207" font-size="10" fill="#1565C0">16×</text>
|
|
51
|
+
<circle cx="596" cy="203" r="5" fill="#BF360C"/>
|
|
52
|
+
<text x="604" y="210" font-size="10" fill="#BF360C">20×</text>
|
|
53
|
+
<rect x="0" y="216" width="820" height="26" fill="#ffffff"/>
|
|
54
|
+
<text x="212" y="233" text-anchor="end" font-size="11" fill="#424242">whitespace_heavy_20k</text>
|
|
55
|
+
<circle cx="562" cy="229" r="5" fill="#1565C0"/>
|
|
56
|
+
<text x="533" y="233" font-size="10" fill="#1565C0">15×</text>
|
|
57
|
+
<circle cx="591" cy="229" r="5" fill="#BF360C"/>
|
|
58
|
+
<text x="599" y="236" font-size="10" fill="#BF360C">19×</text>
|
|
59
|
+
<rect x="0" y="242" width="820" height="26" fill="#f5f5f5"/>
|
|
60
|
+
<text x="212" y="259" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_B</text>
|
|
61
|
+
<circle cx="572" cy="255" r="5" fill="#1565C0"/>
|
|
62
|
+
<text x="543" y="259" font-size="10" fill="#1565C0">16×</text>
|
|
63
|
+
<circle cx="591" cy="255" r="5" fill="#BF360C"/>
|
|
64
|
+
<text x="599" y="262" font-size="10" fill="#BF360C">19×</text>
|
|
65
|
+
<rect x="0" y="268" width="820" height="26" fill="#ffffff"/>
|
|
66
|
+
<text x="212" y="285" text-anchor="end" font-size="11" fill="#424242">tab_separated_20k</text>
|
|
67
|
+
<circle cx="547" cy="281" r="5" fill="#1565C0"/>
|
|
68
|
+
<text x="518" y="285" font-size="10" fill="#1565C0">13×</text>
|
|
69
|
+
<circle cx="590" cy="281" r="5" fill="#BF360C"/>
|
|
70
|
+
<text x="598" y="288" font-size="10" fill="#BF360C">19×</text>
|
|
71
|
+
<rect x="0" y="294" width="820" height="26" fill="#f5f5f5"/>
|
|
72
|
+
<text x="212" y="311" text-anchor="end" font-size="11" fill="#424242">many_empty_fields_20k</text>
|
|
73
|
+
<circle cx="539" cy="307" r="5" fill="#1565C0"/>
|
|
74
|
+
<text x="510" y="311" font-size="10" fill="#1565C0">13×</text>
|
|
75
|
+
<circle cx="563" cy="307" r="5" fill="#BF360C"/>
|
|
76
|
+
<text x="571" y="314" font-size="10" fill="#BF360C">15×</text>
|
|
77
|
+
<rect x="0" y="320" width="820" height="26" fill="#ffffff"/>
|
|
78
|
+
<text x="212" y="337" text-anchor="end" font-size="11" fill="#424242">sensor_data_50krows_50cols</text>
|
|
79
|
+
<circle cx="558" cy="333" r="5" fill="#1565C0"/>
|
|
80
|
+
<text x="529" y="337" font-size="10" fill="#1565C0">15×</text>
|
|
81
|
+
<circle cx="560" cy="333" r="5" fill="#BF360C"/>
|
|
82
|
+
<text x="568" y="340" font-size="10" fill="#BF360C">15×</text>
|
|
83
|
+
<rect x="0" y="346" width="820" height="26" fill="#f5f5f5"/>
|
|
84
|
+
<text x="212" y="363" text-anchor="end" font-size="11" fill="#424242">heavy_quoting_20k</text>
|
|
85
|
+
<circle cx="506" cy="359" r="5" fill="#1565C0"/>
|
|
86
|
+
<text x="470" y="363" font-size="10" fill="#1565C0">9.7×</text>
|
|
87
|
+
<circle cx="557" cy="359" r="5" fill="#BF360C"/>
|
|
88
|
+
<text x="565" y="363" font-size="10" fill="#BF360C">15×</text>
|
|
89
|
+
<rect x="0" y="372" width="820" height="26" fill="#ffffff"/>
|
|
90
|
+
<text x="212" y="389" text-anchor="end" font-size="11" fill="#424242">utf8_multibyte_20k</text>
|
|
91
|
+
<circle cx="524" cy="385" r="5" fill="#1565C0"/>
|
|
92
|
+
<text x="495" y="389" font-size="10" fill="#1565C0">11×</text>
|
|
93
|
+
<circle cx="549" cy="385" r="5" fill="#BF360C"/>
|
|
94
|
+
<text x="557" y="392" font-size="10" fill="#BF360C">14×</text>
|
|
95
|
+
<rect x="0" y="398" width="820" height="26" fill="#f5f5f5"/>
|
|
96
|
+
<text x="212" y="415" text-anchor="end" font-size="11" fill="#424242">wide_500_cols_20k</text>
|
|
97
|
+
<circle cx="538" cy="411" r="5" fill="#1565C0"/>
|
|
98
|
+
<text x="509" y="415" font-size="10" fill="#1565C0">12×</text>
|
|
99
|
+
<circle cx="541" cy="411" r="5" fill="#BF360C"/>
|
|
100
|
+
<text x="549" y="418" font-size="10" fill="#BF360C">13×</text>
|
|
101
|
+
<rect x="0" y="424" width="820" height="26" fill="#ffffff"/>
|
|
102
|
+
<text x="212" y="441" text-anchor="end" font-size="11" fill="#424242">uszips</text>
|
|
103
|
+
<circle cx="527" cy="437" r="5" fill="#1565C0"/>
|
|
104
|
+
<text x="498" y="441" font-size="10" fill="#1565C0">11×</text>
|
|
105
|
+
<circle cx="538" cy="437" r="5" fill="#BF360C"/>
|
|
106
|
+
<text x="546" y="444" font-size="10" fill="#BF360C">13×</text>
|
|
107
|
+
<rect x="0" y="450" width="820" height="26" fill="#f5f5f5"/>
|
|
108
|
+
<text x="212" y="467" text-anchor="end" font-size="11" fill="#424242">embedded_separators_20k</text>
|
|
109
|
+
<circle cx="492" cy="463" r="5" fill="#1565C0"/>
|
|
110
|
+
<text x="456" y="467" font-size="10" fill="#1565C0">8.7×</text>
|
|
111
|
+
<circle cx="525" cy="463" r="5" fill="#BF360C"/>
|
|
112
|
+
<text x="533" y="470" font-size="10" fill="#BF360C">11×</text>
|
|
113
|
+
<rect x="0" y="476" width="820" height="26" fill="#ffffff"/>
|
|
114
|
+
<text x="212" y="493" text-anchor="end" font-size="11" fill="#424242">worldcities</text>
|
|
115
|
+
<circle cx="500" cy="489" r="5" fill="#1565C0"/>
|
|
116
|
+
<text x="464" y="493" font-size="10" fill="#1565C0">9.2×</text>
|
|
117
|
+
<circle cx="522" cy="489" r="5" fill="#BF360C"/>
|
|
118
|
+
<text x="530" y="496" font-size="10" fill="#BF360C">11×</text>
|
|
119
|
+
<rect x="0" y="502" width="820" height="26" fill="#f5f5f5"/>
|
|
120
|
+
<text x="212" y="519" text-anchor="end" font-size="11" fill="#424242">sample_10M</text>
|
|
121
|
+
<circle cx="491" cy="515" r="5" fill="#1565C0"/>
|
|
122
|
+
<text x="455" y="519" font-size="10" fill="#1565C0">8.6×</text>
|
|
123
|
+
<circle cx="509" cy="515" r="5" fill="#BF360C"/>
|
|
124
|
+
<text x="517" y="522" font-size="10" fill="#BF360C">9.9×</text>
|
|
125
|
+
<rect x="0" y="528" width="820" height="26" fill="#ffffff"/>
|
|
126
|
+
<text x="212" y="545" text-anchor="end" font-size="11" fill="#424242">uscities</text>
|
|
127
|
+
<circle cx="502" cy="541" r="5" fill="#1565C0"/>
|
|
128
|
+
<text x="466" y="545" font-size="10" fill="#1565C0">9.4×</text>
|
|
129
|
+
<circle cx="507" cy="541" r="5" fill="#BF360C"/>
|
|
130
|
+
<text x="515" y="548" font-size="10" fill="#BF360C">9.8×</text>
|
|
131
|
+
<rect x="0" y="554" width="820" height="26" fill="#f5f5f5"/>
|
|
132
|
+
<text x="212" y="571" text-anchor="end" font-size="11" fill="#424242">embedded_newlines_20k</text>
|
|
133
|
+
<circle cx="505" cy="567" r="5" fill="#1565C0"/>
|
|
134
|
+
<text x="469" y="571" font-size="10" fill="#1565C0">9.6×</text>
|
|
135
|
+
<circle cx="507" cy="567" r="5" fill="#BF360C"/>
|
|
136
|
+
<text x="515" y="574" font-size="10" fill="#BF360C">9.8×</text>
|
|
137
|
+
<circle cx="228" cy="594" r="5" fill="#1565C0"/>
|
|
138
|
+
<text x="240" y="598" font-size="11" fill="#1565C0">C accelerated (v1.15.2)</text>
|
|
139
|
+
<circle cx="228" cy="614" r="5" fill="#BF360C"/>
|
|
140
|
+
<text x="240" y="618" font-size="11" fill="#BF360C">C accelerated (v1.16.0)</text>
|
|
141
|
+
</svg>
|
|
Binary file
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="820" height="648"
|
|
2
|
+
font-family="ui-monospace, 'Cascadia Code', 'Courier New', monospace" font-size="12">
|
|
3
|
+
<rect width="820" height="648" fill="#ffffff"/>
|
|
4
|
+
<text x="410" y="18" text-anchor="middle" font-size="13" font-weight="bold" fill="#212121">SmarterCSV improvements 1.15.2, 1.16.0 vs 1.14.4 — Ruby (not accelerated)</text>
|
|
5
|
+
<text x="410" y="32" text-anchor="middle" font-size="10" fill="#9e9e9e">Speedup ratio = baseline version time ÷ newer version time (higher = newer version is faster)</text>
|
|
6
|
+
<text x="410" y="48" text-anchor="middle" font-size="11" fill="#616161">Ruby 3.4.7 [log scale, best of 30]</text>
|
|
7
|
+
<line x1="220" y1="68" x2="220" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
8
|
+
<text x="220" y="64" text-anchor="middle" font-size="11" fill="#757575">1×</text>
|
|
9
|
+
<line x1="323" y1="68" x2="323" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
10
|
+
<text x="323" y="64" text-anchor="middle" font-size="11" fill="#757575">2×</text>
|
|
11
|
+
<line x1="459" y1="68" x2="459" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
12
|
+
<text x="459" y="64" text-anchor="middle" font-size="11" fill="#757575">5×</text>
|
|
13
|
+
<line x1="561" y1="68" x2="561" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
14
|
+
<text x="561" y="64" text-anchor="middle" font-size="11" fill="#757575">10×</text>
|
|
15
|
+
<line x1="664" y1="68" x2="664" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
16
|
+
<text x="664" y="64" text-anchor="middle" font-size="11" fill="#757575">20×</text>
|
|
17
|
+
<line x1="800" y1="68" x2="800" y2="580" stroke="#e0e0e0" stroke-width="1"/>
|
|
18
|
+
<text x="800" y="64" text-anchor="middle" font-size="11" fill="#757575">50×</text>
|
|
19
|
+
<line x1="220" y1="68" x2="220" y2="580" stroke="#9e9e9e" stroke-width="1.5"/>
|
|
20
|
+
<line x1="220" y1="68" x2="800" y2="68" stroke="#bdbdbd" stroke-width="1"/>
|
|
21
|
+
<rect x="0" y="86" width="820" height="26" fill="#f5f5f5"/>
|
|
22
|
+
<text x="212" y="103" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_C</text>
|
|
23
|
+
<rect x="563" y="94" width="10" height="10" fill="#1565C0"/>
|
|
24
|
+
<text x="539" y="103" font-size="10" fill="#1565C0">10×</text>
|
|
25
|
+
<rect x="660" y="94" width="10" height="10" fill="#BF360C"/>
|
|
26
|
+
<text x="673" y="103" font-size="10" fill="#BF360C">20×</text>
|
|
27
|
+
<rect x="0" y="112" width="820" height="26" fill="#ffffff"/>
|
|
28
|
+
<text x="212" y="129" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_NC</text>
|
|
29
|
+
<rect x="515" y="120" width="10" height="10" fill="#1565C0"/>
|
|
30
|
+
<text x="484" y="129" font-size="10" fill="#1565C0">7.6×</text>
|
|
31
|
+
<rect x="572" y="120" width="10" height="10" fill="#BF360C"/>
|
|
32
|
+
<text x="585" y="129" font-size="10" fill="#BF360C">11×</text>
|
|
33
|
+
<rect x="0" y="138" width="820" height="26" fill="#f5f5f5"/>
|
|
34
|
+
<text x="212" y="155" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_NB</text>
|
|
35
|
+
<rect x="535" y="146" width="10" height="10" fill="#1565C0"/>
|
|
36
|
+
<text x="504" y="155" font-size="10" fill="#1565C0">8.6×</text>
|
|
37
|
+
<rect x="554" y="146" width="10" height="10" fill="#BF360C"/>
|
|
38
|
+
<text x="567" y="158" font-size="10" fill="#BF360C">9.8×</text>
|
|
39
|
+
<rect x="0" y="164" width="820" height="26" fill="#ffffff"/>
|
|
40
|
+
<text x="212" y="181" text-anchor="end" font-size="11" fill="#424242">whitespace_heavy_20k</text>
|
|
41
|
+
<rect x="546" y="172" width="10" height="10" fill="#1565C0"/>
|
|
42
|
+
<text x="515" y="181" font-size="10" fill="#1565C0">9.3×</text>
|
|
43
|
+
<rect x="550" y="172" width="10" height="10" fill="#BF360C"/>
|
|
44
|
+
<text x="563" y="184" font-size="10" fill="#BF360C">9.6×</text>
|
|
45
|
+
<rect x="0" y="190" width="820" height="26" fill="#f5f5f5"/>
|
|
46
|
+
<text x="212" y="207" text-anchor="end" font-size="11" fill="#424242">PEOPLE_IMPORT_B</text>
|
|
47
|
+
<rect x="535" y="198" width="10" height="10" fill="#1565C0"/>
|
|
48
|
+
<text x="504" y="207" font-size="10" fill="#1565C0">8.7×</text>
|
|
49
|
+
<rect x="541" y="198" width="10" height="10" fill="#BF360C"/>
|
|
50
|
+
<text x="554" y="210" font-size="10" fill="#BF360C">9.0×</text>
|
|
51
|
+
<rect x="0" y="216" width="820" height="26" fill="#ffffff"/>
|
|
52
|
+
<text x="212" y="233" text-anchor="end" font-size="11" fill="#424242">tab_separated_20k</text>
|
|
53
|
+
<rect x="521" y="224" width="10" height="10" fill="#1565C0"/>
|
|
54
|
+
<text x="490" y="233" font-size="10" fill="#1565C0">7.9×</text>
|
|
55
|
+
<rect x="528" y="224" width="10" height="10" fill="#BF360C"/>
|
|
56
|
+
<text x="541" y="236" font-size="10" fill="#BF360C">8.2×</text>
|
|
57
|
+
<rect x="0" y="242" width="820" height="26" fill="#f5f5f5"/>
|
|
58
|
+
<text x="212" y="259" text-anchor="end" font-size="11" fill="#424242">multi_char_separator_20k</text>
|
|
59
|
+
<rect x="476" y="250" width="10" height="10" fill="#1565C0"/>
|
|
60
|
+
<text x="445" y="259" font-size="10" fill="#1565C0">5.8×</text>
|
|
61
|
+
<rect x="519" y="250" width="10" height="10" fill="#BF360C"/>
|
|
62
|
+
<text x="532" y="262" font-size="10" fill="#BF360C">7.8×</text>
|
|
63
|
+
<rect x="0" y="268" width="820" height="26" fill="#ffffff"/>
|
|
64
|
+
<text x="212" y="285" text-anchor="end" font-size="11" fill="#424242">wide_500_cols_20k</text>
|
|
65
|
+
<rect x="486" y="276" width="10" height="10" fill="#1565C0"/>
|
|
66
|
+
<text x="455" y="285" font-size="10" fill="#1565C0">6.2×</text>
|
|
67
|
+
<rect x="487" y="276" width="10" height="10" fill="#BF360C"/>
|
|
68
|
+
<text x="500" y="288" font-size="10" fill="#BF360C">6.3×</text>
|
|
69
|
+
<rect x="0" y="294" width="820" height="26" fill="#f5f5f5"/>
|
|
70
|
+
<text x="212" y="311" text-anchor="end" font-size="11" fill="#424242">utf8_multibyte_20k</text>
|
|
71
|
+
<rect x="473" y="302" width="10" height="10" fill="#1565C0"/>
|
|
72
|
+
<text x="442" y="311" font-size="10" fill="#1565C0">5.7×</text>
|
|
73
|
+
<rect x="481" y="302" width="10" height="10" fill="#BF360C"/>
|
|
74
|
+
<text x="494" y="314" font-size="10" fill="#BF360C">6.0×</text>
|
|
75
|
+
<rect x="0" y="320" width="820" height="26" fill="#ffffff"/>
|
|
76
|
+
<text x="212" y="337" text-anchor="end" font-size="11" fill="#424242">many_empty_fields_20k</text>
|
|
77
|
+
<rect x="395" y="328" width="10" height="10" fill="#1565C0"/>
|
|
78
|
+
<text x="364" y="337" font-size="10" fill="#1565C0">3.4×</text>
|
|
79
|
+
<rect x="461" y="328" width="10" height="10" fill="#BF360C"/>
|
|
80
|
+
<text x="474" y="337" font-size="10" fill="#BF360C">5.3×</text>
|
|
81
|
+
<rect x="0" y="346" width="820" height="26" fill="#f5f5f5"/>
|
|
82
|
+
<text x="212" y="363" text-anchor="end" font-size="11" fill="#424242">sensor_data_50krows_50cols</text>
|
|
83
|
+
<rect x="453" y="354" width="10" height="10" fill="#1565C0"/>
|
|
84
|
+
<text x="422" y="363" font-size="10" fill="#1565C0">5.0×</text>
|
|
85
|
+
<rect x="457" y="354" width="10" height="10" fill="#BF360C"/>
|
|
86
|
+
<text x="470" y="366" font-size="10" fill="#BF360C">5.1×</text>
|
|
87
|
+
<rect x="0" y="372" width="820" height="26" fill="#ffffff"/>
|
|
88
|
+
<text x="212" y="389" text-anchor="end" font-size="11" fill="#424242">sample_10M</text>
|
|
89
|
+
<rect x="435" y="380" width="10" height="10" fill="#1565C0"/>
|
|
90
|
+
<text x="404" y="389" font-size="10" fill="#1565C0">4.4×</text>
|
|
91
|
+
<rect x="447" y="380" width="10" height="10" fill="#BF360C"/>
|
|
92
|
+
<text x="460" y="392" font-size="10" fill="#BF360C">4.8×</text>
|
|
93
|
+
<rect x="0" y="398" width="820" height="26" fill="#f5f5f5"/>
|
|
94
|
+
<text x="212" y="415" text-anchor="end" font-size="11" fill="#424242">long_fields_20k</text>
|
|
95
|
+
<rect x="308" y="406" width="10" height="10" fill="#1565C0"/>
|
|
96
|
+
<text x="277" y="415" font-size="10" fill="#1565C0">1.9×</text>
|
|
97
|
+
<rect x="402" y="406" width="10" height="10" fill="#BF360C"/>
|
|
98
|
+
<text x="415" y="415" font-size="10" fill="#BF360C">3.5×</text>
|
|
99
|
+
<rect x="0" y="424" width="820" height="26" fill="#ffffff"/>
|
|
100
|
+
<text x="212" y="441" text-anchor="end" font-size="11" fill="#424242">heavy_quoting_20k</text>
|
|
101
|
+
<rect x="289" y="432" width="10" height="10" fill="#1565C0"/>
|
|
102
|
+
<text x="258" y="441" font-size="10" fill="#1565C0">1.7×</text>
|
|
103
|
+
<rect x="364" y="432" width="10" height="10" fill="#BF360C"/>
|
|
104
|
+
<text x="377" y="441" font-size="10" fill="#BF360C">2.7×</text>
|
|
105
|
+
<rect x="0" y="450" width="820" height="26" fill="#f5f5f5"/>
|
|
106
|
+
<text x="212" y="467" text-anchor="end" font-size="11" fill="#424242">worldcities</text>
|
|
107
|
+
<rect x="275" y="458" width="10" height="10" fill="#1565C0"/>
|
|
108
|
+
<text x="244" y="467" font-size="10" fill="#1565C0">1.5×</text>
|
|
109
|
+
<rect x="360" y="458" width="10" height="10" fill="#BF360C"/>
|
|
110
|
+
<text x="373" y="467" font-size="10" fill="#BF360C">2.7×</text>
|
|
111
|
+
<rect x="0" y="476" width="820" height="26" fill="#ffffff"/>
|
|
112
|
+
<text x="212" y="493" text-anchor="end" font-size="11" fill="#424242">embedded_separators_20k</text>
|
|
113
|
+
<rect x="280" y="484" width="10" height="10" fill="#1565C0"/>
|
|
114
|
+
<text x="249" y="493" font-size="10" fill="#1565C0">1.6×</text>
|
|
115
|
+
<rect x="357" y="484" width="10" height="10" fill="#BF360C"/>
|
|
116
|
+
<text x="370" y="493" font-size="10" fill="#BF360C">2.6×</text>
|
|
117
|
+
<rect x="0" y="502" width="820" height="26" fill="#f5f5f5"/>
|
|
118
|
+
<text x="212" y="519" text-anchor="end" font-size="11" fill="#424242">uscities</text>
|
|
119
|
+
<rect x="274" y="510" width="10" height="10" fill="#1565C0"/>
|
|
120
|
+
<text x="243" y="519" font-size="10" fill="#1565C0">1.5×</text>
|
|
121
|
+
<rect x="352" y="510" width="10" height="10" fill="#BF360C"/>
|
|
122
|
+
<text x="365" y="519" font-size="10" fill="#BF360C">2.5×</text>
|
|
123
|
+
<rect x="0" y="528" width="820" height="26" fill="#ffffff"/>
|
|
124
|
+
<text x="212" y="545" text-anchor="end" font-size="11" fill="#424242">uszips</text>
|
|
125
|
+
<rect x="274" y="536" width="10" height="10" fill="#1565C0"/>
|
|
126
|
+
<text x="243" y="545" font-size="10" fill="#1565C0">1.5×</text>
|
|
127
|
+
<rect x="348" y="536" width="10" height="10" fill="#BF360C"/>
|
|
128
|
+
<text x="361" y="545" font-size="10" fill="#BF360C">2.4×</text>
|
|
129
|
+
<rect x="0" y="554" width="820" height="26" fill="#f5f5f5"/>
|
|
130
|
+
<text x="212" y="571" text-anchor="end" font-size="11" fill="#424242">embedded_newlines_20k</text>
|
|
131
|
+
<rect x="322" y="562" width="10" height="10" fill="#1565C0"/>
|
|
132
|
+
<text x="291" y="571" font-size="10" fill="#1565C0">2.1×</text>
|
|
133
|
+
<rect x="336" y="562" width="10" height="10" fill="#BF360C"/>
|
|
134
|
+
<text x="349" y="574" font-size="10" fill="#BF360C">2.3×</text>
|
|
135
|
+
<rect x="223" y="589" width="10" height="10" fill="#1565C0"/>
|
|
136
|
+
<text x="240" y="598" font-size="11" fill="#1565C0">Ruby path (v1.15.2)</text>
|
|
137
|
+
<rect x="223" y="609" width="10" height="10" fill="#BF360C"/>
|
|
138
|
+
<text x="240" y="618" font-size="11" fill="#BF360C">Ruby path (v1.16.0)</text>
|
|
139
|
+
</svg>
|
data/lib/smarter_csv/errors.rb
CHANGED
|
@@ -2,11 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
module SmarterCSV
|
|
4
4
|
class Error < StandardError; end # new code should rescue this instead
|
|
5
|
+
|
|
5
6
|
# Reader:
|
|
6
7
|
class SmarterCSVException < Error; end # for backwards compatibility
|
|
7
8
|
class HeaderSizeMismatch < SmarterCSVException; end
|
|
8
9
|
class IncorrectOption < SmarterCSVException; end
|
|
9
10
|
class ValidationError < SmarterCSVException; end
|
|
11
|
+
|
|
10
12
|
class DuplicateHeaders < SmarterCSVException
|
|
11
13
|
attr_reader :headers
|
|
12
14
|
|
|
@@ -25,9 +27,15 @@ module SmarterCSV
|
|
|
25
27
|
end
|
|
26
28
|
end
|
|
27
29
|
|
|
30
|
+
class EmptyFileError < SmarterCSVException; end
|
|
28
31
|
class NoColSepDetected < SmarterCSVException; end
|
|
29
32
|
class KeyMappingError < SmarterCSVException; end
|
|
30
33
|
class MalformedCSV < SmarterCSVException; end
|
|
34
|
+
class FieldSizeLimitExceeded < SmarterCSVException; end
|
|
35
|
+
|
|
31
36
|
# Writer:
|
|
32
37
|
class InvalidInputData < SmarterCSVException; end
|
|
38
|
+
|
|
39
|
+
# Bad-row quarantine:
|
|
40
|
+
class TooManyBadRows < SmarterCSVException; end
|
|
33
41
|
end
|
data/lib/smarter_csv/file_io.rb
CHANGED
|
@@ -44,7 +44,7 @@ module SmarterCSV
|
|
|
44
44
|
return str.byteslice(2..-1) if [UTF_16_BOM, UTF_16LE_BOM].include?(str_as_hex[0..1])
|
|
45
45
|
|
|
46
46
|
# :nocov:
|
|
47
|
-
|
|
47
|
+
warn "SmarterCSV found unhandled BOM! #{str.chars[0..7].inspect}" unless @options[:verbose] == :quiet
|
|
48
48
|
str
|
|
49
49
|
# :nocov:
|
|
50
50
|
end
|
data/lib/smarter_csv/hash_transformations.rb
CHANGED
|
@@ -17,16 +17,26 @@ module SmarterCSV
|
|
|
17
17
|
|
|
18
18
|
remove_empty_values = options[:remove_empty_values] == true
|
|
19
19
|
remove_zero_values = options[:remove_zero_values]
|
|
20
|
-
|
|
20
|
+
nil_values_matching = options[:nil_values_matching]
|
|
21
21
|
convert_to_numeric = options[:convert_values_to_numeric]
|
|
22
22
|
value_converters = options[:value_converters]
|
|
23
23
|
|
|
24
24
|
# Early return if no transformations needed
|
|
25
|
-
return hash unless remove_empty_values || remove_zero_values ||
|
|
25
|
+
return hash unless remove_empty_values || remove_zero_values || nil_values_matching || convert_to_numeric || value_converters
|
|
26
26
|
|
|
27
27
|
keys_to_delete = []
|
|
28
28
|
|
|
29
29
|
hash.each do |k, v|
|
|
30
|
+
# Nil-ify values matching the pattern (keeps the key; remove_empty_values handles deletion)
|
|
31
|
+
if nil_values_matching
|
|
32
|
+
str_val = v.is_a?(String) ? v : (v.is_a?(Numeric) ? v.to_s : nil)
|
|
33
|
+
if str_val && nil_values_matching.match?(str_val)
|
|
34
|
+
hash[k] = nil
|
|
35
|
+
v = nil
|
|
36
|
+
# fall through: remove_empty_values will delete the key if true
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
30
40
|
# Check if this key/value should be removed
|
|
31
41
|
# Note: numeric values (Integer/Float) are never blank, so skip the blank check for them
|
|
32
42
|
if remove_empty_values && !v.is_a?(Numeric) && (has_rails ? v.blank? : blank?(v))
|
|
@@ -40,15 +50,6 @@ module SmarterCSV
|
|
|
40
50
|
next
|
|
41
51
|
end
|
|
42
52
|
|
|
43
|
-
# Match against string values, or against the string representation of numeric values
|
|
44
|
-
if remove_values_matching
|
|
45
|
-
str_val = v.is_a?(String) ? v : (v.is_a?(Numeric) ? v.to_s : nil)
|
|
46
|
-
if str_val && remove_values_matching.match?(str_val)
|
|
47
|
-
keys_to_delete << k
|
|
48
|
-
next
|
|
49
|
-
end
|
|
50
|
-
end
|
|
51
|
-
|
|
52
53
|
# Convert to numeric if requested
|
|
53
54
|
if convert_to_numeric && v.is_a?(String) && !limit_execution_for_only_or_except(options, :convert_values_to_numeric, k)
|
|
54
55
|
if FLOAT_REGEX.match?(v)
|
|
@@ -75,7 +76,7 @@ module SmarterCSV
|
|
|
75
76
|
# def hash_transformations(hash, options)
|
|
76
77
|
# remove_empty_values = options[:remove_empty_values] == true
|
|
77
78
|
# remove_zero_values = options[:remove_zero_values]
|
|
78
|
-
#
|
|
79
|
+
# nil_values_matching = options[:nil_values_matching] # replaces deprecated remove_values_matching
|
|
79
80
|
# convert_to_numeric = options[:convert_values_to_numeric]
|
|
80
81
|
# value_converters = options[:value_converters]
|
|
81
82
|
#
|
|
@@ -83,7 +84,7 @@ module SmarterCSV
|
|
|
83
84
|
# next if k.nil? || k == '' || k == :""
|
|
84
85
|
# next if remove_empty_values && (has_rails ? v.blank? : blank?(v))
|
|
85
86
|
# next if remove_zero_values && v.is_a?(String) && ZERO_REGEX.match?(v)
|
|
86
|
-
# next if
|
|
87
|
+
# next if nil_values_matching && nil_values_matching.match?(v)
|
|
87
88
|
#
|
|
88
89
|
# if convert_to_numeric && !limit_execution_for_only_or_except(options, :convert_values_to_numeric, k)
|
|
89
90
|
# if v.is_a?(String)
|
data/lib/smarter_csv/header_transformations.rb
CHANGED
|
@@ -8,6 +8,9 @@ module SmarterCSV
|
|
|
8
8
|
header_array.map!{|x| x.strip} if options[:strip_whitespace]
|
|
9
9
|
|
|
10
10
|
unless options[:keep_original_headers]
|
|
11
|
+
# Normalize whitespace-only headers to "" before gsub so they are treated as
|
|
12
|
+
# blank/missing by disambiguate_headers rather than converted to "_".
|
|
13
|
+
header_array.map!{|x| blank?(x) ? '' : x} unless options[:strip_whitespace]
|
|
11
14
|
header_array.map!{|x| x.gsub(/\s+|-+/, '_')}
|
|
12
15
|
header_array.map!{|x| x.downcase} if options[:downcase_header]
|
|
13
16
|
end
|
|
@@ -24,9 +27,25 @@ module SmarterCSV
|
|
|
24
27
|
|
|
25
28
|
def disambiguate_headers(headers, options)
|
|
26
29
|
counts = Hash.new(0)
|
|
30
|
+
empty_count = 0
|
|
31
|
+
prefix = options[:missing_header_prefix] || 'column_'
|
|
32
|
+
# Pre-collect non-blank header names so auto-generated names can avoid collisions.
|
|
33
|
+
used = headers.reject { |h| blank?(h) }
|
|
27
34
|
headers.map do |header|
|
|
28
|
-
|
|
29
|
-
|
|
35
|
+
if blank?(header)
|
|
36
|
+
# Empty headers use missing_header_prefix (e.g. "column_1", "column_2") so they
|
|
37
|
+
# produce a usable key instead of :"" which gets silently deleted downstream.
|
|
38
|
+
# Skip ahead if the generated name collides with an existing header.
|
|
39
|
+
begin
|
|
40
|
+
empty_count += 1
|
|
41
|
+
candidate = "#{prefix}#{empty_count}"
|
|
42
|
+
end while used.include?(candidate)
|
|
43
|
+
used << candidate
|
|
44
|
+
candidate
|
|
45
|
+
else
|
|
46
|
+
counts[header] += 1
|
|
47
|
+
counts[header] > 1 ? "#{header}#{options[:duplicate_header_suffix]}#{counts[header]}" : header
|
|
48
|
+
end
|
|
30
49
|
end
|
|
31
50
|
end
|
|
32
51
|
|
data/lib/smarter_csv/headers.rb
CHANGED
|
@@ -13,7 +13,8 @@ module SmarterCSV
|
|
|
13
13
|
# process the header line in the CSV file..
|
|
14
14
|
# the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
|
|
15
15
|
header_line = @raw_header = next_line_with_counts(filehandle, options)
|
|
16
|
-
header_line = preprocess_header_line(header_line, options)
|
|
16
|
+
header_line = preprocess_header_line(header_line, options) unless header_line.nil?
|
|
17
|
+
raise SmarterCSV::EmptyFileError, "Empty CSV file" if blank?(header_line)
|
|
17
18
|
|
|
18
19
|
file_header_array, file_header_size = parse(header_line, options)
|
|
19
20
|
|