smarter_csv 1.17.3 → 1.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +50 -1
- data/CONTRIBUTORS.md +2 -1
- data/README.md +7 -2
- data/docs/data_transformations.md +33 -0
- data/docs/migrating_from_csv.md +18 -0
- data/docs/options.md +2 -1
- data/docs/upgrade_wizard.html +14 -10
- data/ext/smarter_csv/smarter_csv.c +204 -32
- data/ext/smarter_csv/vendor/LICENSE-fast_float-MIT +27 -0
- data/ext/smarter_csv/vendor/eisel_lemire.h +117 -0
- data/ext/smarter_csv/vendor/eisel_lemire.md +29 -0
- data/ext/smarter_csv/vendor/eisel_lemire_powers.h +663 -0
- data/lib/smarter_csv/hash_transformations.rb +51 -2
- data/lib/smarter_csv/reader.rb +18 -6
- data/lib/smarter_csv/reader_options.rb +24 -0
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv.rb +1 -0
- data/smarter_csv.gemspec +3 -0
- metadata +22 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3335e39a1c0792f01df9e95401c7f3885c49a0d64eeb9c76e5c20e25d01a62f5
|
|
4
|
+
data.tar.gz: e43f00228777b56fc1ee0814a74acaa6a23c51fe8da6f64e42ad92fe1b54002f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2abcd136f30d284c3c27cbd2b6c9782aec4235ec62cc27ea7620380ae9efc889f9f1c05c10ad957d6e7c84d65be75d35d96d4ac59ba5780dc5e65e0151c661e6
|
|
7
|
+
data.tar.gz: 6c61062c08d0a89dea2c91a7faafd88f6d09c88015ad8c7f4facb2eb474b44b18e0c5eaffc36fa5e2885f0b2c02487497f2460d66ce2433f62c09bf42d92a4ce
|
data/CHANGELOG.md
CHANGED
|
@@ -2,7 +2,56 @@
|
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
|
3
3
|
|
|
4
4
|
> [!TIP]
|
|
5
|
-
> **Upgrading?** The [SmarterCSV Upgrade Wizard](https://tilo.github.io/smarter_csv/upgrade_wizard.html) walks you through what (if anything) you need to change for your specific version. Most
|
|
5
|
+
> **Upgrading?** The [SmarterCSV Upgrade Wizard](https://tilo.github.io/smarter_csv/upgrade_wizard.html) walks you through what (if anything) you need to change for your specific version. Most steps do not require any changes.
|
|
6
|
+
|
|
7
|
+
## 1.18.0 (2026-06-17)
|
|
8
|
+
|
|
9
|
+
This release focuses on performance and introduces automatic conversion of decimal values to `Float` or `BigDecimal`, preserving precision and supporting scientific notation.
|
|
10
|
+
|
|
11
|
+
⚠️ This version is particularly interesting if you have geolocation, scientific, or high-precision data.
|
|
12
|
+
|
|
13
|
+
### New Features
|
|
14
|
+
|
|
15
|
+
- **`decimal_precision` option** (`:auto` default, or `:float` / `:bigdecimal`) — controls how decimal values are converted. `:auto` returns a `Float` unless the value carries more than 16 significant digits, in which case it returns a `BigDecimal` so no precision is lost; `:float` always returns `Float`; `:bigdecimal` always returns `BigDecimal`. Integers are unaffected (always `Integer`). Works identically on the C and Ruby paths. (Ruby's standard-library CSV has no high-precision option — its `:numeric`/`:float` converters use `Float()` and lose precision.)
|
|
16
|
+
- **Float** conversion on the C path now uses the fast **Eisel-Lemire** algorithm (fast_float, vendored) for mantissas up to 19 significant digits — correctly rounded, bit-for-bit identical to `String#to_f` — with a `strtod` fallback beyond that (more than 19 digits / extreme exponents). High-precision values that become `BigDecimal` under `:auto`/`:bigdecimal` are parsed by Ruby's `BigDecimal`.
|
|
17
|
+
|
|
18
|
+
### Behavior Changes
|
|
19
|
+
|
|
20
|
+
- **Scientific notation now converts to a number** (e.g. `"1e3"`, `"1.5e-5"`, `"6.022e23"`). Previously the Ruby path left these as Strings and the C path was inconsistent.
|
|
21
|
+
- **The C and Ruby numeric-conversion paths are now aligned.** Bare-dot forms like `".5"` and `"3."` stay Strings on **both** paths (the shared grammar requires an integer part and, when a dot is present, a fraction digit). Previously the C path converted these and the Ruby path did not.
|
|
22
|
+
- With the default `decimal_precision: :auto`, decimal values carrying more than 16 significant digits are now returned as `BigDecimal` instead of `Float`. Pass `decimal_precision: :float` to keep the previous always-`Float` behavior.
|
|
23
|
+
- `bigdecimal` is now a runtime dependency (it is no longer a default gem on Ruby 3.4+).
|
|
24
|
+
|
|
25
|
+
### Performance
|
|
26
|
+
|
|
27
|
+
The C-accelerated path is faster across the board, **up to ~1.5× on the right shapes** — numeric-heavy data and backslash-escaped quoted fields — and ~1.04–1.08× on typical files.
|
|
28
|
+
|
|
29
|
+
- Eisel-Lemire (Mushtak-Lemire) algorithm on the C path to convert decimals to `Float` (values that become `BigDecimal` are parsed by Ruby's `BigDecimal`). Numeric-heavy data (many float/decimal columns) parses significantly faster.
|
|
30
|
+
- SIMD scanner for backslash-escaped quoted fields (C-path), using NEON (arm64) and SSE2 (x86-64) with a scalar fallback. Speeds up `quote_escaping: :backslash` parsing of long quoted fields.
|
|
31
|
+
|
|
32
|
+
| File | C-path | driver |
|
|
33
|
+
|---------------------------------|----------------------------------|-----------------------|
|
|
34
|
+
| backslash_long_fields_60k | 1.48× faster (0.1880s → 0.1273s) | SIMD quote/backslash scanner |
|
|
35
|
+
| sensor_data_50krows_50cols | 1.40× faster (0.2763s → 0.1975s) | Eisel-Lemire numeric conversion |
|
|
36
|
+
|
|
37
|
+
### Improvements
|
|
38
|
+
|
|
39
|
+
- Improved robustness of symbol-valued enum option processing.
|
|
40
|
+
|
|
41
|
+
### Tests
|
|
42
|
+
|
|
43
|
+
- added parity tests for long quoted-field scanning across 16-byte boundaries, running on both the C and Ruby paths.
|
|
44
|
+
- added tests for string-to-symbol coercion of the enum options.
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
## 1.17.4 (2026-06-03)
|
|
49
|
+
|
|
50
|
+
### Bug Fix
|
|
51
|
+
|
|
52
|
+
- fixed [Issue #337](https://github.com/tilo/smarter_csv/issues/337): `Pathname` input no longer worked (regression since 1.17.0); passing a `Pathname` raised `NoMethodError: private method 'gets' called`. `SmarterCSV` now opens any path-like input (`String` or `Pathname`) and reads directly from any already-open IO. Thanks to [Alex Shenia](https://github.com/alexshenia)
|
|
53
|
+
|
|
54
|
+
|
|
6
55
|
|
|
7
56
|
## 1.17.3 (2026-05-26)
|
|
8
57
|
|
data/CONTRIBUTORS.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# A Big Thank You to all
|
|
1
|
+
# A Big Thank You to all 65 Contributors!!
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
A Big Thank you to everyone who filed issues, sent comments, and who contributed with pull requests:
|
|
@@ -67,3 +67,4 @@ A Big Thank you to everyone who filed issues, sent comments, and who contributed
|
|
|
67
67
|
* [Paho Lurie-Gregg](https://github.com/paholg)
|
|
68
68
|
* [Jonas Staškevičius](https://github.com/pirminis)
|
|
69
69
|
* [conorg](https://github.com/conorg)
|
|
70
|
+
* [Alex Shenia](https://github.com/alexshenia)
|
data/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
 [](https://codecov.io/gh/tilo/smarter_csv) [](https://rubygems.org/gems/smarter_csv) [](https://rubygems.org/gems/smarter_csv) [](https://www.ruby-toolbox.com/projects/smarter_csv) [](https://tilo.github.io/smarter_csv/upgrade_wizard.html)
|
|
5
5
|
|
|
6
6
|
> [!TIP]
|
|
7
|
-
> **Upgrading from an older version?** Use the [SmarterCSV Upgrade Wizard](https://tilo.github.io/smarter_csv/upgrade_wizard.html) to walk through what (if anything) you need to change for your specific version. Most
|
|
7
|
+
> **Upgrading from an older version?** Use the [SmarterCSV Upgrade Wizard](https://tilo.github.io/smarter_csv/upgrade_wizard.html) to walk through what (if anything) you need to change for your specific version. Most steps do not require any changes.
|
|
8
8
|
|
|
9
9
|
SmarterCSV is a high-performance CSV ingestion and generation library for Ruby, focused on fast end-to-end CSV ingestion of real-world data — no silent failures, no surprises, not just tokenization.
|
|
10
10
|
|
|
@@ -15,6 +15,9 @@
|
|
|
15
15
|
|
|
16
16
|
> See [**Ruby CSV Pitfalls**](docs/ruby_csv_pitfalls.md) for 10 ways `CSV.read` silently corrupts or loses data, and how SmarterCSV handles them.
|
|
17
17
|
|
|
18
|
+
> [!TIP]
|
|
19
|
+
> **No silent precision loss (new in 1.18.0).** For scientific data, GPS/geo coordinates, and financial figures — which routinely carry 16+ significant digits — Ruby's standard CSV converts with `Float()`, so a value like `1234567890.123456789` is silently rounded to `1234567890.1234567`. SmarterCSV's default `decimal_precision: :auto` returns a `BigDecimal` for values beyond 16 significant digits (and `Float` otherwise) — full precision, no data loss. Floats are decoded with the Eisel-Lemire algorithm: correctly rounded, bit-for-bit identical to `String#to_f`.
|
|
20
|
+
|
|
18
21
|
Beyond raw speed, SmarterCSV is designed to provide a significantly more convenient and developer-friendly interface than traditional CSV libraries. Instead of returning raw arrays that require substantial post-processing, SmarterCSV produces Rails-ready hashes for each row, making the data immediately usable with ActiveRecord, Sidekiq pipelines, parallel processing, and JSON-based workflows such as S3.
|
|
19
22
|
|
|
20
23
|
In a Rails app, warnings auto-route through `Rails.logger` and instrumentation hooks compose with `ActiveSupport::Notifications` — no setup required. Outside Rails, warnings fall back to `$stderr` and the same APIs work without any framework dependency.
|
|
@@ -89,6 +92,8 @@ rows = SmarterCSV.process('data.csv')
|
|
|
89
92
|
data = SmarterCSV.parse(csv_string)
|
|
90
93
|
```
|
|
91
94
|
|
|
95
|
+
Numeric conversion is also more accurate: where Ruby's `:numeric`/`:float` converters round high-precision decimals through `Float()`, SmarterCSV's default `decimal_precision: :auto` returns a `BigDecimal` past 16 significant digits, so no precision is lost (pass `decimal_precision: :float` for like-for-like `Float` output).
|
|
96
|
+
|
|
92
97
|
* See [**Migrating from Ruby CSV**](docs/migrating_from_csv.md) for a full comparison of options, behavior differences, and a quick-reference table.
|
|
93
98
|
|
|
94
99
|
## Examples
|
|
@@ -336,7 +341,7 @@ For reporting issues, please:
|
|
|
336
341
|
* open a pull-request adding a test that demonstrates the issue
|
|
337
342
|
* mention your version of SmarterCSV, Ruby, Rails
|
|
338
343
|
|
|
339
|
-
# [A Special Thanks to all
|
|
344
|
+
# [A Special Thanks to all 65 Contributors!](CONTRIBUTORS.md) 🎉🎉🎉
|
|
340
345
|
|
|
341
346
|
|
|
342
347
|
## Contributing
|
|
@@ -156,6 +156,39 @@ data = SmarterCSV.process(file,
|
|
|
156
156
|
convert_values_to_numeric: { only: [:quantity, :price] })
|
|
157
157
|
```
|
|
158
158
|
|
|
159
|
+
Scientific notation (e.g. `"1.5e3"`, `"6.022e23"`) is recognized and converted too. Bare-dot forms like `".5"` and `"3."` are left as Strings (they are not valid numbers here). Integers and floats convert identically on the C-accelerated and pure-Ruby paths.
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## `decimal_precision`
|
|
164
|
+
|
|
165
|
+
**Default: `:auto`**
|
|
166
|
+
|
|
167
|
+
Controls how decimal values (those with a `.` or an exponent) are converted. Integers are unaffected — they are always returned as `Integer`.
|
|
168
|
+
|
|
169
|
+
| Value | Result |
|
|
170
|
+
|---------------|-----------------------------------------------------------------------------------------|
|
|
171
|
+
| `:auto` | `Float`, unless the value carries more than 16 significant digits — then `BigDecimal`. |
|
|
172
|
+
| `:float` | Always `Float` (correctly rounded; matches `String#to_f`). |
|
|
173
|
+
| `:bigdecimal` | Always `BigDecimal` (full precision). |
|
|
174
|
+
|
|
175
|
+
```ruby
|
|
176
|
+
# :auto (default) — keeps full precision only when needed
|
|
177
|
+
SmarterCSV.process(file)
|
|
178
|
+
# "3.14" => 3.14 (Float)
|
|
179
|
+
# "1234567890.123456789" => 0.1234567890123456789e10 (BigDecimal — >16 sig digits)
|
|
180
|
+
|
|
181
|
+
# :float — always Float (faster, may lose precision on long decimals)
|
|
182
|
+
SmarterCSV.process(file, decimal_precision: :float)
|
|
183
|
+
# "1234567890.123456789" => 1234567890.1234567 (Float)
|
|
184
|
+
|
|
185
|
+
# :bigdecimal — always BigDecimal
|
|
186
|
+
SmarterCSV.process(file, decimal_precision: :bigdecimal)
|
|
187
|
+
# "3.14" => 0.314e1 (BigDecimal)
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
Unlike Ruby's standard-library CSV — whose `:numeric`/`:float` converters use `Float()` and silently lose precision — `:auto` preserves high-precision decimals as `BigDecimal`. Decimal values are decoded on the C path with the Eisel-Lemire algorithm (correctly rounded, identical to `String#to_f`).
|
|
191
|
+
|
|
159
192
|
---
|
|
160
193
|
|
|
161
194
|
## `remove_empty_hashes`
|
data/docs/migrating_from_csv.md
CHANGED
|
@@ -223,6 +223,24 @@ rows = SmarterCSV.process('sample.csv',
|
|
|
223
223
|
convert_values_to_numeric: { except: [:zip_code, :phone, :account_number] })
|
|
224
224
|
```
|
|
225
225
|
|
|
226
|
+
**High-precision decimals — scientific data and geo coordinates.** GPS/geo coordinates, scientific measurements, and financial figures routinely carry 16+ significant digits, where Ruby's `Float()`-based conversion (`converters: :numeric` / `:float`) silently rounds the value. SmarterCSV's default `decimal_precision: :auto` returns a `BigDecimal` once a value exceeds 16 significant digits (and a `Float` otherwise), so the full value is preserved; scientific notation (`6.022e23`, `1.6e-19`) is recognized as numeric too.
|
|
227
|
+
|
|
228
|
+
**With Ruby CSV (precision lost):**
|
|
229
|
+
```ruby
|
|
230
|
+
CSV.read('locations.csv', headers: true, converters: :float).first['lat']
|
|
231
|
+
# => -122.42200352825247 ← Float() dropped the last digits of -122.422003528252475
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
**With SmarterCSV (full precision kept):**
|
|
235
|
+
```ruby
|
|
236
|
+
SmarterCSV.process('locations.csv').first[:lat]
|
|
237
|
+
# => -0.122422003528252475e3 (BigDecimal — all 18 significant digits preserved)
|
|
238
|
+
|
|
239
|
+
# Force Float everywhere, like-for-like with Ruby CSV:
|
|
240
|
+
SmarterCSV.process('locations.csv', decimal_precision: :float).first[:lat]
|
|
241
|
+
# => -122.42200352825247 (Float)
|
|
242
|
+
```
|
|
243
|
+
|
|
226
244
|
### 3. Empty values are removed by default
|
|
227
245
|
|
|
228
246
|
SmarterCSV drops key/value pairs where the value is `nil` or blank
|
data/docs/options.md
CHANGED
|
@@ -121,7 +121,8 @@ See [Parsing Strategy](./parsing_strategy.md) for full details on quote handling
|
|
|
121
121
|
| Option | Default | Explanation |
|
|
122
122
|
|--------|---------|-------------|
|
|
123
123
|
| `:strip_whitespace` | `true` | Remove whitespace before/after values and headers. |
|
|
124
|
-
| `:convert_values_to_numeric` | `true` | Convert strings containing integers or floats to the appropriate numeric type. Accepts `{except: [:key1, :key2]}` or `{only: :key3}` to limit which columns. |
|
|
124
|
+
| `:convert_values_to_numeric` | `true` | Convert strings containing integers or floats (including scientific notation like `1.5e3`) to the appropriate numeric type. Accepts `{except: [:key1, :key2]}` or `{only: :key3}` to limit which columns. |
|
|
125
|
+
| `:decimal_precision` | `:auto` | How decimals are converted: `:auto` returns `Float` but `BigDecimal` above 16 significant digits (no precision loss); `:float` always returns `Float`; `:bigdecimal` always returns `BigDecimal`. Integers are unaffected. |
|
|
125
126
|
| `:value_converters` | `nil` | Hash of `:header => converter`; converter can be a lambda/Proc or a class implementing `self.convert(value)`. See [Value Converters](./value_converters.md). |
|
|
126
127
|
| `:remove_empty_values` | `true` | Remove key/value pairs where the value is `nil`, empty, or whitespace-only — any Unicode whitespace, same as Ruby's `String#blank?`. |
|
|
127
128
|
| `:remove_zero_values` | `false` | Remove key/value pairs whose value is zero — numeric `0` / `0.0`, or any textual form of zero (`"0"`, `"0.0"`, `"00.00"`, `"+0"`, `"-0.0"`, …). |
|
data/docs/upgrade_wizard.html
CHANGED
|
@@ -153,7 +153,7 @@ button.primary:disabled:hover { background: #e6e6e6; }
|
|
|
153
153
|
<body>
|
|
154
154
|
|
|
155
155
|
<h1>SmarterCSV Upgrade Wizard</h1>
|
|
156
|
-
<p class="muted">This wizard walks you from your current version to the latest, one
|
|
156
|
+
<p class="muted">This wizard walks you from your current version to the latest, one step at a time.<br><br>Only the questions where you answer "Yes" will show migration steps.<br>Questions answered with "No" represent risk-free upgrades.</p><br><br>
|
|
157
157
|
|
|
158
158
|
<div id="app"></div>
|
|
159
159
|
|
|
@@ -228,7 +228,7 @@ function renderHop(series, originalVersion) {
|
|
|
228
228
|
let body;
|
|
229
229
|
if (hop.actions.length === 0) {
|
|
230
230
|
body = `<div class="hop dropin">
|
|
231
|
-
|
|
231
|
+
You can upgrade directly to version ${targetRelease}. No changes needed.
|
|
232
232
|
</div>`;
|
|
233
233
|
} else {
|
|
234
234
|
body = `<div class="hop">
|
|
@@ -246,7 +246,7 @@ function renderHop(series, originalVersion) {
|
|
|
246
246
|
}
|
|
247
247
|
|
|
248
248
|
const nextLabel = hop.to === LATEST ? `Finish at ${targetRelease} →` : `Continue to ${targetRelease} →`;
|
|
249
|
-
const reminder = hop.actions.length === 0 ? `<p class="reminder">You can upgrade directly to
|
|
249
|
+
const reminder = hop.actions.length === 0 ? `<p class="reminder">You can upgrade directly to version ${targetRelease}. No changes needed.</p>` :
|
|
250
250
|
`<p class="reminder">If there are actions listed above, please ensure they are fixed before clicking "Continue".</p>`;
|
|
251
251
|
app.innerHTML = `
|
|
252
252
|
<p class="progress">Upgrading from ${series}.x → ${targetRelease}</p>
|
|
@@ -288,7 +288,7 @@ function renderHop(series, originalVersion) {
|
|
|
288
288
|
} else if (yesAnswers.length === 0) {
|
|
289
289
|
// All answered, all "No" — nothing applies, upgrade is direct.
|
|
290
290
|
reminderEl.style.display = "";
|
|
291
|
-
reminderEl.textContent =
|
|
291
|
+
reminderEl.textContent = `You can upgrade directly to version ${targetRelease}. No changes needed.`;
|
|
292
292
|
} else {
|
|
293
293
|
// All answered, at least one "Yes" — actions must be applied first.
|
|
294
294
|
reminderEl.style.display = "";
|
|
@@ -342,17 +342,21 @@ function renderDone(originalVersion) {
|
|
|
342
342
|
const fullVersion = LATEST_RELEASE || LATEST;
|
|
343
343
|
const seriesOnly = LATEST;
|
|
344
344
|
const summaryHTML = renderSummary();
|
|
345
|
+
const introHTML = decisions.length === 0
|
|
346
|
+
? `<p><strong>You can upgrade directly to version ${fullVersion}. No changes needed.</strong></p>
|
|
347
|
+
<p>You're already in the ${seriesOnly}.x series, so the upgrade is just a Gemfile bump.</p>`
|
|
348
|
+
: `<p>You've completed all the upgrade steps from ${escapeHTML(originalVersion || "your current version")} to ${fullVersion} (the latest version in the ${seriesOnly}.x series).</p>`;
|
|
345
349
|
|
|
346
350
|
app.innerHTML = `
|
|
347
351
|
<div class="done">
|
|
348
352
|
<h2>You're done</h2>
|
|
349
|
-
|
|
353
|
+
${introHTML}
|
|
350
354
|
${summaryHTML}
|
|
351
355
|
<p>Update your <code>Gemfile</code> to:</p>
|
|
352
356
|
<pre><code>gem 'smarter_csv', '~> ${seriesOnly}.0'</code></pre>
|
|
353
357
|
<p>Then run:</p>
|
|
354
358
|
<pre><code>bundle update smarter_csv</code></pre>
|
|
355
|
-
<p>After that, run your test suite. If anything behaves unexpectedly, click "Start over" and walk back through the
|
|
359
|
+
<p>After that, run your test suite. If anything behaves unexpectedly, click "Start over" and walk back through the steps to find the migration item you might have missed.</p>
|
|
356
360
|
<p class="muted">Questions? Open an issue at <a href="https://github.com/tilo/smarter_csv/issues">github.com/tilo/smarter_csv/issues</a>.</p>
|
|
357
361
|
<p><button id="restart">Start over</button></p>
|
|
358
362
|
</div>
|
|
@@ -366,17 +370,17 @@ function renderSummary() {
|
|
|
366
370
|
const matchedCount = decisions.reduce((n, d) => n + d.matched.length, 0);
|
|
367
371
|
const dropInCount = decisions.filter(d => d.dropIn).length;
|
|
368
372
|
const intro = matchedCount === 0
|
|
369
|
-
? `<p>Good news
|
|
370
|
-
: `<p>
|
|
373
|
+
? `<p>Good news — <strong>none of the conditions applied to your code</strong>. The upgrade is just a Gemfile bump.</p>`
|
|
374
|
+
: `<p>Apply the following <strong>${matchedCount} change${matchedCount === 1 ? "" : "s"}</strong> to your code, then run <code>bundle update</code>:</p>`;
|
|
371
375
|
|
|
372
376
|
const list = decisions.map(d => {
|
|
373
377
|
const targetRelease = latestReleaseFor(d.to);
|
|
374
378
|
const heading = `<p class="summary-hop-heading"><strong>${d.from}.x → ${targetRelease}</strong></p>`;
|
|
375
379
|
if (d.dropIn) {
|
|
376
|
-
return `<div class="summary-hop">${heading}<p class="muted">
|
|
380
|
+
return `<div class="summary-hop">${heading}<p class="muted">No code changes needed for this step.</p></div>`;
|
|
377
381
|
}
|
|
378
382
|
if (d.matched.length === 0) {
|
|
379
|
-
return `<div class="summary-hop">${heading}<p class="muted">None of the conditions
|
|
383
|
+
return `<div class="summary-hop">${heading}<p class="muted">None of the conditions in this step applied to your code.</p></div>`;
|
|
380
384
|
}
|
|
381
385
|
const items = d.matched.map(a => `<li><strong>If</strong> ${a["if"]}<br>→ ${a.then}</li>`).join("");
|
|
382
386
|
return `<div class="summary-hop">${heading}<ul>${items}</ul></div>`;
|
|
@@ -7,6 +7,14 @@
|
|
|
7
7
|
#include <stdlib.h>
|
|
8
8
|
#include <errno.h>
|
|
9
9
|
|
|
10
|
+
#ifdef __ARM_NEON
|
|
11
|
+
#include <arm_neon.h>
|
|
12
|
+
#elif defined(__SSE2__)
|
|
13
|
+
#include <immintrin.h>
|
|
14
|
+
#endif
|
|
15
|
+
|
|
16
|
+
#include "vendor/eisel_lemire.h" /* Eisel-Lemire decimal->double, correctly rounded (fast_float) */
|
|
17
|
+
|
|
10
18
|
#ifndef bool
|
|
11
19
|
#define bool int
|
|
12
20
|
#define false ((bool)0)
|
|
@@ -41,6 +49,8 @@ static ID id_only, id_except, id_quote_boundary;
|
|
|
41
49
|
static ID id_only_headers, id_except_headers, id_keep_cols, id_strict;
|
|
42
50
|
static ID id_keep_bitmap, id_keep_extra_cols, id_early_exit_after_sym;
|
|
43
51
|
static ID id_backslash, id_standard;
|
|
52
|
+
static ID id_decimal_precision, id_float, id_bigdecimal;
|
|
53
|
+
static ID id_BigDecimal; /* the Kernel#BigDecimal() method (require 'bigdecimal' done in Ruby) */
|
|
44
54
|
|
|
45
55
|
/* ================================================================================
|
|
46
56
|
* ParseContext — wraps all per-file parse options as a GC-managed TypedData object.
|
|
@@ -70,6 +80,9 @@ typedef struct {
|
|
|
70
80
|
/* Numeric conversion: 0=off, 1=all, 2=only listed keys, 3=except listed keys */
|
|
71
81
|
int numeric_mode;
|
|
72
82
|
|
|
83
|
+
/* Decimal handling: 0=float, 1=auto (BigDecimal above 16 sig digits), 2=bigdecimal */
|
|
84
|
+
int decimal_precision;
|
|
85
|
+
|
|
73
86
|
/* Column filter bitmap (xmalloc'd; NULL when no filtering active) */
|
|
74
87
|
bool *keep_bitmap;
|
|
75
88
|
long keep_bitmap_len;
|
|
@@ -133,6 +146,51 @@ static const rb_data_type_t parse_context_type = {
|
|
|
133
146
|
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
|
134
147
|
};
|
|
135
148
|
|
|
149
|
+
/* Scan [p, end) for the first `quote` char or backslash; returns a pointer to it,
|
|
150
|
+
* or `end` if neither occurs. NEON (arm64) or SSE2 (x86-64) processes 16 bytes per
|
|
151
|
+
* iteration; scalar fallback elsewhere. Ported from smarter_json's fj_scan_str.
|
|
152
|
+
*
|
|
153
|
+
* Used by the quoted-field slow path in :backslash escaping mode, where the only bytes
|
|
154
|
+
* that can change parser state inside a quoted field are the quote char (closing /
|
|
155
|
+
* doubled) and the backslash (escape). Bulk-skipping the plain content between them
|
|
156
|
+
* keeps the byte-by-byte state machine's behavior but avoids stepping every byte.
|
|
157
|
+
* In RFC mode the slow path uses a plain memchr-to-quote instead (only one byte class
|
|
158
|
+
* matters there), so this two-class scan is reserved for backslash mode. */
|
|
159
|
+
static inline const char *scan_quote_or_backslash(const char *p, const char *end, char quote) {
|
|
160
|
+
#ifdef __ARM_NEON
|
|
161
|
+
const uint8x16_t vq = vdupq_n_u8((uint8_t)quote);
|
|
162
|
+
const uint8x16_t vbs = vdupq_n_u8((uint8_t)'\\');
|
|
163
|
+
while (p + 16 <= end) {
|
|
164
|
+
uint8x16_t chunk = vld1q_u8((const uint8_t *)p);
|
|
165
|
+
uint8x16_t m = vorrq_u8(vceqq_u8(chunk, vq), vceqq_u8(chunk, vbs));
|
|
166
|
+
/* movemask emulation (Oj's technique): pack to 4 bits/byte, then ctz/4. */
|
|
167
|
+
uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(m), 4);
|
|
168
|
+
uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
|
|
169
|
+
if (__builtin_expect(mask != 0, 0)) { /* most 16-byte chunks contain neither */
|
|
170
|
+
mask &= 0x8888888888888888ull;
|
|
171
|
+
return p + (__builtin_ctzll(mask) >> 2);
|
|
172
|
+
}
|
|
173
|
+
p += 16;
|
|
174
|
+
}
|
|
175
|
+
#elif defined(__SSE2__)
|
|
176
|
+
const __m128i vq = _mm_set1_epi8(quote);
|
|
177
|
+
const __m128i vbs = _mm_set1_epi8('\\');
|
|
178
|
+
while (p + 16 <= end) {
|
|
179
|
+
__m128i chunk = _mm_loadu_si128((const __m128i *)p);
|
|
180
|
+
__m128i m = _mm_or_si128(_mm_cmpeq_epi8(chunk, vq), _mm_cmpeq_epi8(chunk, vbs));
|
|
181
|
+
int mask = _mm_movemask_epi8(m); /* one bit per byte that matched */
|
|
182
|
+
if (__builtin_expect(mask != 0, 0)) { /* most 16-byte chunks contain neither */
|
|
183
|
+
return p + __builtin_ctz(mask);
|
|
184
|
+
}
|
|
185
|
+
p += 16;
|
|
186
|
+
}
|
|
187
|
+
#endif
|
|
188
|
+
for (; p < end; p++) {
|
|
189
|
+
if (*p == quote || *p == '\\') return p;
|
|
190
|
+
}
|
|
191
|
+
return end;
|
|
192
|
+
}
|
|
193
|
+
|
|
136
194
|
static VALUE unescape_quotes(char *str, long len, char quote_char, rb_encoding *encoding) {
|
|
137
195
|
// Fast path: scan for any doubled quote pair. If none present, the field has
|
|
138
196
|
// nothing to unescape — emit it directly via rb_enc_str_new and skip the
|
|
@@ -386,6 +444,20 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
|
|
|
386
444
|
backslash_count = 0;
|
|
387
445
|
field_started = false; // reset for next field
|
|
388
446
|
} else {
|
|
447
|
+
/* Backslash mode: SIMD (NEON/SSE2, scalar fallback) scan-ahead to the next quote OR backslash (Opt #7).
|
|
448
|
+
* Inside a quoted field the only state-changing bytes are the quote char and the
|
|
449
|
+
* backslash; bulk-skip the plain content between them. Skipped bytes are plain
|
|
450
|
+
* content, which the byte-by-byte loop resets backslash_count to 0 on, so reset
|
|
451
|
+
* it here whenever we actually move p. */
|
|
452
|
+
if (allow_escaped_quotes && in_quotes) {
|
|
453
|
+
const char *hit = scan_quote_or_backslash(p, endP, quote_char_val);
|
|
454
|
+
if (hit != p) {
|
|
455
|
+
backslash_count = 0;
|
|
456
|
+
p = (char *)hit;
|
|
457
|
+
if (p == endP) continue; /* no quote/backslash before end → unclosed */
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
|
|
389
461
|
if (allow_escaped_quotes && *p == '\\') {
|
|
390
462
|
backslash_count++;
|
|
391
463
|
if (__builtin_expect(quote_boundary_standard, 1) && !in_quotes) field_started = true;
|
|
@@ -525,47 +597,101 @@ static inline VALUE get_key_for_index(long index, VALUE headers, long headers_le
|
|
|
525
597
|
* Handles overflow: integers too large for the fast path fall back to rb_cstr_to_inum
|
|
526
598
|
* which produces a Ruby Bignum.
|
|
527
599
|
*/
|
|
528
|
-
static inline VALUE try_numeric_conversion(char *
|
|
529
|
-
// Quick pre-check: first char must be digit
|
|
530
|
-
char first =
|
|
531
|
-
if (!((first >= '0' && first <= '9') || first == '+' || first == '-'
|
|
600
|
+
static inline VALUE try_numeric_conversion(char *s, long n, int decimal_precision) {
|
|
601
|
+
// Quick pre-check: first char must be a digit or a sign.
|
|
602
|
+
char first = s[0];
|
|
603
|
+
if (!((first >= '0' && first <= '9') || first == '+' || first == '-')) {
|
|
532
604
|
return Qundef;
|
|
533
605
|
}
|
|
534
606
|
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
if (
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
607
|
+
/* Single pass: validate the token against the same grammar as the Ruby path's
|
|
608
|
+
* NUMERIC_REGEX = /\A[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?\z/ and, in the same pass,
|
|
609
|
+
* extract everything the fast paths need:
|
|
610
|
+
* - mantissa value m10 (exact for <= 19 digits; `overflow` flags beyond)
|
|
611
|
+
* - significant-digit count `sig` (leading zeros excluded; matches the Ruby
|
|
612
|
+
* significant_digits helper / Oj dec_cnt) — drives the :auto Float/BigDecimal split
|
|
613
|
+
* - base-10 exponent e10 (from the fraction length and any explicit exponent)
|
|
614
|
+
* Anything the grammar rejects returns Qundef (stays a String), keeping the C and
|
|
615
|
+
* Ruby paths byte-identical on what does and does not convert. */
|
|
616
|
+
long i = 0;
|
|
617
|
+
int neg = 0;
|
|
618
|
+
if (s[i] == '+' || s[i] == '-') { neg = (s[i] == '-'); i++; }
|
|
619
|
+
|
|
620
|
+
uint64_t m10 = 0;
|
|
621
|
+
int m10digits = 0; /* mantissa digits accumulated into m10 (capped at 19) */
|
|
622
|
+
long sig = 0; /* significant digits (leading zeros excluded) */
|
|
623
|
+
int sig_started = 0;
|
|
624
|
+
bool overflow = false;
|
|
625
|
+
long int_digits = 0, frac_digits = 0;
|
|
626
|
+
bool seen_dot = false, seen_exp = false, any_digit = false, exp_any = false;
|
|
627
|
+
int64_t exp_val = 0; int exp_neg = 0;
|
|
628
|
+
|
|
629
|
+
for (; i < n; i++) {
|
|
630
|
+
char c = s[i];
|
|
631
|
+
if (c >= '0' && c <= '9') {
|
|
632
|
+
any_digit = true;
|
|
633
|
+
if (!seen_exp) {
|
|
634
|
+
if (seen_dot) frac_digits++; else int_digits++;
|
|
635
|
+
if (sig_started) sig++;
|
|
636
|
+
else if (c != '0') { sig_started = 1; sig = 1; }
|
|
637
|
+
if (m10digits < 19) { m10 = m10 * 10 + (uint64_t)(c - '0'); m10digits++; }
|
|
638
|
+
else overflow = true;
|
|
639
|
+
} else {
|
|
640
|
+
exp_any = true;
|
|
641
|
+
exp_val = exp_val * 10 + (c - '0');
|
|
642
|
+
if (exp_val > 1000000) overflow = true; /* extreme exponent → strtod fallback */
|
|
554
643
|
}
|
|
555
|
-
|
|
644
|
+
} else if (c == '.' && !seen_dot && !seen_exp) {
|
|
645
|
+
seen_dot = true;
|
|
646
|
+
} else if ((c == 'e' || c == 'E') && !seen_exp && any_digit) {
|
|
647
|
+
seen_exp = true;
|
|
648
|
+
if (i + 1 < n && (s[i + 1] == '+' || s[i + 1] == '-')) { exp_neg = (s[i + 1] == '-'); i++; }
|
|
649
|
+
} else {
|
|
650
|
+
return Qundef; /* invalid char for a number → not numeric */
|
|
556
651
|
}
|
|
557
652
|
}
|
|
558
653
|
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
654
|
+
/* Enforce NUMERIC_REGEX exactly: an integer part is required; a dot requires a
|
|
655
|
+
* fraction digit; an exponent requires an exponent digit. */
|
|
656
|
+
if (int_digits == 0) return Qundef;
|
|
657
|
+
if (seen_dot && frac_digits == 0) return Qundef;
|
|
658
|
+
if (seen_exp && !exp_any) return Qundef;
|
|
659
|
+
|
|
660
|
+
bool is_decimal = seen_dot || seen_exp;
|
|
661
|
+
|
|
662
|
+
if (!is_decimal) {
|
|
663
|
+
/* Integer. Fast path when it fits in a long; otherwise a Ruby Integer/Bignum. */
|
|
664
|
+
if (!overflow && m10digits <= 18) {
|
|
665
|
+
long v = (long)m10;
|
|
666
|
+
return LONG2NUM(neg ? -v : v);
|
|
565
667
|
}
|
|
668
|
+
VALUE str = rb_str_new(s, n);
|
|
669
|
+
return rb_cstr_to_inum(RSTRING_PTR(str), 10, false);
|
|
566
670
|
}
|
|
567
671
|
|
|
568
|
-
|
|
672
|
+
/* Decimal (has a '.' or an exponent) — honor decimal_precision. 0=float, 1=auto, 2=bigdecimal */
|
|
673
|
+
if (decimal_precision == 2 || (decimal_precision == 1 && sig > 16)) {
|
|
674
|
+
VALUE str = rb_str_new(s, n);
|
|
675
|
+
return rb_funcall(rb_cObject, id_BigDecimal, 1, str);
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
/* Float. base-10 exponent = explicit exponent minus the fraction length. */
|
|
679
|
+
int64_t e10 = (exp_neg ? -exp_val : exp_val) - (int64_t)frac_digits;
|
|
680
|
+
double d;
|
|
681
|
+
if (!overflow && m10digits >= 1 && m10digits <= 19 && ((long)m10digits + e10) >= -307) {
|
|
682
|
+
/* Eisel-Lemire is correctly-rounded for any nonzero mantissa that fits exactly in a
|
|
683
|
+
* uint64 — i.e. up to 19 significant digits (the max 19-digit value ~1.0e19 is below
|
|
684
|
+
* UINT64_MAX ~1.8e19). Verified bit-for-bit vs the stdlib over 1..19-digit ties. */
|
|
685
|
+
d = (m10 == 0) ? (neg ? -0.0 : 0.0) : fj_eisel_lemire_s2d(e10, m10, neg);
|
|
686
|
+
} else {
|
|
687
|
+
/* >19 digits / extreme or subnormal exponent: fall back to Ruby's own correctly-rounded
|
|
688
|
+
* strtod (rb_cstr_to_dbl) — the exact conversion String#to_f uses — so the C path and the
|
|
689
|
+
* Ruby path produce the identical double on every platform, not just where the system
|
|
690
|
+
* strtod happens to be correctly rounded. The token is pre-validated, so badcheck=0. */
|
|
691
|
+
VALUE str = rb_str_new(s, n);
|
|
692
|
+
d = rb_cstr_to_dbl(RSTRING_PTR(str), 0);
|
|
693
|
+
}
|
|
694
|
+
return DBL2NUM(d);
|
|
569
695
|
}
|
|
570
696
|
|
|
571
697
|
/*
|
|
@@ -614,6 +740,7 @@ typedef struct {
|
|
|
614
740
|
long headers_len;
|
|
615
741
|
long hash_capa; // Pre-computed capacity for lazy hash allocation
|
|
616
742
|
int numeric_mode; // 0=off, 1=all, 2=only, 3=except
|
|
743
|
+
int decimal_precision; // 0=float, 1=auto (BigDecimal above 16 sig digits), 2=bigdecimal
|
|
617
744
|
bool remove_empty_values;
|
|
618
745
|
bool remove_zero_values;
|
|
619
746
|
} field_transform_opts;
|
|
@@ -705,7 +832,7 @@ static inline __attribute__((always_inline)) bool insert_field_into_hash(
|
|
|
705
832
|
(opts->numeric_mode == 2 && rb_ary_includes(opts->numeric_keys, key) == Qtrue) ||
|
|
706
833
|
(opts->numeric_mode == 3 && rb_ary_includes(opts->numeric_keys, key) != Qtrue);
|
|
707
834
|
if (do_convert) {
|
|
708
|
-
VALUE numeric = try_numeric_conversion(trim_start, trimmed_len);
|
|
835
|
+
VALUE numeric = try_numeric_conversion(trim_start, trimmed_len, opts->decimal_precision);
|
|
709
836
|
if (numeric != Qundef) {
|
|
710
837
|
ensure_hash_allocated(opts);
|
|
711
838
|
rb_hash_aset(opts->hash, key, numeric);
|
|
@@ -752,6 +879,18 @@ void parse_numeric_option(VALUE options_hash, int *out_mode, VALUE *out_keys) {
|
|
|
752
879
|
}
|
|
753
880
|
}
|
|
754
881
|
|
|
882
|
+
/* Read the :decimal_precision option from options_hash and encode it as 0=float, 1=auto, 2=bigdecimal. Default :auto (1).
|
|
883
|
+
* Only the Symbols :float and :bigdecimal are matched explicitly; an unset option, a non-Symbol value, or any other Symbol all yield 1 (:auto). The option is validated and coerced to a symbol on the Ruby side before we get here. */
|
|
884
|
+
static inline int parse_decimal_precision(VALUE options_hash) {
|
|
885
|
+
VALUE v = rb_hash_aref(options_hash, ID2SYM(id_decimal_precision)); /* look up options_hash[:decimal_precision] */
|
|
886
|
+
if (RB_TYPE_P(v, T_SYMBOL)) { /* only a Symbol is compared; anything else takes the fallback return below */
|
|
887
|
+
ID s = SYM2ID(v); /* convert to an ID so it can be compared against the interned ids */
|
|
888
|
+
if (s == id_float) return 0; /* :float */
|
|
889
|
+
if (s == id_bigdecimal) return 2; /* :bigdecimal */
|
|
890
|
+
}
|
|
891
|
+
return 1; // :auto (also the default when unset)
|
|
892
|
+
}
|
|
893
|
+
|
|
755
894
|
/*
|
|
756
895
|
* ================================================================================
|
|
757
896
|
* rb_parse_line_to_hash - Parse CSV line directly into a Ruby Hash
|
|
@@ -826,6 +965,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash(VALUE self, VALUE line,
|
|
|
826
965
|
int numeric_mode = 0;
|
|
827
966
|
VALUE numeric_keys = Qnil;
|
|
828
967
|
parse_numeric_option(options_hash, &numeric_mode, &numeric_keys);
|
|
968
|
+
int decimal_precision = parse_decimal_precision(options_hash);
|
|
829
969
|
|
|
830
970
|
// quote_escaping and quote_boundary are only needed in Section 5 (quoted/slow path).
|
|
831
971
|
// They are declared here as forward declarations so Section 5 can set them lazily.
|
|
@@ -990,6 +1130,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash(VALUE self, VALUE line,
|
|
|
990
1130
|
.headers_len = headers_len,
|
|
991
1131
|
.hash_capa = hash_size,
|
|
992
1132
|
.numeric_mode = numeric_mode,
|
|
1133
|
+
.decimal_precision = decimal_precision,
|
|
993
1134
|
.remove_empty_values = remove_empty_values,
|
|
994
1135
|
.remove_zero_values = remove_zero_values,
|
|
995
1136
|
};
|
|
@@ -1160,6 +1301,20 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash(VALUE self, VALUE line,
|
|
|
1160
1301
|
p = next_quote; /* jump to quote char; fall through to quote-handling code */
|
|
1161
1302
|
}
|
|
1162
1303
|
|
|
1304
|
+
/* Backslash mode: NEON scan-ahead to the next quote OR backslash (Opt #7).
|
|
1305
|
+
* The RFC memchr skip above only matters for one byte class; with escaping on
|
|
1306
|
+
* a backslash also changes state, so scan for both. Skipped bytes are plain
|
|
1307
|
+
* content (the byte-by-byte loop resets backslash_count to 0 on them), so reset
|
|
1308
|
+
* it here whenever we actually move p. */
|
|
1309
|
+
if (allow_escaped_quotes && in_quotes) {
|
|
1310
|
+
const char *hit = scan_quote_or_backslash(p, endP, quote_char_val);
|
|
1311
|
+
if (hit != p) {
|
|
1312
|
+
backslash_count = 0;
|
|
1313
|
+
p = (char *)hit;
|
|
1314
|
+
if (p == endP) continue; /* no quote/backslash before end → unclosed */
|
|
1315
|
+
}
|
|
1316
|
+
}
|
|
1317
|
+
|
|
1163
1318
|
if (allow_escaped_quotes && *p == '\\') {
|
|
1164
1319
|
// Count consecutive backslashes for escape sequence detection
|
|
1165
1320
|
backslash_count++;
|
|
@@ -1354,6 +1509,7 @@ __attribute__((cold)) static VALUE rb_new_parse_context(VALUE self, VALUE header
|
|
|
1354
1509
|
|
|
1355
1510
|
/* Numeric conversion */
|
|
1356
1511
|
parse_numeric_option(options_hash, &ctx->numeric_mode, &ctx->numeric_keys);
|
|
1512
|
+
ctx->decimal_precision = parse_decimal_precision(options_hash);
|
|
1357
1513
|
|
|
1358
1514
|
/* quote_escaping → allow_escaped_quotes */
|
|
1359
1515
|
VALUE quote_escaping_val = rb_hash_aref(options_hash, ID2SYM(id_quote_escaping));
|
|
@@ -1474,6 +1630,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
|
|
|
1474
1630
|
bool remove_empty_values = ctx->remove_empty_values;
|
|
1475
1631
|
bool remove_zero_values = ctx->remove_zero_values;
|
|
1476
1632
|
int numeric_mode = ctx->numeric_mode;
|
|
1633
|
+
int decimal_precision = ctx->decimal_precision;
|
|
1477
1634
|
VALUE numeric_keys = ctx->numeric_keys;
|
|
1478
1635
|
bool *keep_bitmap = ctx->keep_bitmap;
|
|
1479
1636
|
/* keep_bitmap is cached in the context (xmalloc'd once at construction, sized to the header count
|
|
@@ -1525,6 +1682,7 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
|
|
|
1525
1682
|
.headers_len = headers_len,
|
|
1526
1683
|
.hash_capa = hash_size,
|
|
1527
1684
|
.numeric_mode = numeric_mode,
|
|
1685
|
+
.decimal_precision = decimal_precision,
|
|
1528
1686
|
.remove_empty_values = remove_empty_values,
|
|
1529
1687
|
.remove_zero_values = remove_zero_values,
|
|
1530
1688
|
};
|
|
@@ -1654,6 +1812,16 @@ __attribute__((hot)) static VALUE rb_parse_line_to_hash_ctx(VALUE self, VALUE li
|
|
|
1654
1812
|
p = next_quote; /* fall through to quote-handling code */
|
|
1655
1813
|
}
|
|
1656
1814
|
|
|
1815
|
+
/* Backslash mode: NEON scan-ahead to the next quote OR backslash (Opt #7). */
|
|
1816
|
+
if (allow_escaped_quotes && in_quotes) {
|
|
1817
|
+
const char *hit = scan_quote_or_backslash(p, endP, quote_char_val);
|
|
1818
|
+
if (hit != p) {
|
|
1819
|
+
backslash_count = 0;
|
|
1820
|
+
p = (char *)hit;
|
|
1821
|
+
if (p == endP) continue; /* no quote/backslash before end → unclosed */
|
|
1822
|
+
}
|
|
1823
|
+
}
|
|
1824
|
+
|
|
1657
1825
|
if (allow_escaped_quotes && *p == '\\') {
|
|
1658
1826
|
backslash_count++;
|
|
1659
1827
|
if (__builtin_expect(quote_boundary_standard, 1) && !in_quotes) field_started = true;
|
|
@@ -1878,6 +2046,10 @@ void Init_smarter_csv(void) {
|
|
|
1878
2046
|
id_strict = rb_intern("strict");
|
|
1879
2047
|
id_backslash = rb_intern("backslash");
|
|
1880
2048
|
id_standard = rb_intern("standard");
|
|
2049
|
+
id_decimal_precision = rb_intern("decimal_precision");
|
|
2050
|
+
id_float = rb_intern("float");
|
|
2051
|
+
id_bigdecimal = rb_intern("bigdecimal");
|
|
2052
|
+
id_BigDecimal = rb_intern("BigDecimal"); /* Kernel#BigDecimal(); 'bigdecimal' is required in lib/smarter_csv.rb */
|
|
1881
2053
|
|
|
1882
2054
|
rb_define_module_function(Parser, "parse_csv_line_c", rb_parse_csv_line, 9);
|
|
1883
2055
|
rb_define_module_function(Parser, "count_quote_chars_c", rb_count_quote_chars, 4);
|