smarter_csv 1.17.3 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2021 The fast_float authors
4
+
5
+ Permission is hereby granted, free of charge, to any
6
+ person obtaining a copy of this software and associated
7
+ documentation files (the "Software"), to deal in the
8
+ Software without restriction, including without
9
+ limitation the rights to use, copy, modify, merge,
10
+ publish, distribute, sublicense, and/or sell copies of
11
+ the Software, and to permit persons to whom the Software
12
+ is furnished to do so, subject to the following
13
+ conditions:
14
+
15
+ The above copyright notice and this permission notice
16
+ shall be included in all copies or substantial portions
17
+ of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
20
+ ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
21
+ TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
22
+ PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
23
+ SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
26
+ IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27
+ DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,117 @@
1
+ /* Eisel-Lemire decimal->double, ported from fast_float:
2
+ * include/fast_float/decimal_to_binary.h, the compute_float<binary_format<double>>
3
+ * + compute_product_approximation routines.
4
+ *
5
+ * Algorithm authors: Michael Eisel (original approach) and Daniel Lemire
6
+ * (formalization, proof, and the fast_float implementation) — hence "Eisel-Lemire".
7
+ *
8
+ * Copyright (c) 2021 The fast_float authors. Tri-licensed Apache-2.0 / MIT / BSL-1.0;
9
+ * used here under MIT — see LICENSE-fast_float-MIT in this directory.
10
+ *
11
+ * This is the "without fallback" variant (Noble Mushtak & Daniel Lemire, "Fast
12
+ * Number Parsing Without Fallback"): for ANY nonzero mantissa w that fits exactly
13
+ * in a uint64 (i.e. <= 19 significant digits, not truncated) and decimal exponent
14
+ * q, it returns the correctly-rounded binary64 with no slow-path needed.
15
+ *
16
+ * smarter_json uses it as THE decimal->double path for mantissas up to 18 digits
17
+ * (everything wider / overflowed / with an extreme exponent goes to the strtod
18
+ * round-to-odd fallback). It is correctly-rounded across that whole range, with no
19
+ * round-to-even tie loss, and is fast on the common short-mantissa case.
20
+ * Verified bit-for-bit vs JSON.parse. See eisel_lemire.md for provenance. */
21
+ #ifndef FJ_EISEL_LEMIRE_H
22
+ #define FJ_EISEL_LEMIRE_H
23
+
24
+ #include <stdint.h>
25
+ #include <string.h>
26
+ #include "eisel_lemire_powers.h"
27
+
28
+ /* binary_format<double> constants from fast_float. */
29
+ #define FJ_EL_MANTISSA_BITS 52
30
+ #define FJ_EL_MIN_EXPONENT (-1023)
31
+ #define FJ_EL_INFINITE_POWER 0x7FF
32
+ #define FJ_EL_SMALLEST_POW10 (-342)
33
+ #define FJ_EL_LARGEST_POW10 308
34
+ #define FJ_EL_MIN_RTE (-4) /* min_exponent_round_to_even */
35
+ #define FJ_EL_MAX_RTE 23 /* max_exponent_round_to_even */
36
+
37
/* Binary-exponent estimate for the decimal exponent q.
 *
 * Returns floor(q * 217706 / 65536) + 63, where 217706 = 152170 + 65536 and
 * 152170 / 65536 ~= log2(5). For the exponent range this header accepts
 * (FJ_EL_SMALLEST_POW10 .. FJ_EL_LARGEST_POW10) that equals
 * floor(log2(5^q)) + q + 63, i.e. the 5^q part plus the 2^q part of 10^q
 * (see the Eisel-Lemire paper / fast_float source for the proof).
 *
 * The floor is computed with an explicit division instead of `>> 16`:
 * right-shifting a negative signed value is implementation-defined in C, and
 * q is negative for small numbers. The product is formed in 64 bits so it
 * cannot overflow for any int32_t q. */
static inline int32_t fj_el_power(int32_t q) {
  const int64_t scaled = (int64_t)(152170 + 65536) * q;
  int64_t floored = scaled / 65536;   /* truncates toward zero */
  if (scaled % 65536 < 0) {
    floored--;                        /* negative remainder: step down to the floor */
  }
  return (int32_t)(floored + 63);
}
41
+
42
/* Full 64x64 -> 128-bit unsigned multiply.
 *
 * a, b - the factors.
 * hi   - receives the upper 64 bits of a * b.
 * lo   - receives the lower 64 bits of a * b. */
static inline void fj_el_mul128(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo) {
#if defined(__SIZEOF_INT128__)
  /* The compiler offers a 128-bit integer type: one widening multiply. */
  const __uint128_t product = (__uint128_t)a * b;
  *lo = (uint64_t)product;
  *hi = (uint64_t)(product >> 64);
#else
  /* Schoolbook multiplication on 32-bit halves. */
  const uint64_t mask32 = 0xFFFFFFFFu;
  const uint64_t a_lo = a & mask32, a_hi = a >> 32;
  const uint64_t b_lo = b & mask32, b_hi = b >> 32;
  const uint64_t lo_lo = a_lo * b_lo;
  const uint64_t lo_hi = a_lo * b_hi;
  const uint64_t hi_lo = a_hi * b_lo;
  const uint64_t hi_hi = a_hi * b_hi;
  /* Cannot overflow: (2^32-1)^2 + 2*(2^32-1) == 2^64 - 1. */
  const uint64_t cross = hi_lo + (lo_lo >> 32) + (lo_hi & mask32);
  *hi = hi_hi + (cross >> 32) + (lo_hi >> 32);
  *lo = (cross << 32) | (lo_lo & mask32);
#endif
}
55
+
56
/* Reinterpret a 64-bit pattern as a double without changing any bits.
 * The copy goes through memcpy rather than a pointer cast so the type-pun
 * does not violate strict aliasing. (Callers in this header build the
 * pattern as sign | exponent | mantissa; this assumes double is a 64-bit
 * IEEE-754 binary64 on the target.) */
static inline double fj_el_bits2double(uint64_t bits) {
  double value;
  memcpy(&value, &bits, sizeof value);
  return value;
}
61
+
62
+ /* q = power of ten, w = mantissa (NONZERO, exact, fits in uint64). neg = sign. */
63
+ static inline double fj_eisel_lemire_s2d(int64_t q, uint64_t w, int neg) {
64
+ const uint64_t sign = (uint64_t)(neg != 0) << 63;
65
+ uint64_t mantissa, prod_hi, prod_lo, sp_hi, sp_lo;
66
+ int32_t power2;
67
+ int lz, upperbit, shift, index;
68
+
69
+ if (q < FJ_EL_SMALLEST_POW10) return fj_el_bits2double(sign); /* underflow -> 0 */
70
+ if (q > FJ_EL_LARGEST_POW10)
71
+ return fj_el_bits2double(sign | ((uint64_t)FJ_EL_INFINITE_POWER << FJ_EL_MANTISSA_BITS));
72
+
73
+ lz = __builtin_clzll(w);
74
+ w <<= lz;
75
+
76
+ /* compute_product_approximation<mantissa_bits + 3 = 55>: precision_mask = 0x1FF. */
77
+ index = 2 * (int)(q - FJ_EL_SMALLEST_POWER_OF_FIVE);
78
+ fj_el_mul128(w, fj_power_of_five_128[index], &prod_hi, &prod_lo);
79
+ if ((prod_hi & 0x1FF) == 0x1FF) {
80
+ fj_el_mul128(w, fj_power_of_five_128[index + 1], &sp_hi, &sp_lo);
81
+ prod_lo += sp_hi;
82
+ if (sp_hi > prod_lo) prod_hi++;
83
+ }
84
+
85
+ upperbit = (int)(prod_hi >> 63);
86
+ shift = upperbit + 64 - FJ_EL_MANTISSA_BITS - 3; /* upperbit + 9 */
87
+ mantissa = prod_hi >> shift;
88
+ power2 = (int32_t)(fj_el_power((int32_t)q) + upperbit - lz - FJ_EL_MIN_EXPONENT);
89
+
90
+ if (power2 <= 0) { /* subnormal */
91
+ if (-power2 + 1 >= 64) return fj_el_bits2double(sign); /* far below min -> 0 */
92
+ mantissa >>= (-power2 + 1);
93
+ mantissa += (mantissa & 1);
94
+ mantissa >>= 1;
95
+ power2 = (mantissa < ((uint64_t)1 << FJ_EL_MANTISSA_BITS)) ? 0 : 1;
96
+ return fj_el_bits2double(sign | ((uint64_t)power2 << FJ_EL_MANTISSA_BITS) | mantissa);
97
+ }
98
+
99
+ /* round-to-even: if we land exactly between two doubles, round down. */
100
+ if ((prod_lo <= 1) && (q >= FJ_EL_MIN_RTE) && (q <= FJ_EL_MAX_RTE) &&
101
+ ((mantissa & 3) == 1) && ((mantissa << shift) == prod_hi)) {
102
+ mantissa &= ~(uint64_t)1;
103
+ }
104
+
105
+ mantissa += (mantissa & 1);
106
+ mantissa >>= 1;
107
+ if (mantissa >= ((uint64_t)2 << FJ_EL_MANTISSA_BITS)) {
108
+ mantissa = (uint64_t)1 << FJ_EL_MANTISSA_BITS;
109
+ power2++;
110
+ }
111
+ mantissa &= ~((uint64_t)1 << FJ_EL_MANTISSA_BITS); /* drop implicit bit */
112
+ if (power2 >= FJ_EL_INFINITE_POWER)
113
+ return fj_el_bits2double(sign | ((uint64_t)FJ_EL_INFINITE_POWER << FJ_EL_MANTISSA_BITS));
114
+ return fj_el_bits2double(sign | ((uint64_t)power2 << FJ_EL_MANTISSA_BITS) | mantissa);
115
+ }
116
+
117
+ #endif
@@ -0,0 +1,29 @@
1
+ # Eisel-Lemire decimal→double, from fast_float
2
+
3
+ - The algorithm is **Eisel-Lemire**, named for **Michael Eisel** (who proposed/motivated the original approach) and **Daniel Lemire** (who formalized it, proved its bounds, and wrote the fast_float implementation). We use the later "without fallback" form proven by **Noble Mushtak & Daniel Lemire, _Fast Number Parsing Without Fallback_**. It converts a decimal mantissa+exponent to a correctly-rounded binary64 with no slow path, for any nonzero mantissa that fits exactly in a `uint64` (≤ 19 significant digits).
4
+ - Vendored from **fastfloat/fast_float** — https://github.com/fastfloat/fast_float — tri-licensed Apache-2.0 / MIT / BSL-1.0 (Boost Software License 1.0), at the user's choice.
5
+
6
+ ## What smarter_csv uses it for
7
+
8
+ `try_numeric_conversion` (in `smarter_csv.c`) routes decimal tokens with `m10digits ≤ 19 → fj_eisel_lemire_s2d`, and `> 19 / overflow / extreme exponent → strtod`. Eisel-Lemire is correctly-rounded across the whole ≤19-digit range — every mantissa that fits exactly in a uint64, no round-to-even tie loss — **and** fast on the common short-mantissa case. (smarter_json uses it the same way via `fj_float_from_parts`.)
9
+
10
+ ## These two files are DERIVED, not verbatim copies
11
+
12
+ - **`eisel_lemire_powers.h`** — the `power_of_five_128` table (1302 × `uint64`), extracted **verbatim** (the constants are byte-for-byte) from fast_float `include/fast_float/fast_table.h`, but **rewrapped**: a plain C `static const uint64_t fj_power_of_five_128[...]` array instead of fast_float's C++ `powers_template` struct. `FJ_EL_SMALLEST_POWER_OF_FIVE` / `FJ_EL_LARGEST_POWER_OF_FIVE` are the `-342` / `308` bounds.
13
+ - **`eisel_lemire.h`** — a C **port** of `compute_float<binary_format<double>>` + `compute_product_approximation` from fast_float `include/fast_float/decimal_to_binary.h`. Adapted to: (a) plain C (no templates), (b) take our already-extracted `(q, w)` = `(e10, m10)` instead of re-parsing a string, (c) the binary64 constants inlined as `FJ_EL_*` macros, (d) a portable `fj_el_mul128` (`__uint128_t` when available, else a 32×32 split). The control flow — `compute_product_approximation<55>`, the `0x1FF` precision mask, `upperbit`/`shift`, the subnormal branch, and the round-to-even "land exactly between two doubles → round down" check — mirrors the source.
14
+
15
+ Because they're derived (rewrapped table, ported algorithm), they're named after the **algorithm** (Eisel-Lemire) rather than their upstream source (`fast_float`). The verification that they faithfully reproduce upstream is the bit-for-bit stress vs `JSON.parse` (8–10M random numbers incl. ties / subnormals / near-overflow, 0 mismatches).
16
+
17
+ ## To refresh from upstream
18
+
19
+ Re-pull the two source files and re-derive:
20
+
21
+ - table: `curl -L https://raw.githubusercontent.com/fastfloat/fast_float/main/include/fast_float/fast_table.h` — copy the `power_of_five_128[...] = { ... };` body into `eisel_lemire_powers.h`'s array (constants only).
22
+ - algorithm: `curl -L https://raw.githubusercontent.com/fastfloat/fast_float/main/include/fast_float/decimal_to_binary.h` — re-check `compute_float` / `compute_product_approximation` against the port in `eisel_lemire.h`.
23
+
24
+ Then re-run the bit-exact stress (≥ several million random 1–19-digit numbers, with forced round-to-even ties and exponents spanning subnormal → overflow) vs `JSON.parse` before trusting it.
25
+
26
+ - origin: Eisel-Lemire (`fastfloat/fast_float`)
27
+ - vendored from: fast_float (upstream `main`)
28
+ - copyright: (c) 2021 The fast_float authors
29
+ - license: tri-licensed Apache-2.0 / MIT / BSL-1.0; vendored here under **MIT**, full text in `LICENSE-fast_float-MIT` (this directory)