fast_float_lemire 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/README.md +15 -4
- data/ext/fast_float_lemire/fast_float_lemire.c +243 -17
- data/lib/fast_float_lemire/fast_float_lemire.so +0 -0
- data/lib/fast_float_lemire/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 326b271e3b4184bac12adf01ef09bdbad83426afff6e47a0bf455d46effef3cb
|
|
4
|
+
data.tar.gz: 17fc3cab59ff9dc65fa78ad89152c1d9efcb239d2651e6c7f52f0e4fe813b4e2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f848b55888e57fc1d66fe2f7869f00acb43f69ce6254a724446914ad4a16a6c1020ee5bc559b09226fa5ba47c4ae320960aafcb9900f9e825ae784ed7c983329
|
|
7
|
+
data.tar.gz: 482f232c512901bfa03d97cca1d369c139c1c3d44b2957f9966f67242a58269aa8a91ec6b1d27908b434a5bb20cdd687e3f20aaf3521b396e87a1f7326a154c3
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# FastFloatLemire Changelog
|
|
2
|
+
|
|
3
|
+
## 0.2.0 (2025-12-19)
|
|
4
|
+
- **[Feature]** Add ultra-fast path for small integers (0-999).
|
|
5
|
+
- **[Feature]** Add ultra-fast path for simple decimals (X.Y, X.YZ patterns).
|
|
6
|
+
- **[Feature]** Add exact power-of-10 fast path using precomputed 10^0 to 10^22.
|
|
7
|
+
- [Enhancement] Remove strlen() overhead by parsing until null terminator.
|
|
8
|
+
- [Enhancement] Eliminate duplicate whitespace skipping.
|
|
9
|
+
- [Enhancement] Performance improvements: simple decimals ~7% faster than Ruby, prices ~3% faster, complex numbers ~2.8x faster.
|
|
10
|
+
|
|
11
|
+
## 0.1.1 (2025-12-18)
|
|
12
|
+
- [Enhancement] Use VERSION constant in gemspec instead of hardcoded value.
|
|
13
|
+
- [Change] Require Ruby >= 3.2.0.
|
|
14
|
+
- [Maintenance] Configure trusted publishing for RubyGems releases.
|
|
15
|
+
|
|
16
|
+
## 0.1.0 (2025-12-18)
|
|
17
|
+
- **[Feature]** Initial release with Eisel-Lemire algorithm implementation.
|
|
18
|
+
- [Feature] `FastFloatLemire.parse` method for single string-to-float conversion.
|
|
19
|
+
- [Feature] `FastFloatLemire.parse_array` method for bulk parsing.
|
|
20
|
+
- [Feature] Support for special values: infinity, NaN, scientific notation.
|
|
21
|
+
- [Feature] 128-bit multiplication with precomputed powers of 5 for high precision.
|
data/README.md
CHANGED
|
@@ -7,14 +7,25 @@ Eisel-Lemire algorithm for string-to-float conversion in Ruby.
|
|
|
7
7
|
|
|
8
8
|
## About
|
|
9
9
|
|
|
10
|
-
This
|
|
10
|
+
This gem implements the Eisel-Lemire algorithm with additional fast paths for common number formats.
|
|
11
11
|
|
|
12
12
|
| Number Type | vs String#to_f |
|
|
13
13
|
|-------------|----------------|
|
|
14
|
-
| Simple (`"1.5"`, `"
|
|
15
|
-
|
|
|
14
|
+
| Simple decimals (`"1.5"`, `"3.14"`) | **~7% faster** |
|
|
15
|
+
| Prices (`"9.99"`, `"19.95"`) | **~3% faster** |
|
|
16
|
+
| Scientific (`"1e5"`) | ~6% slower |
|
|
17
|
+
| Complex (`"3.141592653589793"`) | **~2.8x faster** |
|
|
16
18
|
|
|
17
|
-
|
|
19
|
+
### Optimizations
|
|
20
|
+
|
|
21
|
+
The implementation includes several fast paths that bypass the full Eisel-Lemire algorithm:
|
|
22
|
+
|
|
23
|
+
1. **Small integer fast path** - handles `"5"`, `"42"`, `"-123"` (up to 3 digits)
|
|
24
|
+
2. **Simple decimal fast path** - handles `"1.5"`, `"9.99"`, `"199.95"` (up to 3+3 digits)
|
|
25
|
+
3. **Exact power-of-10 fast path** - uses precomputed exact powers of 10 (10^0 to 10^22)
|
|
26
|
+
4. **Removed overhead** - no `strlen()`, single whitespace skip
|
|
27
|
+
|
|
28
|
+
These optimizations are based on insights from [Nigel Tao's Eisel-Lemire blog post](https://nigeltao.github.io/blog/2020/eisel-lemire.html).
|
|
18
29
|
|
|
19
30
|
## Installation
|
|
20
31
|
|
|
@@ -33,6 +33,22 @@
|
|
|
33
33
|
#define DOUBLE_MANTISSA_BITS 52
|
|
34
34
|
#define DOUBLE_EXPONENT_BIAS 1023
|
|
35
35
|
|
|
36
|
+
/* 2^53: every integer up to this value converts to a double without rounding. */
#define MAX_EXACT_MANTISSA 9007199254740992ULL

/* Largest n for which 10^n is exactly representable as an IEEE 754 double. */
#define MAX_EXACT_POW10 22

/*
 * Lookup table for the exact fast path, indexed by decimal exponent.
 * Holds 10^0 through 10^MAX_EXACT_POW10; each entry is exactly
 * representable as an IEEE 754 double.
 */
static const double EXACT_POWERS_OF_10[] = {
    1e0,  1e1,  1e2,  1e3,  1e4,
    1e5,  1e6,  1e7,  1e8,  1e9,
    1e10, 1e11, 1e12, 1e13, 1e14,
    1e15, 1e16, 1e17, 1e18, 1e19,
    1e20, 1e21, 1e22
};
|
|
51
|
+
|
|
36
52
|
/* ============================================================================
|
|
37
53
|
* 128-bit multiplication helpers
|
|
38
54
|
* ============================================================================ */
|
|
@@ -984,15 +1000,210 @@ parse_decimal(const char *p, const char *end, uint64_t *mantissa, int *exp10,
|
|
|
984
1000
|
return p;
|
|
985
1001
|
}
|
|
986
1002
|
|
|
1003
|
+
/*
 * Fast path for short integers: an optional sign followed by one to three
 * decimal digits, terminated by NUL or whitespace ("5", "42", "-123").
 *
 * p      - NUL-terminated input; no leading whitespace is skipped here
 * result - receives the parsed value on success, untouched on failure
 *
 * Returns true when the input matched and *result was written. Returns
 * false when the general parser must take over: no leading digit, a fourth
 * digit, a decimal point, an exponent, or any other trailing character.
 */
static inline bool
try_small_integer_fast_path(const char *p, double *result)
{
    const char *cur = p;
    bool is_negative = (*cur == '-');
    uint64_t magnitude = 0;
    int ndigits = 0;

    /* Step over a single leading sign, if any */
    if (is_negative || *cur == '+') {
        cur++;
    }

    /* Accumulate at most three digits */
    while (ndigits < 3 && is_digit(*cur)) {
        magnitude = magnitude * 10 + (*cur - '0');
        cur++;
        ndigits++;
    }

    if (ndigits == 0) {
        return false;
    }

    /* Anything other than NUL or whitespace here belongs to the slow path */
    if (*cur != '\0' && !is_space(*cur)) {
        return false;
    }

    if (is_negative) {
        *result = -(double)magnitude;
    } else {
        *result = (double)magnitude;
    }
    return true;
}
|
|
1043
|
+
|
|
1044
|
+
/*
 * Fast path for short plain decimals such as "1.5", "9.99" or "-199.95".
 *
 * Accepts: optional sign, 1-3 integer digits, '.', 1-3 fractional digits,
 * then NUL or whitespace. Anything else (no leading digit, a fourth digit on
 * either side of the point, a missing fraction, an exponent, other trailing
 * characters) returns false so the caller can use the general parser.
 *
 * p      - NUL-terminated input; no leading whitespace is skipped here
 * result - receives the parsed value on success, untouched on failure
 *
 * The value is formed as ONE integer (at most 999999) divided by 10, 100 or
 * 1000. Both operands are exactly representable as doubles, so the single
 * IEEE 754 division rounds only once and yields the nearest double.
 * Computing int_part + frac_part / 10^n instead rounds twice and can be off
 * by one ulp: "1.57" came out as 1.5699999999999998 under round-to-nearest.
 */
static inline bool
try_simple_decimal_fast_path(const char *p, double *result)
{
    bool negative = false;
    uint64_t int_part = 0;
    uint64_t frac_part = 0;
    uint64_t scale = 1;     /* 10^(number of fractional digits) */
    int ndigits;
    double val;

    /* Handle optional sign */
    if (*p == '-') {
        negative = true;
        p++;
    } else if (*p == '+') {
        p++;
    }

    /* Integer part: 1 to 3 digits; a 4th digit defers to the regular path */
    for (ndigits = 0; is_digit(*p); ndigits++) {
        if (ndigits == 3) {
            return false;
        }
        int_part = int_part * 10 + (uint64_t)(*p++ - '0');
    }
    if (ndigits == 0) {
        return false;
    }

    /* Must have a decimal point (plain integers use the integer fast path) */
    if (*p != '.') {
        return false;
    }
    p++;

    /* Fractional part: 1 to 3 digits; a 4th digit defers to the regular path */
    for (ndigits = 0; is_digit(*p); ndigits++) {
        if (ndigits == 3) {
            return false;
        }
        frac_part = frac_part * 10 + (uint64_t)(*p++ - '0');
        scale *= 10;
    }
    if (ndigits == 0) {
        return false;
    }

    /* Must end cleanly (no exponent or other trailing characters) */
    if (*p != '\0' && !is_space(*p)) {
        return false;
    }

    /* Exact integer numerator, exact divisor, one correctly rounded divide */
    val = (double)(int_part * scale + frac_part) / (double)scale;
    *result = negative ? -val : val;
    return true;
}
|
|
1120
|
+
|
|
1121
|
+
/*
|
|
1122
|
+
* Exact power-of-10 fast path.
|
|
1123
|
+
* For small mantissas and exponents in [-22, 22], we can compute exactly.
|
|
1124
|
+
*
|
|
1125
|
+
* Based on insight from Nigel Tao's blog:
|
|
1126
|
+
* "If the mantissa fits within 53 bits and the exponent is among the first
|
|
1127
|
+
* 23 powers of 10, the value is exactly representable as an f64."
|
|
1128
|
+
*/
|
|
1129
|
+
static inline bool
|
|
1130
|
+
try_exact_pow10_fast_path(uint64_t mantissa, int exp10, bool negative, double *result)
|
|
1131
|
+
{
|
|
1132
|
+
/* Zero is always exact */
|
|
1133
|
+
if (mantissa == 0) {
|
|
1134
|
+
*result = negative ? -0.0 : 0.0;
|
|
1135
|
+
return true;
|
|
1136
|
+
}
|
|
1137
|
+
|
|
1138
|
+
/*
|
|
1139
|
+
* For positive exponents: mantissa * 10^exp must fit in 53 bits.
|
|
1140
|
+
* We check: mantissa <= 2^53 / 10^exp
|
|
1141
|
+
*/
|
|
1142
|
+
if (exp10 >= 0 && exp10 <= MAX_EXACT_POW10) {
|
|
1143
|
+
/* Check if mantissa * 10^exp10 would overflow 53-bit precision */
|
|
1144
|
+
double m = (double)mantissa;
|
|
1145
|
+
double p = EXACT_POWERS_OF_10[exp10];
|
|
1146
|
+
double val = m * p;
|
|
1147
|
+
|
|
1148
|
+
/*
|
|
1149
|
+
* The multiplication is exact if:
|
|
1150
|
+
* 1. mantissa fits in 53 bits (always true for <= 19 digits)
|
|
1151
|
+
* 2. The result fits in 53 bits
|
|
1152
|
+
*/
|
|
1153
|
+
if (mantissa < MAX_EXACT_MANTISSA && val < (double)MAX_EXACT_MANTISSA) {
|
|
1154
|
+
*result = negative ? -val : val;
|
|
1155
|
+
return true;
|
|
1156
|
+
}
|
|
1157
|
+
}
|
|
1158
|
+
|
|
1159
|
+
/*
|
|
1160
|
+
* For negative exponents: we divide by power of 10.
|
|
1161
|
+
* This is trickier because division isn't exact, but for small mantissas
|
|
1162
|
+
* and small negative exponents, the result is often correct.
|
|
1163
|
+
*
|
|
1164
|
+
* We use a conservative check: if mantissa is small enough that
|
|
1165
|
+
* all 53 bits of precision are preserved after division.
|
|
1166
|
+
*/
|
|
1167
|
+
if (exp10 < 0 && exp10 >= -MAX_EXACT_POW10) {
|
|
1168
|
+
/*
|
|
1169
|
+
* For negative exponents, use multiplication by reciprocal.
|
|
1170
|
+
* This is faster than division but may lose precision.
|
|
1171
|
+
*
|
|
1172
|
+
* However, if the mantissa has few significant digits relative to
|
|
1173
|
+
* the exponent magnitude, we can still get exact results.
|
|
1174
|
+
*
|
|
1175
|
+
* Conservative approach: only use this for very small mantissas
|
|
1176
|
+
* where we're confident the result is correct.
|
|
1177
|
+
*
|
|
1178
|
+
* A mantissa with <= 15 significant digits divided by 10^n where n <= 7
|
|
1179
|
+
* will typically give correct results.
|
|
1180
|
+
*/
|
|
1181
|
+
if (mantissa < 1000000000000000ULL && exp10 >= -7) {
|
|
1182
|
+
double m = (double)mantissa;
|
|
1183
|
+
double p = EXACT_POWERS_OF_10[-exp10];
|
|
1184
|
+
double val = m / p;
|
|
1185
|
+
*result = negative ? -val : val;
|
|
1186
|
+
return true;
|
|
1187
|
+
}
|
|
1188
|
+
}
|
|
1189
|
+
|
|
1190
|
+
return false;
|
|
1191
|
+
}
|
|
1192
|
+
|
|
987
1193
|
/*
|
|
988
1194
|
* Main parsing function using Eisel-Lemire algorithm.
|
|
989
1195
|
* Falls back to Ruby's strtod for edge cases.
|
|
1196
|
+
*
|
|
1197
|
+
* Optimizations over basic implementation:
|
|
1198
|
+
* 1. Ultra-fast path for small integers (no strlen, minimal parsing)
|
|
1199
|
+
* 2. Exact power-of-10 path for simple decimals (avoids 128-bit math)
|
|
1200
|
+
* 3. Removed strlen() - parse until null terminator
|
|
1201
|
+
* 4. Single whitespace skip (not duplicated)
|
|
990
1202
|
*/
|
|
991
1203
|
static double
|
|
992
1204
|
fast_float_parse(const char *str)
|
|
993
1205
|
{
|
|
994
1206
|
const char *p = str;
|
|
995
|
-
const char *end = str + strlen(str);
|
|
996
1207
|
uint64_t mantissa;
|
|
997
1208
|
int exp10;
|
|
998
1209
|
bool negative;
|
|
@@ -1001,22 +1212,32 @@ fast_float_parse(const char *str)
|
|
|
1001
1212
|
double result;
|
|
1002
1213
|
|
|
1003
1214
|
/* Skip leading whitespace */
|
|
1004
|
-
while (
|
|
1215
|
+
while (is_space(*p)) p++;
|
|
1005
1216
|
|
|
1006
|
-
/*
|
|
1007
|
-
if (p
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1217
|
+
/* Ultra-fast path for small integers (handles "5", "42", "-123", etc.) */
|
|
1218
|
+
if (try_small_integer_fast_path(p, &result)) {
|
|
1219
|
+
return result;
|
|
1220
|
+
}
|
|
1221
|
+
|
|
1222
|
+
/* Ultra-fast path for simple decimals (handles "1.5", "9.99", "3.14", etc.) */
|
|
1223
|
+
if (try_simple_decimal_fast_path(p, &result)) {
|
|
1224
|
+
return result;
|
|
1225
|
+
}
|
|
1226
|
+
|
|
1227
|
+
/* Handle special values - check without strlen */
|
|
1228
|
+
if (*p != '\0') {
|
|
1229
|
+
if ((*p == 'i' || *p == 'I') &&
|
|
1230
|
+
(p[1] == 'n' || p[1] == 'N') &&
|
|
1231
|
+
(p[2] == 'f' || p[2] == 'F')) {
|
|
1232
|
+
return INFINITY;
|
|
1012
1233
|
}
|
|
1013
|
-
if ((p
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1234
|
+
if ((*p == 'n' || *p == 'N') &&
|
|
1235
|
+
(p[1] == 'a' || p[1] == 'A') &&
|
|
1236
|
+
(p[2] == 'n' || p[2] == 'N')) {
|
|
1237
|
+
return NAN;
|
|
1017
1238
|
}
|
|
1018
|
-
if (
|
|
1019
|
-
bool neg = (p
|
|
1239
|
+
if (*p == '+' || *p == '-') {
|
|
1240
|
+
bool neg = (*p == '-');
|
|
1020
1241
|
if ((p[1] == 'i' || p[1] == 'I') &&
|
|
1021
1242
|
(p[2] == 'n' || p[2] == 'N') &&
|
|
1022
1243
|
(p[3] == 'f' || p[3] == 'F')) {
|
|
@@ -1034,14 +1255,19 @@ fast_float_parse(const char *str)
|
|
|
1034
1255
|
}
|
|
1035
1256
|
}
|
|
1036
1257
|
|
|
1037
|
-
/* Parse decimal */
|
|
1038
|
-
parse_decimal(str,
|
|
1258
|
+
/* Parse decimal - pass large end pointer to avoid strlen */
|
|
1259
|
+
parse_decimal(str, str + 1000, &mantissa, &exp10, &negative, &valid, &too_many_digits);
|
|
1039
1260
|
|
|
1040
1261
|
if (!valid || too_many_digits) {
|
|
1041
1262
|
return strtod(str, NULL);
|
|
1042
1263
|
}
|
|
1043
1264
|
|
|
1044
|
-
/* Try
|
|
1265
|
+
/* Try exact power-of-10 fast path (avoids 128-bit math) */
|
|
1266
|
+
if (try_exact_pow10_fast_path(mantissa, exp10, negative, &result)) {
|
|
1267
|
+
return result;
|
|
1268
|
+
}
|
|
1269
|
+
|
|
1270
|
+
/* Try Eisel-Lemire for complex cases */
|
|
1045
1271
|
if (eisel_lemire64(mantissa, exp10, negative, &result)) {
|
|
1046
1272
|
return result;
|
|
1047
1273
|
}
|
|
Binary file
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: fast_float_lemire
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Maciej Mensfeld
|
|
@@ -24,6 +24,7 @@ extensions:
|
|
|
24
24
|
- ext/fast_float_lemire/extconf.rb
|
|
25
25
|
extra_rdoc_files: []
|
|
26
26
|
files:
|
|
27
|
+
- CHANGELOG.md
|
|
27
28
|
- LICENSE.txt
|
|
28
29
|
- README.md
|
|
29
30
|
- ext/fast_float_lemire/extconf.rb
|
|
@@ -36,6 +37,7 @@ licenses:
|
|
|
36
37
|
- MIT
|
|
37
38
|
metadata:
|
|
38
39
|
homepage_uri: https://github.com/mensfeld/fast_float_lemire
|
|
40
|
+
changelog_uri: https://github.com/mensfeld/fast_float_lemire/blob/master/CHANGELOG.md
|
|
39
41
|
source_code_uri: https://github.com/mensfeld/fast_float_lemire
|
|
40
42
|
documentation_uri: https://github.com/mensfeld/fast_float_lemire
|
|
41
43
|
rubygems_mfa_required: 'true'
|
|
@@ -53,7 +55,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
53
55
|
- !ruby/object:Gem::Version
|
|
54
56
|
version: '0'
|
|
55
57
|
requirements: []
|
|
56
|
-
rubygems_version: 3.6.
|
|
58
|
+
rubygems_version: 3.6.9
|
|
57
59
|
specification_version: 4
|
|
58
60
|
summary: Eisel-Lemire algorithm for fast string-to-float conversion
|
|
59
61
|
test_files: []
|