json 2.6.3 → 2.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/BSDL +22 -0
- data/CHANGES.md +212 -17
- data/LEGAL +8 -0
- data/README.md +75 -219
- data/ext/json/ext/fbuffer/fbuffer.h +178 -95
- data/ext/json/ext/generator/extconf.rb +14 -2
- data/ext/json/ext/generator/generator.c +1336 -805
- data/ext/json/ext/parser/extconf.rb +8 -25
- data/ext/json/ext/parser/parser.c +1365 -3205
- data/ext/json/ext/simd/conf.rb +20 -0
- data/ext/json/ext/simd/simd.h +187 -0
- data/ext/json/ext/vendor/fpconv.c +479 -0
- data/ext/json/ext/vendor/jeaiii-ltoa.h +267 -0
- data/json.gemspec +48 -53
- data/lib/json/add/bigdecimal.rb +39 -10
- data/lib/json/add/complex.rb +29 -6
- data/lib/json/add/core.rb +1 -1
- data/lib/json/add/date.rb +27 -7
- data/lib/json/add/date_time.rb +26 -9
- data/lib/json/add/exception.rb +25 -7
- data/lib/json/add/ostruct.rb +32 -9
- data/lib/json/add/range.rb +33 -8
- data/lib/json/add/rational.rb +28 -6
- data/lib/json/add/regexp.rb +26 -8
- data/lib/json/add/set.rb +25 -6
- data/lib/json/add/struct.rb +29 -7
- data/lib/json/add/symbol.rb +34 -7
- data/lib/json/add/time.rb +29 -15
- data/lib/json/common.rb +654 -253
- data/lib/json/ext/generator/state.rb +106 -0
- data/lib/json/ext.rb +35 -5
- data/lib/json/generic_object.rb +7 -3
- data/lib/json/truffle_ruby/generator.rb +690 -0
- data/lib/json/version.rb +3 -7
- data/lib/json.rb +58 -21
- metadata +19 -26
- data/VERSION +0 -1
- data/ext/json/ext/generator/depend +0 -1
- data/ext/json/ext/generator/generator.h +0 -174
- data/ext/json/ext/parser/depend +0 -1
- data/ext/json/ext/parser/parser.h +0 -96
- data/ext/json/ext/parser/parser.rl +0 -986
- data/ext/json/extconf.rb +0 -3
- data/lib/json/pure/generator.rb +0 -479
- data/lib/json/pure/parser.rb +0 -337
- data/lib/json/pure.rb +0 -15
- /data/{LICENSE → COPYING} +0 -0
@@ -0,0 +1,20 @@
|
|
1
|
+
# Decide whether the C extension may be built with SIMD support.
#
# A header / vector type / initializer triple is picked from the host CPU,
# then a tiny program using them is compiled. JSON_ENABLE_SIMD is defined
# only when the header exists AND that program compiles.
case RbConfig::CONFIG['host_cpu']
when /^(arm|aarch64)/
  # Try to compile a small program using NEON instructions
  header, type, init = 'arm_neon.h', 'uint8x16_t', 'vdupq_n_u8(32)'
when /^(x86_64|x64)/
  header, type, init = 'x86intrin.h', '__m128i', '_mm_set1_epi8(32)'
end

if header
  # <stdio.h> is included explicitly: the probe calls printf, and an implicit
  # declaration would make the probe fail for reasons unrelated to SIMD.
  if have_header(header) && try_compile(<<~SRC)
    #include <stdio.h>
    #{cpp_include(header)}
    int main(int argc, char **argv) {
      #{type} test = #{init};
      if (argc > 100000) printf("%p", (void *)&test);
      return 0;
    }
  SRC
    $defs.push("-DJSON_ENABLE_SIMD")
  end
end

have_header('cpuid.h')
|
@@ -0,0 +1,187 @@
|
|
1
|
+
/* Identifies the SIMD code path reported by find_simd_implementation(). */
typedef enum {
    SIMD_NONE = 0, /* no SIMD implementation available */
    SIMD_NEON = 1, /* ARM NEON */
    SIMD_SSE2 = 2  /* x86-64 SSE2 */
} SIMD_Implementation;
|
6
|
+
|
7
|
+
#ifdef JSON_ENABLE_SIMD
|
8
|
+
|
9
|
+
/*
 * HAVE_BUILTIN_CTZLL is 1 when the compiler provides the count-trailing-zeros
 * builtins and 0 otherwise. clang is asked directly via __has_builtin; GCC is
 * assumed to provide them from version 4.3 onwards.
 */
#if defined(__clang__)
#  if __has_builtin(__builtin_ctzll)
#    define HAVE_BUILTIN_CTZLL 1
#  endif
#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
#  define HAVE_BUILTIN_CTZLL 1
#endif

#ifndef HAVE_BUILTIN_CTZLL
#  define HAVE_BUILTIN_CTZLL 0
#endif
|
20
|
+
|
21
|
+
/*
 * Returns the number of consecutive zero bits at the low end of `input`.
 *
 * Uses the compiler builtin when HAVE_BUILTIN_CTZLL is set, otherwise a
 * bit-by-bit loop. The loop returns 0 for an input of 0.
 * NOTE(review): the builtin's result for 0 is not defined by the compiler
 * documentation, so callers should pass a non-zero value - confirm.
 */
static inline uint32_t trailing_zeros64(uint64_t input)
{
#if HAVE_BUILTIN_CTZLL
    return __builtin_ctzll(input);
#else
    uint32_t count = 0;

    for (uint64_t bits = input; bits != 0 && (bits & 1) == 0; bits >>= 1) {
        count++;
    }

    return count;
#endif
}
|
35
|
+
|
36
|
+
/*
 * Returns the number of consecutive zero bits at the low end of `input`,
 * treating it as a 32-bit pattern.
 *
 * Uses the compiler builtin when HAVE_BUILTIN_CTZLL is set, otherwise a
 * bit-by-bit loop. The loop works on an unsigned copy so that inputs with the
 * sign bit set are counted correctly and no signed value is right-shifted.
 * The loop returns 0 for an input of 0.
 * NOTE(review): the builtin's result for 0 is not defined by the compiler
 * documentation, so callers should pass a non-zero value - confirm.
 */
static inline int trailing_zeros(int input)
{
#if HAVE_BUILTIN_CTZLL
    return __builtin_ctz(input);
#else
    unsigned int bits = (unsigned int)input;
    int count = 0;

    while (bits != 0 && (bits & 1u) == 0) {
        count++;
        bits >>= 1;
    }

    return count;
#endif
}
|
50
|
+
|
51
|
+
#if (defined(__GNUC__ ) || defined(__clang__))
|
52
|
+
#define FORCE_INLINE __attribute__((always_inline))
|
53
|
+
#else
|
54
|
+
#define FORCE_INLINE
|
55
|
+
#endif
|
56
|
+
|
57
|
+
|
58
|
+
#define SIMD_MINIMUM_THRESHOLD 6
|
59
|
+
|
60
|
+
#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
|
61
|
+
#include <arm_neon.h>
|
62
|
+
|
63
|
+
#define FIND_SIMD_IMPLEMENTATION_DEFINED 1
|
64
|
+
/* ARM build: always reports SIMD_NEON; no runtime probing is performed here. */
static inline SIMD_Implementation find_simd_implementation(void)
|
65
|
+
{
|
66
|
+
return SIMD_NEON;
|
67
|
+
}
|
68
|
+
|
69
|
+
#define HAVE_SIMD 1
|
70
|
+
#define HAVE_SIMD_NEON 1
|
71
|
+
|
72
|
+
// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
73
|
+
static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
|
74
|
+
{
|
75
|
+
const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
|
76
|
+
const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
|
77
|
+
return mask & 0x8888888888888888ull;
|
78
|
+
}
|
79
|
+
|
80
|
+
/*
 * Loads 16 bytes from `ptr` and returns the neon_match_mask() of the bytes
 * that are below 32, equal to '"' (34) or equal to '\\'. A zero result means
 * no byte in the chunk matched.
 */
static inline FORCE_INLINE uint64_t compute_chunk_mask_neon(const char *ptr)
{
    const uint8x16_t bytes = vld1q_u8((const unsigned char *)ptr);

    // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
    // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
    const uint8x16_t ctrl_or_quote = vcltq_u8(veorq_u8(bytes, vdupq_n_u8(2)), vdupq_n_u8(33));
    const uint8x16_t backslash = vceqq_u8(bytes, vdupq_n_u8('\\'));

    return neon_match_mask(vorrq_u8(ctrl_or_quote, backslash));
}
|
92
|
+
|
93
|
+
static inline FORCE_INLINE int string_scan_simd_neon(const char **ptr, const char *end, uint64_t *mask)
|
94
|
+
{
|
95
|
+
while (*ptr + sizeof(uint8x16_t) <= end) {
|
96
|
+
uint64_t chunk_mask = compute_chunk_mask_neon(*ptr);
|
97
|
+
if (chunk_mask) {
|
98
|
+
*mask = chunk_mask;
|
99
|
+
return 1;
|
100
|
+
}
|
101
|
+
*ptr += sizeof(uint8x16_t);
|
102
|
+
}
|
103
|
+
return 0;
|
104
|
+
}
|
105
|
+
|
106
|
+
uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
|
107
|
+
uint8x16x4_t tab;
|
108
|
+
tab.val[0] = vld1q_u8(table);
|
109
|
+
tab.val[1] = vld1q_u8(table+16);
|
110
|
+
tab.val[2] = vld1q_u8(table+32);
|
111
|
+
tab.val[3] = vld1q_u8(table+48);
|
112
|
+
return tab;
|
113
|
+
}
|
114
|
+
|
115
|
+
#endif /* ARM Neon Support.*/
|
116
|
+
|
117
|
+
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
|
118
|
+
|
119
|
+
#ifdef HAVE_X86INTRIN_H
|
120
|
+
#include <x86intrin.h>
|
121
|
+
|
122
|
+
#define HAVE_SIMD 1
|
123
|
+
#define HAVE_SIMD_SSE2 1
|
124
|
+
|
125
|
+
#ifdef HAVE_CPUID_H
|
126
|
+
#define FIND_SIMD_IMPLEMENTATION_DEFINED 1
|
127
|
+
|
128
|
+
#if defined(__clang__) || defined(__GNUC__)
|
129
|
+
#define TARGET_SSE2 __attribute__((target("sse2")))
|
130
|
+
#else
|
131
|
+
#define TARGET_SSE2
|
132
|
+
#endif
|
133
|
+
|
134
|
+
#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
|
135
|
+
#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
|
136
|
+
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
|
137
|
+
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
|
138
|
+
|
139
|
+
static inline TARGET_SSE2 FORCE_INLINE int compute_chunk_mask_sse2(const char *ptr)
|
140
|
+
{
|
141
|
+
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
|
142
|
+
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
143
|
+
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
144
|
+
__m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
|
145
|
+
__m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
|
146
|
+
__m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
|
147
|
+
return _mm_movemask_epi8(needs_escape);
|
148
|
+
}
|
149
|
+
|
150
|
+
static inline TARGET_SSE2 FORCE_INLINE int string_scan_simd_sse2(const char **ptr, const char *end, int *mask)
|
151
|
+
{
|
152
|
+
while (*ptr + sizeof(__m128i) <= end) {
|
153
|
+
int chunk_mask = compute_chunk_mask_sse2(*ptr);
|
154
|
+
if (chunk_mask) {
|
155
|
+
*mask = chunk_mask;
|
156
|
+
return 1;
|
157
|
+
}
|
158
|
+
*ptr += sizeof(__m128i);
|
159
|
+
}
|
160
|
+
|
161
|
+
return 0;
|
162
|
+
}
|
163
|
+
|
164
|
+
#include <cpuid.h>
|
165
|
+
#endif /* HAVE_CPUID_H */
|
166
|
+
|
167
|
+
/*
 * x86-64 runtime detection: reports SIMD_SSE2 when the CPU supports SSE2,
 * SIMD_NONE otherwise.
 *
 * Only compiled when HAVE_CPUID_H is set. FIND_SIMD_IMPLEMENTATION_DEFINED
 * and the SSE2 scanners are defined under that same condition, so without
 * the guard the generic fallback at the end of this header would define
 * find_simd_implementation() a second time.
 */
#ifdef HAVE_CPUID_H
static inline SIMD_Implementation find_simd_implementation(void)
{
    // TODO Revisit. I think the SSE version now only uses SSE2 instructions.
    if (__builtin_cpu_supports("sse2")) {
        return SIMD_SSE2;
    }

    return SIMD_NONE;
}
#endif /* HAVE_CPUID_H */
|
176
|
+
|
177
|
+
#endif /* HAVE_X86INTRIN_H */
|
178
|
+
#endif /* X86_64 Support */
|
179
|
+
|
180
|
+
#endif /* JSON_ENABLE_SIMD */
|
181
|
+
|
182
|
+
/*
 * Generic fallback: compiled only when no platform-specific section above
 * defined FIND_SIMD_IMPLEMENTATION_DEFINED. Always reports SIMD_NONE.
 */
#ifndef FIND_SIMD_IMPLEMENTATION_DEFINED
|
183
|
+
static inline SIMD_Implementation find_simd_implementation(void)
|
184
|
+
{
|
185
|
+
return SIMD_NONE;
|
186
|
+
}
|
187
|
+
#endif
|
@@ -0,0 +1,479 @@
|
|
1
|
+
// Boost Software License - Version 1.0 - August 17th, 2003
|
2
|
+
//
|
3
|
+
// Permission is hereby granted, free of charge, to any person or organization
|
4
|
+
// obtaining a copy of the software and accompanying documentation covered by
|
5
|
+
// this license (the "Software") to use, reproduce, display, distribute,
|
6
|
+
// execute, and transmit the Software, and to prepare derivative works of the
|
7
|
+
// Software, and to permit third-parties to whom the Software is furnished to
|
8
|
+
// do so, all subject to the following:
|
9
|
+
//
|
10
|
+
// The copyright notices in the Software and this entire statement, including
|
11
|
+
// the above license grant, this restriction and the following disclaimer,
|
12
|
+
// must be included in all copies of the Software, in whole or in part, and
|
13
|
+
// all derivative works of the Software, unless such copies or derivative
|
14
|
+
// works are solely in the form of machine-executable object code generated by
|
15
|
+
// a source language processor.
|
16
|
+
//
|
17
|
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
18
|
+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
19
|
+
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
20
|
+
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
21
|
+
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
22
|
+
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
23
|
+
// DEALINGS IN THE SOFTWARE.
|
24
|
+
|
25
|
+
// The contents of this file are extracted from https://github.com/night-shift/fpconv
|
26
|
+
// It was slightly modified to append ".0" to plain floats, for use with the https://github.com/ruby/json package.
|
27
|
+
|
28
|
+
#include <stdbool.h>
|
29
|
+
#include <string.h>
|
30
|
+
#include <stdint.h>
|
31
|
+
|
32
|
+
#define npowers 87
|
33
|
+
#define steppowers 8
|
34
|
+
#define firstpower -348 /* 10 ^ -348 */
|
35
|
+
|
36
|
+
#define expmax -32
|
37
|
+
#define expmin -60
|
38
|
+
|
39
|
+
/*
 * Software floating-point value used throughout this file: a 64-bit
 * significand plus a binary exponent (as built by build_fp(), the value is
 * frac * 2^exp).
 */
struct Fp {
    uint64_t frac; /* significand */
    int exp;       /* power-of-two exponent applied to frac */
};

typedef struct Fp Fp;
|
43
|
+
|
44
|
+
static const Fp powers_ten[] = {
|
45
|
+
{ 18054884314459144840U, -1220 }, { 13451937075301367670U, -1193 },
|
46
|
+
{ 10022474136428063862U, -1166 }, { 14934650266808366570U, -1140 },
|
47
|
+
{ 11127181549972568877U, -1113 }, { 16580792590934885855U, -1087 },
|
48
|
+
{ 12353653155963782858U, -1060 }, { 18408377700990114895U, -1034 },
|
49
|
+
{ 13715310171984221708U, -1007 }, { 10218702384817765436U, -980 },
|
50
|
+
{ 15227053142812498563U, -954 }, { 11345038669416679861U, -927 },
|
51
|
+
{ 16905424996341287883U, -901 }, { 12595523146049147757U, -874 },
|
52
|
+
{ 9384396036005875287U, -847 }, { 13983839803942852151U, -821 },
|
53
|
+
{ 10418772551374772303U, -794 }, { 15525180923007089351U, -768 },
|
54
|
+
{ 11567161174868858868U, -741 }, { 17236413322193710309U, -715 },
|
55
|
+
{ 12842128665889583758U, -688 }, { 9568131466127621947U, -661 },
|
56
|
+
{ 14257626930069360058U, -635 }, { 10622759856335341974U, -608 },
|
57
|
+
{ 15829145694278690180U, -582 }, { 11793632577567316726U, -555 },
|
58
|
+
{ 17573882009934360870U, -529 }, { 13093562431584567480U, -502 },
|
59
|
+
{ 9755464219737475723U, -475 }, { 14536774485912137811U, -449 },
|
60
|
+
{ 10830740992659433045U, -422 }, { 16139061738043178685U, -396 },
|
61
|
+
{ 12024538023802026127U, -369 }, { 17917957937422433684U, -343 },
|
62
|
+
{ 13349918974505688015U, -316 }, { 9946464728195732843U, -289 },
|
63
|
+
{ 14821387422376473014U, -263 }, { 11042794154864902060U, -236 },
|
64
|
+
{ 16455045573212060422U, -210 }, { 12259964326927110867U, -183 },
|
65
|
+
{ 18268770466636286478U, -157 }, { 13611294676837538539U, -130 },
|
66
|
+
{ 10141204801825835212U, -103 }, { 15111572745182864684U, -77 },
|
67
|
+
{ 11258999068426240000U, -50 }, { 16777216000000000000U, -24 },
|
68
|
+
{ 12500000000000000000U, 3 }, { 9313225746154785156U, 30 },
|
69
|
+
{ 13877787807814456755U, 56 }, { 10339757656912845936U, 83 },
|
70
|
+
{ 15407439555097886824U, 109 }, { 11479437019748901445U, 136 },
|
71
|
+
{ 17105694144590052135U, 162 }, { 12744735289059618216U, 189 },
|
72
|
+
{ 9495567745759798747U, 216 }, { 14149498560666738074U, 242 },
|
73
|
+
{ 10542197943230523224U, 269 }, { 15709099088952724970U, 295 },
|
74
|
+
{ 11704190886730495818U, 322 }, { 17440603504673385349U, 348 },
|
75
|
+
{ 12994262207056124023U, 375 }, { 9681479787123295682U, 402 },
|
76
|
+
{ 14426529090290212157U, 428 }, { 10748601772107342003U, 455 },
|
77
|
+
{ 16016664761464807395U, 481 }, { 11933345169920330789U, 508 },
|
78
|
+
{ 17782069995880619868U, 534 }, { 13248674568444952270U, 561 },
|
79
|
+
{ 9871031767461413346U, 588 }, { 14708983551653345445U, 614 },
|
80
|
+
{ 10959046745042015199U, 641 }, { 16330252207878254650U, 667 },
|
81
|
+
{ 12166986024289022870U, 694 }, { 18130221999122236476U, 720 },
|
82
|
+
{ 13508068024458167312U, 747 }, { 10064294952495520794U, 774 },
|
83
|
+
{ 14996968138956309548U, 800 }, { 11173611982879273257U, 827 },
|
84
|
+
{ 16649979327439178909U, 853 }, { 12405201291620119593U, 880 },
|
85
|
+
{ 9242595204427927429U, 907 }, { 13772540099066387757U, 933 },
|
86
|
+
{ 10261342003245940623U, 960 }, { 15290591125556738113U, 986 },
|
87
|
+
{ 11392378155556871081U, 1013 }, { 16975966327722178521U, 1039 },
|
88
|
+
{ 12648080533535911531U, 1066 }
|
89
|
+
};
|
90
|
+
|
91
|
+
static Fp find_cachedpow10(int exp, int* k)
|
92
|
+
{
|
93
|
+
const double one_log_ten = 0.30102999566398114;
|
94
|
+
|
95
|
+
int approx = (int)(-(exp + npowers) * one_log_ten);
|
96
|
+
int idx = (approx - firstpower) / steppowers;
|
97
|
+
|
98
|
+
while(1) {
|
99
|
+
int current = exp + powers_ten[idx].exp + 64;
|
100
|
+
|
101
|
+
if(current < expmin) {
|
102
|
+
idx++;
|
103
|
+
continue;
|
104
|
+
}
|
105
|
+
|
106
|
+
if(current > expmax) {
|
107
|
+
idx--;
|
108
|
+
continue;
|
109
|
+
}
|
110
|
+
|
111
|
+
*k = (firstpower + idx * steppowers);
|
112
|
+
|
113
|
+
return powers_ten[idx];
|
114
|
+
}
|
115
|
+
}
|
116
|
+
|
117
|
+
#define fracmask 0x000FFFFFFFFFFFFFU
|
118
|
+
#define expmask 0x7FF0000000000000U
|
119
|
+
#define hiddenbit 0x0010000000000000U
|
120
|
+
#define signmask 0x8000000000000000U
|
121
|
+
#define expbias (1023 + 52)
|
122
|
+
|
123
|
+
#define absv(n) ((n) < 0 ? -(n) : (n))
|
124
|
+
#define minv(a, b) ((a) < (b) ? (a) : (b))
|
125
|
+
|
126
|
+
static const uint64_t tens[] = {
|
127
|
+
10000000000000000000U, 1000000000000000000U, 100000000000000000U,
|
128
|
+
10000000000000000U, 1000000000000000U, 100000000000000U,
|
129
|
+
10000000000000U, 1000000000000U, 100000000000U,
|
130
|
+
10000000000U, 1000000000U, 100000000U,
|
131
|
+
10000000U, 1000000U, 100000U,
|
132
|
+
10000U, 1000U, 100U,
|
133
|
+
10U, 1U
|
134
|
+
};
|
135
|
+
|
136
|
+
/* Returns the raw 64-bit object representation of `d`, without any numeric conversion. */
static inline uint64_t get_dbits(double d)
{
    uint64_t raw;

    memcpy(&raw, &d, sizeof raw);

    return raw;
}
|
145
|
+
|
146
|
+
static Fp build_fp(double d)
|
147
|
+
{
|
148
|
+
uint64_t bits = get_dbits(d);
|
149
|
+
|
150
|
+
Fp fp;
|
151
|
+
fp.frac = bits & fracmask;
|
152
|
+
fp.exp = (bits & expmask) >> 52;
|
153
|
+
|
154
|
+
if(fp.exp) {
|
155
|
+
fp.frac += hiddenbit;
|
156
|
+
fp.exp -= expbias;
|
157
|
+
|
158
|
+
} else {
|
159
|
+
fp.exp = -expbias + 1;
|
160
|
+
}
|
161
|
+
|
162
|
+
return fp;
|
163
|
+
}
|
164
|
+
|
165
|
+
/*
 * Normalizes *fp in place: shifts the significand left until the hidden bit
 * is set, then a further 11 bits so the top bit of the 64-bit word is set.
 * The exponent is decreased by the total shift.
 * NOTE(review): the first loop does not terminate when fp->frac is 0 -
 * callers appear to filter out zero beforehand; confirm.
 */
static void normalize(Fp* fp)
{
    const int shift = 64 - 52 - 1;

    for (; (fp->frac & hiddenbit) == 0; fp->exp--) {
        fp->frac <<= 1;
    }

    fp->frac <<= shift;
    fp->exp -= shift;
}
|
176
|
+
|
177
|
+
/*
 * Computes the two boundary values around *fp and stores them in *lower and
 * *upper, both expressed with the same exponent.
 *
 * upper is fp's significand doubled plus one (one exponent step finer),
 * shifted left until its top bit is set. lower is fp's significand shifted
 * by 1 (or by 2 when the significand is exactly the hidden bit) minus one,
 * then rescaled to upper's exponent.
 */
static void get_normalized_boundaries(Fp* fp, Fp* lower, Fp* upper)
{
    const uint64_t upper_top_bit = hiddenbit << 1;
    const int u_shift = 64 - 52 - 2;
    int l_shift;

    upper->frac = (fp->frac << 1) + 1;
    upper->exp = fp->exp - 1;

    for (; (upper->frac & upper_top_bit) == 0; upper->exp--) {
        upper->frac <<= 1;
    }

    upper->frac <<= u_shift;
    upper->exp -= u_shift;

    if (fp->frac == hiddenbit) {
        l_shift = 2;
    } else {
        l_shift = 1;
    }

    lower->frac = (fp->frac << l_shift) - 1;
    lower->exp = fp->exp - l_shift;

    /* bring lower onto the same exponent as upper */
    lower->frac <<= lower->exp - upper->exp;
    lower->exp = upper->exp;
}
|
202
|
+
|
203
|
+
/*
 * Multiplies two Fp values, keeping the upper 64 bits of the 128-bit
 * significand product (rounded by adding 2^31 to the middle sum before it is
 * shifted down). The result exponent is a->exp + b->exp + 64.
 */
static Fp multiply(Fp* a, Fp* b)
{
    const uint64_t lomask = 0x00000000FFFFFFFF;

    const uint64_t a_hi = a->frac >> 32;
    const uint64_t a_lo = a->frac & lomask;
    const uint64_t b_hi = b->frac >> 32;
    const uint64_t b_lo = b->frac & lomask;

    const uint64_t hi_lo = a_hi * b_lo;
    const uint64_t lo_hi = a_lo * b_hi;
    const uint64_t lo_lo = a_lo * b_lo;
    const uint64_t hi_hi = a_hi * b_hi;

    uint64_t middle = (hi_lo & lomask) + (lo_hi & lomask) + (lo_lo >> 32);
    /* round up */
    middle += 1U << 31;

    Fp product;
    product.frac = hi_hi + (hi_lo >> 32) + (lo_hi >> 32) + (middle >> 32);
    product.exp = a->exp + b->exp + 64;

    return product;
}
|
223
|
+
|
224
|
+
/*
 * Adjusts the last generated digit downwards while doing so brings the
 * remainder `rem` closer to `frac` without leaving the `delta` window.
 *
 * Each step decrements digits[ndigits - 1] and adds `kappa` to `rem`. The
 * loop stops as soon as rem >= frac, fewer than `kappa` units are left before
 * `delta`, or another step would overshoot `frac` by at least as much as the
 * current distance to it.
 */
static void round_digit(char* digits, int ndigits, uint64_t delta, uint64_t rem, uint64_t kappa, uint64_t frac)
{
    for (;;) {
        if (rem >= frac) {
            break;
        }
        if (delta - rem < kappa) {
            break;
        }
        if (rem + kappa >= frac && frac - rem <= rem + kappa - frac) {
            break;
        }

        digits[ndigits - 1]--;
        rem += kappa;
    }
}
|
233
|
+
|
234
|
+
/*
 * Writes the decimal digits of *upper into `digits` and returns how many
 * were written. Leading zeros are skipped.
 *
 * upper's significand is split at bit position -upper->exp into an integral
 * part and a fractional part. Digits come first from the integral part
 * (dividing by 10^9 down to 10^0), then from the fractional part (multiplying
 * by ten each round). Generation stops as soon as what remains fits inside
 * delta = upper - lower; round_digit() then adjusts the last digit towards
 * *fp, and the number of decimal positions not emitted is added to *K.
 */
static int generate_digits(Fp* fp, Fp* upper, Fp* lower, char* digits, int* K)
{
    const uint64_t wfrac = upper->frac - fp->frac;
    uint64_t delta = upper->frac - lower->frac;

    const int shift = -upper->exp;
    const uint64_t one_frac = 1ULL << shift;
    const uint64_t frac_mask = one_frac - 1;

    uint64_t part1 = upper->frac >> shift;
    uint64_t part2 = upper->frac & frac_mask;

    int idx = 0;
    int kappa = 10;

    /* integral part: tens[10] is 1000000000 */
    for (int pos = 10; kappa > 0; pos++) {
        const uint64_t div = tens[pos];
        const unsigned digit = (unsigned) (part1 / div);

        if (digit || idx) {
            digits[idx++] = digit + '0';
        }

        part1 -= digit * div;
        kappa--;

        const uint64_t rest = (part1 << shift) + part2;
        if (rest <= delta) {
            *K += kappa;
            round_digit(digits, idx, delta, rest, div << shift, wfrac);

            return idx;
        }
    }

    /* fractional part: tens[18] is 10 */
    for (int unit = 18; ; unit--) {
        part2 *= 10;
        delta *= 10;
        kappa--;

        const unsigned digit = (unsigned) (part2 >> shift);
        if (digit || idx) {
            digits[idx++] = digit + '0';
        }

        part2 &= frac_mask;
        if (part2 < delta) {
            *K += kappa;
            round_digit(digits, idx, delta, part2, one_frac, wfrac * tens[unit]);

            return idx;
        }
    }
}
|
294
|
+
|
295
|
+
static int grisu2(double d, char* digits, int* K)
|
296
|
+
{
|
297
|
+
Fp w = build_fp(d);
|
298
|
+
|
299
|
+
Fp lower, upper;
|
300
|
+
get_normalized_boundaries(&w, &lower, &upper);
|
301
|
+
|
302
|
+
normalize(&w);
|
303
|
+
|
304
|
+
int k;
|
305
|
+
Fp cp = find_cachedpow10(upper.exp, &k);
|
306
|
+
|
307
|
+
w = multiply(&w, &cp);
|
308
|
+
upper = multiply(&upper, &cp);
|
309
|
+
lower = multiply(&lower, &cp);
|
310
|
+
|
311
|
+
lower.frac++;
|
312
|
+
upper.frac--;
|
313
|
+
|
314
|
+
*K = -k;
|
315
|
+
|
316
|
+
return generate_digits(&w, &upper, &lower, digits, K);
|
317
|
+
}
|
318
|
+
|
319
|
+
/*
 * Formats `ndigits` decimal digits with decimal exponent `K` into `dest` and
 * returns the number of characters written (no terminator is added).
 *
 * Three layouts are produced:
 *   - K >= 0 and short enough: the digits, K zeros, then ".0";
 *   - K < 0 and (K > -7 or exponent < 10): plain decimal, with a leading
 *     "0." and zero padding when the value is below 1;
 *   - otherwise scientific notation "d.ddde+XX" / "d.ddde-XX".
 * `neg` only tightens the length limits; the sign itself is not written here.
 */
static int emit_digits(char* digits, int ndigits, char* dest, int K, bool neg)
{
    int exp = absv(K + ndigits - 1);
    const int max_trailing_zeros = neg ? 6 : 7;

    /* write plain integer */
    if (K >= 0 && exp < ndigits + max_trailing_zeros) {
        char *tail = dest + ndigits + K;

        memcpy(dest, digits, ndigits);
        memset(dest + ndigits, '0', K);

        /* add a .0 to mark this as a float. */
        tail[0] = '.';
        tail[1] = '0';

        return ndigits + K + 2;
    }

    /* write decimal w/o scientific notation */
    if (K < 0 && (K > -7 || exp < 10)) {
        const int offset = ndigits - absv(K);

        /* fp > 1.0 */
        if (offset > 0) {
            memcpy(dest, digits, offset);
            dest[offset] = '.';
            memcpy(dest + offset + 1, digits + offset, ndigits - offset);

            return ndigits + 1;
        }

        /* fp < 1.0 -> write leading zero */
        const int zeros = -offset;
        dest[0] = '0';
        dest[1] = '.';
        memset(dest + 2, '0', zeros);
        memcpy(dest + zeros + 2, digits, ndigits);

        return ndigits + 2 + zeros;
    }

    /* write decimal w/ scientific notation */
    ndigits = minv(ndigits, 18 - neg);

    char *out = dest;
    *out++ = digits[0];

    if (ndigits > 1) {
        *out++ = '.';
        memcpy(out, digits + 1, ndigits - 1);
        out += ndigits - 1;
    }

    *out++ = 'e';
    *out++ = (K + ndigits - 1 < 0) ? '-' : '+';

    const int hundreds = exp > 99 ? exp / 100 : 0;

    if (hundreds) {
        *out++ = hundreds + '0';
        exp -= hundreds * 100;
    }

    if (exp > 9) {
        const int tens_digit = exp / 10;
        *out++ = tens_digit + '0';
        exp -= tens_digit * 10;
    } else if (hundreds) {
        *out++ = '0';
    }

    *out++ = exp % 10 + '0';

    return (int)(out - dest);
}
|
402
|
+
|
403
|
+
static int filter_special(double fp, char* dest)
|
404
|
+
{
|
405
|
+
if(fp == 0.0) {
|
406
|
+
dest[0] = '0';
|
407
|
+
dest[1] = '.';
|
408
|
+
dest[2] = '0';
|
409
|
+
return 3;
|
410
|
+
}
|
411
|
+
|
412
|
+
uint64_t bits = get_dbits(fp);
|
413
|
+
|
414
|
+
bool nan = (bits & expmask) == expmask;
|
415
|
+
|
416
|
+
if(!nan) {
|
417
|
+
return 0;
|
418
|
+
}
|
419
|
+
|
420
|
+
if(bits & fracmask) {
|
421
|
+
dest[0] = 'n'; dest[1] = 'a'; dest[2] = 'n';
|
422
|
+
|
423
|
+
} else {
|
424
|
+
dest[0] = 'i'; dest[1] = 'n'; dest[2] = 'f';
|
425
|
+
}
|
426
|
+
|
427
|
+
return 3;
|
428
|
+
}
|
429
|
+
|
430
|
+
/* Fast and accurate double to string conversion based on Florian Loitsch's
|
431
|
+
* Grisu-algorithm[1].
|
432
|
+
*
|
433
|
+
* Input:
|
434
|
+
* fp -> the double to convert, dest -> destination buffer.
|
435
|
+
* The generated string will never be longer than 28 characters.
|
436
|
+
* Make sure to pass a pointer to at least 28 bytes of memory.
|
437
|
+
* The emitted string will not be null terminated.
|
438
|
+
*
|
439
|
+
* Output:
|
440
|
+
* The number of written characters.
|
441
|
+
*
|
442
|
+
* Exemplary usage:
|
443
|
+
*
|
444
|
+
* void print(double d)
|
445
|
+
* {
|
446
|
+
* char buf[28 + 1] // plus null terminator
|
447
|
+
* int str_len = fpconv_dtoa(d, buf);
|
448
|
+
*
|
449
|
+
* buf[str_len] = '\0';
|
450
|
+
* printf("%s", buf);
|
451
|
+
* }
|
452
|
+
*
|
453
|
+
*/
|
454
|
+
static int fpconv_dtoa(double d, char dest[28])
|
455
|
+
{
|
456
|
+
char digits[18];
|
457
|
+
|
458
|
+
int str_len = 0;
|
459
|
+
bool neg = false;
|
460
|
+
|
461
|
+
if(get_dbits(d) & signmask) {
|
462
|
+
dest[0] = '-';
|
463
|
+
str_len++;
|
464
|
+
neg = true;
|
465
|
+
}
|
466
|
+
|
467
|
+
int spec = filter_special(d, dest + str_len);
|
468
|
+
|
469
|
+
if(spec) {
|
470
|
+
return str_len + spec;
|
471
|
+
}
|
472
|
+
|
473
|
+
int K = 0;
|
474
|
+
int ndigits = grisu2(d, digits, &K);
|
475
|
+
|
476
|
+
str_len += emit_digits(digits, ndigits, dest + str_len, K, neg);
|
477
|
+
|
478
|
+
return str_len;
|
479
|
+
}
|