simdjson 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clang-format +5 -0
- data/.gitignore +14 -0
- data/.gitmodules +3 -0
- data/.rubocop.yml +9 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +32 -0
- data/benchmark/apache_builds.json +4421 -0
- data/benchmark/demo.json +15 -0
- data/benchmark/github_events.json +1390 -0
- data/benchmark/run_benchmark.rb +30 -0
- data/ext/simdjson/extconf.rb +22 -0
- data/ext/simdjson/simdjson.cpp +76 -0
- data/ext/simdjson/simdjson.hpp +6 -0
- data/lib/simdjson/version.rb +3 -0
- data/lib/simdjson.rb +2 -0
- data/simdjson.gemspec +35 -0
- data/vendor/.gitkeep +0 -0
- data/vendor/simdjson/AUTHORS +3 -0
- data/vendor/simdjson/CMakeLists.txt +63 -0
- data/vendor/simdjson/CONTRIBUTORS +27 -0
- data/vendor/simdjson/Dockerfile +10 -0
- data/vendor/simdjson/LICENSE +201 -0
- data/vendor/simdjson/Makefile +203 -0
- data/vendor/simdjson/Notes.md +85 -0
- data/vendor/simdjson/README.md +581 -0
- data/vendor/simdjson/amalgamation.sh +158 -0
- data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
- data/vendor/simdjson/benchmark/benchmark.h +223 -0
- data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
- data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
- data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
- data/vendor/simdjson/benchmark/parse.cpp +393 -0
- data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
- data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
- data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
- data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
- data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/gbps.png +0 -0
- data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
- data/vendor/simdjson/images/halvarflake.png +0 -0
- data/vendor/simdjson/images/logo.png +0 -0
- data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
- data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
- data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
- data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
- data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
- data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
- data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
- data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
- data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
- data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
- data/vendor/simdjson/include/simdjson/portability.h +172 -0
- data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
- data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
- data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
- data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
- data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
- data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
- data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
- data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
- data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
- data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
- data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
- data/vendor/simdjson/scripts/issue150.sh +14 -0
- data/vendor/simdjson/scripts/javascript/README.md +3 -0
- data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
- data/vendor/simdjson/scripts/minifier.sh +11 -0
- data/vendor/simdjson/scripts/parseandstat.sh +24 -0
- data/vendor/simdjson/scripts/parser.sh +11 -0
- data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
- data/vendor/simdjson/scripts/plotparse.sh +98 -0
- data/vendor/simdjson/scripts/selectparser.sh +11 -0
- data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
- data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
- data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
- data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
- data/vendor/simdjson/scripts/testjson2json.sh +99 -0
- data/vendor/simdjson/scripts/transitions/Makefile +10 -0
- data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
- data/vendor/simdjson/singleheader/README.md +1 -0
- data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
- data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
- data/vendor/simdjson/singleheader/simdjson.h +39692 -0
- data/vendor/simdjson/src/CMakeLists.txt +67 -0
- data/vendor/simdjson/src/jsonioutil.cpp +35 -0
- data/vendor/simdjson/src/jsonminifier.cpp +285 -0
- data/vendor/simdjson/src/jsonparser.cpp +91 -0
- data/vendor/simdjson/src/parsedjson.cpp +323 -0
- data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
- data/vendor/simdjson/src/simdjson.cpp +30 -0
- data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
- data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
- data/vendor/simdjson/style/clang-format-check.sh +25 -0
- data/vendor/simdjson/style/clang-format.sh +25 -0
- data/vendor/simdjson/style/run-clang-format.py +326 -0
- data/vendor/simdjson/tape.md +134 -0
- data/vendor/simdjson/tests/CMakeLists.txt +25 -0
- data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
- data/vendor/simdjson/tests/basictests.cpp +75 -0
- data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
- data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
- data/vendor/simdjson/tests/pointercheck.cpp +38 -0
- data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
- data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
- data/vendor/simdjson/tools/CMakeLists.txt +3 -0
- data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
- data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
- data/vendor/simdjson/tools/json2json.cpp +112 -0
- data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
- data/vendor/simdjson/tools/jsonstats.cpp +143 -0
- data/vendor/simdjson/tools/minify.cpp +21 -0
- data/vendor/simdjson/tools/release.py +125 -0
- data/vendor/simdjson/windows/dirent_portable.h +1043 -0
- metadata +273 -0
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,102 @@
|
|
1
|
+
#ifndef SIMDJSON_COMMON_DEFS_H
|
2
|
+
#define SIMDJSON_COMMON_DEFS_H
|
3
|
+
|
4
|
+
#include "simdjson/portability.h"
|
5
|
+
|
6
|
+
#include <cassert>
|
7
|
+
|
8
|
+
// we support documents up to 4GB
|
9
|
+
#define SIMDJSON_MAXSIZE_BYTES 0xFFFFFFFF
|
10
|
+
|
11
|
+
// the input buf should be readable up to buf + SIMDJSON_PADDING
|
12
|
+
#ifdef __AVX2__
|
13
|
+
#define SIMDJSON_PADDING sizeof(__m256i)
|
14
|
+
#else
|
15
|
+
// this is a stopgap; there should be a better description of the
|
16
|
+
// main loop and its behavior that abstracts over this
|
17
|
+
#define SIMDJSON_PADDING 32
|
18
|
+
#endif
|
19
|
+
|
20
|
+
#ifndef _MSC_VER
|
21
|
+
// Implemented using Labels as Values which works in GCC and CLANG (and maybe
|
22
|
+
// also in Intel's compiler), but won't work in MSVC.
|
23
|
+
#define SIMDJSON_USE_COMPUTED_GOTO
|
24
|
+
#endif
|
25
|
+
|
26
|
+
// Align to N-byte boundary
|
27
|
+
#define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
|
28
|
+
#define ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
|
29
|
+
|
30
|
+
#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0)
|
31
|
+
|
32
|
+
#ifdef _MSC_VER
|
33
|
+
#define really_inline __forceinline
|
34
|
+
#define never_inline __declspec(noinline)
|
35
|
+
|
36
|
+
#define UNUSED
|
37
|
+
#define WARN_UNUSED
|
38
|
+
|
39
|
+
// In this (_MSC_VER) branch the branch-prediction hints carry no compiler
// builtin; they simply evaluate to the condition. The expansion is
// parenthesized so that an expression argument keeps its grouping when the
// macro is combined with other operators, e.g. `!likely(a || b)`.
#ifndef likely
#define likely(x) (x)
#endif
#ifndef unlikely
#define unlikely(x) (x)
#endif
|
45
|
+
|
46
|
+
// For Visual Studio compilers, same-page buffer overrun is not fine.
|
47
|
+
#define ALLOW_SAME_PAGE_BUFFER_OVERRUN false
|
48
|
+
|
49
|
+
#else
|
50
|
+
|
51
|
+
// For non-Visual Studio compilers, we may assume that same-page buffer overrun
|
52
|
+
// is fine. However, it will make it difficult to be "valgrind clean".
|
53
|
+
//#ifndef ALLOW_SAME_PAGE_BUFFER_OVERRUN
|
54
|
+
//#define ALLOW_SAME_PAGE_BUFFER_OVERRUN true
|
55
|
+
//#else
|
56
|
+
#define ALLOW_SAME_PAGE_BUFFER_OVERRUN false
|
57
|
+
//#endif
|
58
|
+
|
59
|
+
// The following is likely unnecessarily complex.
|
60
|
+
#ifdef __SANITIZE_ADDRESS__
|
61
|
+
// we have GCC, stuck with https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
|
62
|
+
#define ALLOW_SAME_PAGE_BUFFER_OVERRUN false
|
63
|
+
#elif defined(__has_feature)
|
64
|
+
// we have CLANG?
|
65
|
+
// todo: if we're setting ALLOW_SAME_PAGE_BUFFER_OVERRUN to false, why do we
|
66
|
+
// have a non-empty qualifier?
|
67
|
+
#if (__has_feature(address_sanitizer))
|
68
|
+
#define ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER \
|
69
|
+
__attribute__((no_sanitize("address")))
|
70
|
+
#endif
|
71
|
+
#endif
|
72
|
+
|
73
|
+
#if defined(__has_feature)
|
74
|
+
#if (__has_feature(memory_sanitizer))
|
75
|
+
#define LENIENT_MEM_SANITIZER __attribute__((no_sanitize("memory")))
|
76
|
+
#endif
|
77
|
+
#endif
|
78
|
+
|
79
|
+
#define really_inline inline __attribute__((always_inline, unused))
|
80
|
+
#define never_inline inline __attribute__((noinline, unused))
|
81
|
+
|
82
|
+
#define UNUSED __attribute__((unused))
|
83
|
+
#define WARN_UNUSED __attribute__((warn_unused_result))
|
84
|
+
|
85
|
+
#ifndef likely
|
86
|
+
#define likely(x) __builtin_expect(!!(x), 1)
|
87
|
+
#endif
|
88
|
+
#ifndef unlikely
|
89
|
+
#define unlikely(x) __builtin_expect(!!(x), 0)
|
90
|
+
#endif
|
91
|
+
|
92
|
+
#endif // MSC_VER
|
93
|
+
|
94
|
+
// if it does not apply, make it an empty macro
|
95
|
+
#ifndef ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER
|
96
|
+
#define ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER
|
97
|
+
#endif
|
98
|
+
#ifndef LENIENT_MEM_SANITIZER
|
99
|
+
#define LENIENT_MEM_SANITIZER
|
100
|
+
#endif
|
101
|
+
|
102
|
+
#endif // SIMDJSON_COMMON_DEFS_H
|
@@ -0,0 +1,152 @@
|
|
1
|
+
/* From
|
2
|
+
https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
|
3
|
+
Highly modified.
|
4
|
+
|
5
|
+
Copyright (c) 2016- Facebook, Inc (Adam Paszke)
|
6
|
+
Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
|
7
|
+
Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
|
8
|
+
Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
|
9
|
+
Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
|
10
|
+
Copyright (c) 2011-2013 NYU (Clement Farabet)
|
11
|
+
Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
|
12
|
+
Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute
|
13
|
+
(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
|
14
|
+
Samy Bengio, Johnny Mariethoz)
|
15
|
+
|
16
|
+
All rights reserved.
|
17
|
+
|
18
|
+
Redistribution and use in source and binary forms, with or without
|
19
|
+
modification, are permitted provided that the following conditions are met:
|
20
|
+
|
21
|
+
1. Redistributions of source code must retain the above copyright
|
22
|
+
notice, this list of conditions and the following disclaimer.
|
23
|
+
|
24
|
+
2. Redistributions in binary form must reproduce the above copyright
|
25
|
+
notice, this list of conditions and the following disclaimer in the
|
26
|
+
documentation and/or other materials provided with the distribution.
|
27
|
+
|
28
|
+
3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
|
29
|
+
America and IDIAP Research Institute nor the names of its contributors may be
|
30
|
+
used to endorse or promote products derived from this software without
|
31
|
+
specific prior written permission.
|
32
|
+
|
33
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
34
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
35
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
36
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
37
|
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
38
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
39
|
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
40
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
41
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
42
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
43
|
+
POSSIBILITY OF SUCH DAMAGE.
|
44
|
+
*/
|
45
|
+
|
46
|
+
#ifndef SIMDJSON_ISADETECTION_H
|
47
|
+
#define SIMDJSON_ISADETECTION_H
|
48
|
+
|
49
|
+
#include <stdint.h>
|
50
|
+
#include <stdlib.h>
|
51
|
+
#if defined(_MSC_VER)
|
52
|
+
#include <intrin.h>
|
53
|
+
#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
|
54
|
+
#include <cpuid.h>
|
55
|
+
#endif
|
56
|
+
|
57
|
+
namespace simdjson {
|
58
|
+
// CPUID feature-bit masks; the bit positions are listed in the Intel ISA
// reference for the CPUID instruction.
//
// Reported in EBX when CPUID is executed with EAX=0x7:
constexpr uint32_t cpuid_avx2_bit = uint32_t(1) << 5; // EBX bit 5
constexpr uint32_t cpuid_bmi1_bit = uint32_t(1) << 3; // EBX bit 3
constexpr uint32_t cpuid_bmi2_bit = uint32_t(1) << 8; // EBX bit 8
// Reported in ECX when CPUID is executed with EAX=0x1:
constexpr uint32_t cpuid_sse42_bit = uint32_t(1) << 20;    // ECX bit 20
constexpr uint32_t cpuid_pclmulqdq_bit = uint32_t(1) << 1; // ECX bit 1
|
64
|
+
|
65
|
+
// Bit flags identifying instruction-set extensions. The values are distinct
// bits so that several flags can be OR-ed together into one uint32_t mask
// (as detect_supported_architectures does).
enum instruction_set {
  DEFAULT = 0,        // no flag set
  NEON = 1 << 0,      // 0x1
  AVX2 = 1 << 2,      // 0x4
  SSE42 = 1 << 3,     // 0x8
  PCLMULQDQ = 1 << 4, // 0x10
  BMI1 = 1 << 5,      // 0x20
  BMI2 = 1 << 6       // 0x40
};
|
74
|
+
|
75
|
+
#if defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64
|
76
|
+
|
77
|
+
#if defined(__NEON__)
|
78
|
+
|
79
|
+
static inline uint32_t detect_supported_architectures() {
|
80
|
+
return instruction_set::NEON;
|
81
|
+
}
|
82
|
+
|
83
|
+
#else // ARM without NEON
|
84
|
+
|
85
|
+
static inline uint32_t detect_supported_architectures() {
|
86
|
+
return instruction_set::DEFAULT;
|
87
|
+
}
|
88
|
+
|
89
|
+
#endif
|
90
|
+
|
91
|
+
#else // x86
|
92
|
+
// Executes the x86 CPUID instruction through whichever mechanism the
// toolchain offers and stores the resulting register values.
//
// eax  in: leaf to query; out: resulting EAX
// ebx  out: resulting EBX
// ecx  in: forwarded as the input ECX on the inline-assembly path only;
//      out: resulting ECX
// edx  out: resulting EDX
//
// NOTE(review): the _MSC_VER and __get_cpuid paths do not pass *ecx to the
// instruction, so a caller-supplied input ECX is honored only by the
// inline-assembly path -- confirm this is acceptable for leaves that take a
// sub-leaf (e.g. EAX=0x7).
static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
                         uint32_t *edx) {
#if defined(_MSC_VER)
  int regs[4];
  __cpuid(regs, *eax);
  *eax = regs[0];
  *ebx = regs[1];
  *ecx = regs[2];
  *edx = regs[3];
#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
  uint32_t leaf = *eax;
  __get_cpuid(leaf, eax, ebx, ecx, edx);
#else
  uint32_t reg_a = *eax;
  uint32_t reg_b;
  uint32_t reg_c = *ecx;
  uint32_t reg_d;
  asm volatile("cpuid\n\t"
               : "+a"(reg_a), "=b"(reg_b), "+c"(reg_c), "=d"(reg_d));
  *eax = reg_a;
  *ebx = reg_b;
  *ecx = reg_c;
  *edx = reg_d;
#endif
}
|
113
|
+
|
114
|
+
static inline uint32_t detect_supported_architectures() {
|
115
|
+
uint32_t eax, ebx, ecx, edx;
|
116
|
+
uint32_t host_isa = 0x0;
|
117
|
+
|
118
|
+
// ECX for EAX=0x7
|
119
|
+
eax = 0x7;
|
120
|
+
ecx = 0x0;
|
121
|
+
cpuid(&eax, &ebx, &ecx, &edx);
|
122
|
+
|
123
|
+
if (ebx & cpuid_avx2_bit) {
|
124
|
+
host_isa |= instruction_set::AVX2;
|
125
|
+
}
|
126
|
+
|
127
|
+
if (ebx & cpuid_bmi1_bit) {
|
128
|
+
host_isa |= instruction_set::BMI1;
|
129
|
+
}
|
130
|
+
|
131
|
+
if (ebx & cpuid_bmi2_bit) {
|
132
|
+
host_isa |= instruction_set::BMI2;
|
133
|
+
}
|
134
|
+
|
135
|
+
// EBX for EAX=0x1
|
136
|
+
eax = 0x1;
|
137
|
+
cpuid(&eax, &ebx, &ecx, &edx);
|
138
|
+
|
139
|
+
if (ecx & cpuid_sse42_bit) {
|
140
|
+
host_isa |= instruction_set::SSE42;
|
141
|
+
}
|
142
|
+
|
143
|
+
if (ecx & cpuid_pclmulqdq_bit) {
|
144
|
+
host_isa |= instruction_set::PCLMULQDQ;
|
145
|
+
}
|
146
|
+
|
147
|
+
return host_isa;
|
148
|
+
}
|
149
|
+
|
150
|
+
#endif // end SIMD extension detection code
|
151
|
+
} // namespace simdjson
|
152
|
+
#endif
|
@@ -0,0 +1,301 @@
|
|
1
|
+
#ifndef SIMDJSON_JSONCHARUTILS_H
|
2
|
+
#define SIMDJSON_JSONCHARUTILS_H
|
3
|
+
|
4
|
+
#include "simdjson/common_defs.h"
|
5
|
+
#include "simdjson/parsedjson.h"
|
6
|
+
|
7
|
+
namespace simdjson {
|
8
|
+
// structural chars here are
|
9
|
+
// { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and the NUL byte 0x00)
|
10
|
+
// we are also interested in the four whitespace characters
|
11
|
+
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
|
12
|
+
|
13
|
+
// these are the chars that can follow a true/false/null or number atom
|
14
|
+
// and nothing else
|
15
|
+
const uint32_t structural_or_whitespace_or_null_negated[256] = {
|
16
|
+
0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
|
17
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
18
|
+
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
|
19
|
+
|
20
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
21
|
+
1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
22
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
|
23
|
+
|
24
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
25
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
26
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
27
|
+
|
28
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
29
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
30
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
31
|
+
|
32
|
+
// return non-zero if not a structural or whitespace char
|
33
|
+
// zero otherwise
|
34
|
+
really_inline uint32_t is_not_structural_or_whitespace_or_null(uint8_t c) {
|
35
|
+
return structural_or_whitespace_or_null_negated[c];
|
36
|
+
}
|
37
|
+
|
38
|
+
const uint32_t structural_or_whitespace_negated[256] = {
|
39
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
|
40
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
41
|
+
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
|
42
|
+
|
43
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
44
|
+
1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
45
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
|
46
|
+
|
47
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
48
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
49
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
50
|
+
|
51
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
52
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
53
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
54
|
+
|
55
|
+
// return non-zero if not a structural or whitespace char
|
56
|
+
// zero otherwise
|
57
|
+
really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) {
|
58
|
+
return structural_or_whitespace_negated[c];
|
59
|
+
}
|
60
|
+
|
61
|
+
const uint32_t structural_or_whitespace_or_null[256] = {
|
62
|
+
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
63
|
+
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
64
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
65
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
|
66
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
67
|
+
0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
68
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
69
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
70
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
71
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
72
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
73
|
+
|
74
|
+
// Table lookup on structural_or_whitespace_or_null.
// Yields a non-zero value when c is a structural character, one of the four
// whitespace characters, or the NUL byte (0x00); yields zero otherwise.
really_inline uint32_t is_structural_or_whitespace_or_null(uint8_t c) {
  const uint32_t *const lookup = structural_or_whitespace_or_null;
  return lookup[c];
}
|
77
|
+
|
78
|
+
const uint32_t structural_or_whitespace[256] = {
|
79
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
80
|
+
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
81
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
82
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
|
83
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
84
|
+
0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
85
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
86
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
87
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
88
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
89
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
90
|
+
|
91
|
+
// Table lookup on structural_or_whitespace.
// Yields a non-zero value when c is a structural character or one of the
// four whitespace characters; yields zero otherwise (including for NUL).
really_inline uint32_t is_structural_or_whitespace(uint8_t c) {
  const uint32_t *const lookup = structural_or_whitespace;
  return lookup[c];
}
|
94
|
+
|
95
|
+
const uint32_t digit_to_val32[886] = {
|
96
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
97
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
98
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
99
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
100
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
101
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
102
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
103
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
104
|
+
0x0, 0x1, 0x2, 0x3, 0x4, 0x5,
|
105
|
+
0x6, 0x7, 0x8, 0x9, 0xFFFFFFFF, 0xFFFFFFFF,
|
106
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa,
|
107
|
+
0xb, 0xc, 0xd, 0xe, 0xf, 0xFFFFFFFF,
|
108
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
109
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
110
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
111
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
112
|
+
0xFFFFFFFF, 0xa, 0xb, 0xc, 0xd, 0xe,
|
113
|
+
0xf, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
114
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
115
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
116
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
117
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
118
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
119
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
120
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
121
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
122
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
123
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
124
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
125
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
126
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
127
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
128
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
129
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
130
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
131
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
132
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
133
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
134
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
135
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
136
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
137
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
138
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
139
|
+
0x0, 0x10, 0x20, 0x30, 0x40, 0x50,
|
140
|
+
0x60, 0x70, 0x80, 0x90, 0xFFFFFFFF, 0xFFFFFFFF,
|
141
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa0,
|
142
|
+
0xb0, 0xc0, 0xd0, 0xe0, 0xf0, 0xFFFFFFFF,
|
143
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
144
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
145
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
146
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
147
|
+
0xFFFFFFFF, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0,
|
148
|
+
0xf0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
149
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
150
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
151
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
152
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
153
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
154
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
155
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
156
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
157
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
158
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
159
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
160
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
161
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
162
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
163
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
164
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
165
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
166
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
167
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
168
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
169
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
170
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
171
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
172
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
173
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
174
|
+
0x0, 0x100, 0x200, 0x300, 0x400, 0x500,
|
175
|
+
0x600, 0x700, 0x800, 0x900, 0xFFFFFFFF, 0xFFFFFFFF,
|
176
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa00,
|
177
|
+
0xb00, 0xc00, 0xd00, 0xe00, 0xf00, 0xFFFFFFFF,
|
178
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
179
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
180
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
181
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
182
|
+
0xFFFFFFFF, 0xa00, 0xb00, 0xc00, 0xd00, 0xe00,
|
183
|
+
0xf00, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
184
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
185
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
186
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
187
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
188
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
189
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
190
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
191
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
192
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
193
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
194
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
195
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
196
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
197
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
198
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
199
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
200
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
201
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
202
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
203
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
204
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
205
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
206
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
207
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
208
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
209
|
+
0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000,
|
210
|
+
0x6000, 0x7000, 0x8000, 0x9000, 0xFFFFFFFF, 0xFFFFFFFF,
|
211
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa000,
|
212
|
+
0xb000, 0xc000, 0xd000, 0xe000, 0xf000, 0xFFFFFFFF,
|
213
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
214
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
215
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
216
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
217
|
+
0xFFFFFFFF, 0xa000, 0xb000, 0xc000, 0xd000, 0xe000,
|
218
|
+
0xf000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
219
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
220
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
221
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
222
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
223
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
224
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
225
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
226
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
227
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
228
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
229
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
230
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
231
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
232
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
233
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
234
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
235
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
236
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
237
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
238
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
239
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
240
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
241
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
242
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
243
|
+
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF};
|
244
|
+
// returns a value with the high 16 bits set if not valid
|
245
|
+
// otherwise returns the conversion of the 4 hex digits at src into the bottom
|
246
|
+
// 16 bits of the 32-bit return register
|
247
|
+
//
|
248
|
+
// see
|
249
|
+
// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/
|
250
|
+
static inline uint32_t hex_to_u32_nocheck(
|
251
|
+
const uint8_t *src) { // strictly speaking, static inline is a C-ism
|
252
|
+
uint32_t v1 = digit_to_val32[630 + src[0]];
|
253
|
+
uint32_t v2 = digit_to_val32[420 + src[1]];
|
254
|
+
uint32_t v3 = digit_to_val32[210 + src[2]];
|
255
|
+
uint32_t v4 = digit_to_val32[0 + src[3]];
|
256
|
+
return v1 | v2 | v3 | v4;
|
257
|
+
}
|
258
|
+
|
259
|
+
// Encodes the code point cp as UTF-8 into the buffer c and returns the number
// of bytes written (1 to 4). The buffer must have room for up to four bytes.
//
// A return value of 0 means cp is larger than 0x10FFFF; nothing is written in
// that case.
//
// Note: surrogate code points (0xD800-0xDFFF) are NOT rejected here; we assume
// that surrogates are treated separately by the caller.
//
// This could possibly be made faster with pdep, clz and table lookups, but
// JSON documents contain few escaped code points and this function is cheap.
inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
  // One byte: 0xxxxxxx (ASCII).
  if (cp < 0x80) {
    c[0] = (uint8_t)cp;
    return 1;
  }
  // Two bytes: 110xxxxx 10xxxxxx.
  if (cp < 0x800) {
    c[0] = (uint8_t)(0xC0 | (cp >> 6));
    c[1] = (uint8_t)(0x80 | (cp & 0x3F));
    return 2;
  }
  // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
  if (cp < 0x10000) {
    c[0] = (uint8_t)(0xE0 | (cp >> 12));
    c[1] = (uint8_t)(0x80 | ((cp >> 6) & 0x3F));
    c[2] = (uint8_t)(0x80 | (cp & 0x3F));
    return 3;
  }
  // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
  // The range test is unnecessary when cp is already known to be valid.
  if (cp < 0x110000) {
    c[0] = (uint8_t)(0xF0 | (cp >> 18));
    c[1] = (uint8_t)(0x80 | ((cp >> 12) & 0x3F));
    c[2] = (uint8_t)(0x80 | ((cp >> 6) & 0x3F));
    c[3] = (uint8_t)(0x80 | (cp & 0x3F));
    return 4;
  }
  // The code point was too large to be encoded.
  return 0;
}
|
299
|
+
} // namespace simdjson
|
300
|
+
|
301
|
+
#endif
|