simdjson 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clang-format +5 -0
- data/.gitignore +14 -0
- data/.gitmodules +3 -0
- data/.rubocop.yml +9 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +32 -0
- data/benchmark/apache_builds.json +4421 -0
- data/benchmark/demo.json +15 -0
- data/benchmark/github_events.json +1390 -0
- data/benchmark/run_benchmark.rb +30 -0
- data/ext/simdjson/extconf.rb +22 -0
- data/ext/simdjson/simdjson.cpp +76 -0
- data/ext/simdjson/simdjson.hpp +6 -0
- data/lib/simdjson/version.rb +3 -0
- data/lib/simdjson.rb +2 -0
- data/simdjson.gemspec +35 -0
- data/vendor/.gitkeep +0 -0
- data/vendor/simdjson/AUTHORS +3 -0
- data/vendor/simdjson/CMakeLists.txt +63 -0
- data/vendor/simdjson/CONTRIBUTORS +27 -0
- data/vendor/simdjson/Dockerfile +10 -0
- data/vendor/simdjson/LICENSE +201 -0
- data/vendor/simdjson/Makefile +203 -0
- data/vendor/simdjson/Notes.md +85 -0
- data/vendor/simdjson/README.md +581 -0
- data/vendor/simdjson/amalgamation.sh +158 -0
- data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
- data/vendor/simdjson/benchmark/benchmark.h +223 -0
- data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
- data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
- data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
- data/vendor/simdjson/benchmark/parse.cpp +393 -0
- data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
- data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
- data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
- data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
- data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/gbps.png +0 -0
- data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
- data/vendor/simdjson/images/halvarflake.png +0 -0
- data/vendor/simdjson/images/logo.png +0 -0
- data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
- data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
- data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
- data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
- data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
- data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
- data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
- data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
- data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
- data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
- data/vendor/simdjson/include/simdjson/portability.h +172 -0
- data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
- data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
- data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
- data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
- data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
- data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
- data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
- data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
- data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
- data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
- data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
- data/vendor/simdjson/scripts/issue150.sh +14 -0
- data/vendor/simdjson/scripts/javascript/README.md +3 -0
- data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
- data/vendor/simdjson/scripts/minifier.sh +11 -0
- data/vendor/simdjson/scripts/parseandstat.sh +24 -0
- data/vendor/simdjson/scripts/parser.sh +11 -0
- data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
- data/vendor/simdjson/scripts/plotparse.sh +98 -0
- data/vendor/simdjson/scripts/selectparser.sh +11 -0
- data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
- data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
- data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
- data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
- data/vendor/simdjson/scripts/testjson2json.sh +99 -0
- data/vendor/simdjson/scripts/transitions/Makefile +10 -0
- data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
- data/vendor/simdjson/singleheader/README.md +1 -0
- data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
- data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
- data/vendor/simdjson/singleheader/simdjson.h +39692 -0
- data/vendor/simdjson/src/CMakeLists.txt +67 -0
- data/vendor/simdjson/src/jsonioutil.cpp +35 -0
- data/vendor/simdjson/src/jsonminifier.cpp +285 -0
- data/vendor/simdjson/src/jsonparser.cpp +91 -0
- data/vendor/simdjson/src/parsedjson.cpp +323 -0
- data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
- data/vendor/simdjson/src/simdjson.cpp +30 -0
- data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
- data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
- data/vendor/simdjson/style/clang-format-check.sh +25 -0
- data/vendor/simdjson/style/clang-format.sh +25 -0
- data/vendor/simdjson/style/run-clang-format.py +326 -0
- data/vendor/simdjson/tape.md +134 -0
- data/vendor/simdjson/tests/CMakeLists.txt +25 -0
- data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
- data/vendor/simdjson/tests/basictests.cpp +75 -0
- data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
- data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
- data/vendor/simdjson/tests/pointercheck.cpp +38 -0
- data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
- data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
- data/vendor/simdjson/tools/CMakeLists.txt +3 -0
- data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
- data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
- data/vendor/simdjson/tools/json2json.cpp +112 -0
- data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
- data/vendor/simdjson/tools/jsonstats.cpp +143 -0
- data/vendor/simdjson/tools/minify.cpp +21 -0
- data/vendor/simdjson/tools/release.py +125 -0
- data/vendor/simdjson/windows/dirent_portable.h +1043 -0
- metadata +273 -0
|
@@ -0,0 +1,587 @@
|
|
|
1
|
+
#ifndef SIMDJSON_NUMBERPARSING_H
|
|
2
|
+
#define SIMDJSON_NUMBERPARSING_H
|
|
3
|
+
|
|
4
|
+
#include "simdjson/common_defs.h"
|
|
5
|
+
#include "simdjson/jsoncharutils.h"
|
|
6
|
+
#include "simdjson/parsedjson.h"
|
|
7
|
+
#include "simdjson/portability.h"
|
|
8
|
+
|
|
9
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
10
|
+
void found_invalid_number(const uint8_t *buf);
|
|
11
|
+
void found_integer(int64_t result, const uint8_t *buf);
|
|
12
|
+
void found_float(double result, const uint8_t *buf);
|
|
13
|
+
#endif
|
|
14
|
+
|
|
15
|
+
namespace simdjson {
|
|
16
|
+
// Allowable floating-point values range from
|
|
17
|
+
// std::numeric_limits<double>::lowest() to std::numeric_limits<double>::max(),
|
|
18
|
+
// so from -1.7976e308 all the way to 1.7976e308 in binary64. The lowest
|
|
19
|
+
// non-zero normal value is std::numeric_limits<double>::min() or
|
|
20
|
+
// about 2.225074e-308.
|
|
21
|
+
static const double power_of_ten[] = {
|
|
22
|
+
1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300,
|
|
23
|
+
1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291,
|
|
24
|
+
1e-290, 1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282,
|
|
25
|
+
1e-281, 1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273,
|
|
26
|
+
1e-272, 1e-271, 1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264,
|
|
27
|
+
1e-263, 1e-262, 1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255,
|
|
28
|
+
1e-254, 1e-253, 1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246,
|
|
29
|
+
1e-245, 1e-244, 1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237,
|
|
30
|
+
1e-236, 1e-235, 1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228,
|
|
31
|
+
1e-227, 1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219,
|
|
32
|
+
1e-218, 1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210,
|
|
33
|
+
1e-209, 1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201,
|
|
34
|
+
1e-200, 1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192,
|
|
35
|
+
1e-191, 1e-190, 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183,
|
|
36
|
+
1e-182, 1e-181, 1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174,
|
|
37
|
+
1e-173, 1e-172, 1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165,
|
|
38
|
+
1e-164, 1e-163, 1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156,
|
|
39
|
+
1e-155, 1e-154, 1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147,
|
|
40
|
+
1e-146, 1e-145, 1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138,
|
|
41
|
+
1e-137, 1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129,
|
|
42
|
+
1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120,
|
|
43
|
+
1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111,
|
|
44
|
+
1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102,
|
|
45
|
+
1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93,
|
|
46
|
+
1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84,
|
|
47
|
+
1e-83, 1e-82, 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 1e-75,
|
|
48
|
+
1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66,
|
|
49
|
+
1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57,
|
|
50
|
+
1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48,
|
|
51
|
+
1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39,
|
|
52
|
+
1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30,
|
|
53
|
+
1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21,
|
|
54
|
+
1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12,
|
|
55
|
+
1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3,
|
|
56
|
+
1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6,
|
|
57
|
+
1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
|
|
58
|
+
1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24,
|
|
59
|
+
1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, 1e32, 1e33,
|
|
60
|
+
1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42,
|
|
61
|
+
1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51,
|
|
62
|
+
1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, 1e60,
|
|
63
|
+
1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69,
|
|
64
|
+
1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78,
|
|
65
|
+
1e79, 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87,
|
|
66
|
+
1e88, 1e89, 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96,
|
|
67
|
+
1e97, 1e98, 1e99, 1e100, 1e101, 1e102, 1e103, 1e104, 1e105,
|
|
68
|
+
1e106, 1e107, 1e108, 1e109, 1e110, 1e111, 1e112, 1e113, 1e114,
|
|
69
|
+
1e115, 1e116, 1e117, 1e118, 1e119, 1e120, 1e121, 1e122, 1e123,
|
|
70
|
+
1e124, 1e125, 1e126, 1e127, 1e128, 1e129, 1e130, 1e131, 1e132,
|
|
71
|
+
1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, 1e140, 1e141,
|
|
72
|
+
1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 1e150,
|
|
73
|
+
1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159,
|
|
74
|
+
1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168,
|
|
75
|
+
1e169, 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177,
|
|
76
|
+
1e178, 1e179, 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186,
|
|
77
|
+
1e187, 1e188, 1e189, 1e190, 1e191, 1e192, 1e193, 1e194, 1e195,
|
|
78
|
+
1e196, 1e197, 1e198, 1e199, 1e200, 1e201, 1e202, 1e203, 1e204,
|
|
79
|
+
1e205, 1e206, 1e207, 1e208, 1e209, 1e210, 1e211, 1e212, 1e213,
|
|
80
|
+
1e214, 1e215, 1e216, 1e217, 1e218, 1e219, 1e220, 1e221, 1e222,
|
|
81
|
+
1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229, 1e230, 1e231,
|
|
82
|
+
1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, 1e240,
|
|
83
|
+
1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249,
|
|
84
|
+
1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258,
|
|
85
|
+
1e259, 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267,
|
|
86
|
+
1e268, 1e269, 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276,
|
|
87
|
+
1e277, 1e278, 1e279, 1e280, 1e281, 1e282, 1e283, 1e284, 1e285,
|
|
88
|
+
1e286, 1e287, 1e288, 1e289, 1e290, 1e291, 1e292, 1e293, 1e294,
|
|
89
|
+
1e295, 1e296, 1e297, 1e298, 1e299, 1e300, 1e301, 1e302, 1e303,
|
|
90
|
+
1e304, 1e305, 1e306, 1e307, 1e308};
|
|
91
|
+
|
|
92
|
+
static inline bool is_integer(char c) {
|
|
93
|
+
return (c >= '0' && c <= '9');
|
|
94
|
+
// this gets compiled to (uint8_t)(c - '0') <= 9 on all decent compilers
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// We need to check that the character following a zero is valid. This is
|
|
98
|
+
// probably frequent and it is harder than it looks. We are building all of this
|
|
99
|
+
// just to differentiate between 0x1 (invalid), 0,1 (valid) 0e1 (valid)...
|
|
100
|
+
const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = {
|
|
101
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
102
|
+
1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
|
|
103
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
|
|
104
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
|
|
105
|
+
1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
106
|
+
1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
107
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
108
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
109
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
110
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
111
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
|
112
|
+
|
|
113
|
+
really_inline bool
|
|
114
|
+
is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) {
|
|
115
|
+
return structural_or_whitespace_or_exponent_or_decimal_negated[c];
|
|
116
|
+
}
|
|
117
|
+
} // namespace simdjson
|
|
118
|
+
#ifndef SIMDJSON_DISABLE_SWAR_NUMBER_PARSING
|
|
119
|
+
#define SWAR_NUMBER_PARSING
|
|
120
|
+
#endif
|
|
121
|
+
|
|
122
|
+
#ifdef SWAR_NUMBER_PARSING
|
|
123
|
+
|
|
124
|
+
namespace simdjson {
|
|
125
|
+
// check quickly whether the next 8 chars are made of digits
|
|
126
|
+
// at a glance, it looks better than Mula's
|
|
127
|
+
// http://0x80.pl/articles/swar-digits-validate.html
|
|
128
|
+
static inline bool is_made_of_eight_digits_fast(const char *chars) {
|
|
129
|
+
uint64_t val;
|
|
130
|
+
// this can read up to 7 bytes beyond the buffer size, but we require
|
|
131
|
+
// SIMDJSON_PADDING of padding
|
|
132
|
+
static_assert(7 <= SIMDJSON_PADDING);
|
|
133
|
+
memcpy(&val, chars, 8);
|
|
134
|
+
// a branchy method might be faster:
|
|
135
|
+
// return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030)
|
|
136
|
+
// && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) ==
|
|
137
|
+
// 0x3030303030303030);
|
|
138
|
+
return (((val & 0xF0F0F0F0F0F0F0F0) |
|
|
139
|
+
(((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
|
|
140
|
+
0x3333333333333333);
|
|
141
|
+
}
|
|
142
|
+
} // namespace simdjson
|
|
143
|
+
#ifdef IS_X86_64
|
|
144
|
+
TARGET_WESTMERE
|
|
145
|
+
namespace simdjson {
|
|
146
|
+
static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
|
|
147
|
+
// this actually computes *16* values so we are being wasteful.
|
|
148
|
+
const __m128i ascii0 = _mm_set1_epi8('0');
|
|
149
|
+
const __m128i mul_1_10 =
|
|
150
|
+
_mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1);
|
|
151
|
+
const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
|
|
152
|
+
const __m128i mul_1_10000 =
|
|
153
|
+
_mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
|
|
154
|
+
const __m128i input = _mm_sub_epi8(
|
|
155
|
+
_mm_loadu_si128(reinterpret_cast<const __m128i *>(chars)), ascii0);
|
|
156
|
+
const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
|
|
157
|
+
const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
|
|
158
|
+
const __m128i t3 = _mm_packus_epi32(t2, t2);
|
|
159
|
+
const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000);
|
|
160
|
+
return _mm_cvtsi128_si32(
|
|
161
|
+
t4); // only captures the sum of the first 8 digits, drop the rest
|
|
162
|
+
}
|
|
163
|
+
} // namespace simdjson
|
|
164
|
+
UNTARGET_REGION
|
|
165
|
+
#endif
|
|
166
|
+
|
|
167
|
+
namespace simdjson {
|
|
168
|
+
#ifdef IS_ARM64
|
|
169
|
+
// we don't have SSE, so let us use a scalar function
|
|
170
|
+
// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
171
|
+
static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
|
|
172
|
+
uint64_t val;
|
|
173
|
+
memcpy(&val, chars, sizeof(uint64_t));
|
|
174
|
+
val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
|
|
175
|
+
val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
|
|
176
|
+
return (val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32;
|
|
177
|
+
}
|
|
178
|
+
#endif
|
|
179
|
+
|
|
180
|
+
#endif
|
|
181
|
+
|
|
182
|
+
//
|
|
183
|
+
// This function computes base * 10 ^ negative_exponent.
|
|
184
|
+
// It is only ever going to be used when negative_exponent is tiny.
|
|
185
|
+
static double subnormal_power10(double base, int negative_exponent) {
|
|
186
|
+
// this is probably not going to be fast
|
|
187
|
+
return base * 1e-308 * pow(10, negative_exponent + 308);
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
// called by parse_number when we know that the output is a float,
|
|
191
|
+
// but where there might be some integer overflow. The trick here is to
|
|
192
|
+
// parse using floats from the start.
|
|
193
|
+
// Do not call this function directly as it skips some of the checks from
|
|
194
|
+
// parse_number
|
|
195
|
+
//
|
|
196
|
+
// This function will almost never be called!!!
|
|
197
|
+
//
|
|
198
|
+
// Note: a redesign could avoid this function entirely.
|
|
199
|
+
//
|
|
200
|
+
static never_inline bool parse_float(const uint8_t *const buf, ParsedJson &pj,
|
|
201
|
+
const uint32_t offset, bool found_minus) {
|
|
202
|
+
const char *p = reinterpret_cast<const char *>(buf + offset);
|
|
203
|
+
bool negative = false;
|
|
204
|
+
if (found_minus) {
|
|
205
|
+
++p;
|
|
206
|
+
negative = true;
|
|
207
|
+
}
|
|
208
|
+
long double i;
|
|
209
|
+
if (*p == '0') { // 0 cannot be followed by an integer
|
|
210
|
+
++p;
|
|
211
|
+
i = 0;
|
|
212
|
+
} else {
|
|
213
|
+
unsigned char digit = *p - '0';
|
|
214
|
+
i = digit;
|
|
215
|
+
p++;
|
|
216
|
+
while (is_integer(*p)) {
|
|
217
|
+
digit = *p - '0';
|
|
218
|
+
i = 10 * i + digit;
|
|
219
|
+
++p;
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
if ('.' == *p) {
|
|
223
|
+
++p;
|
|
224
|
+
int fractional_weight = 308;
|
|
225
|
+
if (is_integer(*p)) {
|
|
226
|
+
unsigned char digit = *p - '0';
|
|
227
|
+
++p;
|
|
228
|
+
|
|
229
|
+
fractional_weight--;
|
|
230
|
+
i = i + digit * (fractional_weight >= 0 ? power_of_ten[fractional_weight]
|
|
231
|
+
: 0);
|
|
232
|
+
} else {
|
|
233
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
234
|
+
found_invalid_number(buf + offset);
|
|
235
|
+
#endif
|
|
236
|
+
return false;
|
|
237
|
+
}
|
|
238
|
+
while (is_integer(*p)) {
|
|
239
|
+
unsigned char digit = *p - '0';
|
|
240
|
+
++p;
|
|
241
|
+
fractional_weight--;
|
|
242
|
+
i = i + digit * (fractional_weight >= 0 ? power_of_ten[fractional_weight]
|
|
243
|
+
: 0);
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
if (('e' == *p) || ('E' == *p)) {
|
|
247
|
+
++p;
|
|
248
|
+
bool neg_exp = false;
|
|
249
|
+
if ('-' == *p) {
|
|
250
|
+
neg_exp = true;
|
|
251
|
+
++p;
|
|
252
|
+
} else if ('+' == *p) {
|
|
253
|
+
++p;
|
|
254
|
+
}
|
|
255
|
+
if (!is_integer(*p)) {
|
|
256
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
257
|
+
found_invalid_number(buf + offset);
|
|
258
|
+
#endif
|
|
259
|
+
return false;
|
|
260
|
+
}
|
|
261
|
+
unsigned char digit = *p - '0';
|
|
262
|
+
int64_t exp_number = digit; // exponential part
|
|
263
|
+
p++;
|
|
264
|
+
if (is_integer(*p)) {
|
|
265
|
+
digit = *p - '0';
|
|
266
|
+
exp_number = 10 * exp_number + digit;
|
|
267
|
+
++p;
|
|
268
|
+
}
|
|
269
|
+
if (is_integer(*p)) {
|
|
270
|
+
digit = *p - '0';
|
|
271
|
+
exp_number = 10 * exp_number + digit;
|
|
272
|
+
++p;
|
|
273
|
+
}
|
|
274
|
+
if (is_integer(*p)) {
|
|
275
|
+
digit = *p - '0';
|
|
276
|
+
exp_number = 10 * exp_number + digit;
|
|
277
|
+
++p;
|
|
278
|
+
}
|
|
279
|
+
while (is_integer(*p)) {
|
|
280
|
+
if (exp_number > 0x100000000) { // we need to check for overflows
|
|
281
|
+
// we refuse to parse this
|
|
282
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
283
|
+
found_invalid_number(buf + offset);
|
|
284
|
+
#endif
|
|
285
|
+
return false;
|
|
286
|
+
}
|
|
287
|
+
digit = *p - '0';
|
|
288
|
+
exp_number = 10 * exp_number + digit;
|
|
289
|
+
++p;
|
|
290
|
+
}
|
|
291
|
+
if (unlikely(exp_number > 308)) {
|
|
292
|
+
// this path is unlikely
|
|
293
|
+
if (neg_exp) {
|
|
294
|
+
// We either have zero or a subnormal.
|
|
295
|
+
// We expect this to be uncommon so we go through a slow path.
|
|
296
|
+
i = subnormal_power10(i, -exp_number);
|
|
297
|
+
} else {
|
|
298
|
+
// We know for sure that we have a number that is too large,
|
|
299
|
+
// we refuse to parse this
|
|
300
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
301
|
+
found_invalid_number(buf + offset);
|
|
302
|
+
#endif
|
|
303
|
+
return false;
|
|
304
|
+
}
|
|
305
|
+
} else {
|
|
306
|
+
int exponent = (neg_exp ? -exp_number : exp_number);
|
|
307
|
+
// we have that exp_number is [0,308] so that
|
|
308
|
+
// exponent is [-308,308] so that
|
|
309
|
+
// 308 + exponent is in [0, 2 * 308]
|
|
310
|
+
i *= power_of_ten[308 + exponent];
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
if (is_not_structural_or_whitespace(*p)) {
|
|
314
|
+
return false;
|
|
315
|
+
}
|
|
316
|
+
double d = negative ? -i : i;
|
|
317
|
+
pj.write_tape_double(d);
|
|
318
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
319
|
+
found_float(d, buf + offset);
|
|
320
|
+
#endif
|
|
321
|
+
return is_structural_or_whitespace(*p);
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
// called by parse_number when we know that the output is an integer,
|
|
325
|
+
// but where there might be some integer overflow.
|
|
326
|
+
// we want to catch overflows!
|
|
327
|
+
// Do not call this function directly as it skips some of the checks from
|
|
328
|
+
// parse_number
|
|
329
|
+
//
|
|
330
|
+
// This function will almost never be called!!!
|
|
331
|
+
//
|
|
332
|
+
static never_inline bool parse_large_integer(const uint8_t *const buf,
|
|
333
|
+
ParsedJson &pj,
|
|
334
|
+
const uint32_t offset,
|
|
335
|
+
bool found_minus) {
|
|
336
|
+
const char *p = reinterpret_cast<const char *>(buf + offset);
|
|
337
|
+
|
|
338
|
+
bool negative = false;
|
|
339
|
+
if (found_minus) {
|
|
340
|
+
++p;
|
|
341
|
+
negative = true;
|
|
342
|
+
}
|
|
343
|
+
uint64_t i;
|
|
344
|
+
if (*p == '0') { // 0 cannot be followed by an integer
|
|
345
|
+
++p;
|
|
346
|
+
i = 0;
|
|
347
|
+
} else {
|
|
348
|
+
unsigned char digit = *p - '0';
|
|
349
|
+
i = digit;
|
|
350
|
+
p++;
|
|
351
|
+
// the is_made_of_eight_digits_fast routine is unlikely to help here because
|
|
352
|
+
// we rarely see large integer parts like 123456789
|
|
353
|
+
while (is_integer(*p)) {
|
|
354
|
+
digit = *p - '0';
|
|
355
|
+
if (mul_overflow(i, 10, &i)) {
|
|
356
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
357
|
+
found_invalid_number(buf + offset);
|
|
358
|
+
#endif
|
|
359
|
+
return false; // overflow
|
|
360
|
+
}
|
|
361
|
+
if (add_overflow(i, digit, &i)) {
|
|
362
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
363
|
+
found_invalid_number(buf + offset);
|
|
364
|
+
#endif
|
|
365
|
+
return false; // overflow
|
|
366
|
+
}
|
|
367
|
+
++p;
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
if (negative) {
|
|
371
|
+
if (i > 0x8000000000000000) {
|
|
372
|
+
// overflows!
|
|
373
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
374
|
+
found_invalid_number(buf + offset);
|
|
375
|
+
#endif
|
|
376
|
+
return false; // overflow
|
|
377
|
+
}
|
|
378
|
+
} else {
|
|
379
|
+
if (i >= 0x8000000000000000) {
|
|
380
|
+
// overflows!
|
|
381
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
382
|
+
found_invalid_number(buf + offset);
|
|
383
|
+
#endif
|
|
384
|
+
return false; // overflow
|
|
385
|
+
}
|
|
386
|
+
}
|
|
387
|
+
int64_t signed_answer =
|
|
388
|
+
negative ? -static_cast<int64_t>(i) : static_cast<int64_t>(i);
|
|
389
|
+
pj.write_tape_s64(signed_answer);
|
|
390
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
391
|
+
found_integer(signed_answer, buf + offset);
|
|
392
|
+
#endif
|
|
393
|
+
return is_structural_or_whitespace(*p);
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
// parse the number at buf + offset
|
|
397
|
+
// define JSON_TEST_NUMBERS for unit testing
|
|
398
|
+
//
|
|
399
|
+
// It is assumed that the number is followed by a structural ({,},],[) character
|
|
400
|
+
// or a white space character. If that is not the case (e.g., when the JSON
|
|
401
|
+
// document is made of a single number), then it is necessary to copy the
|
|
402
|
+
// content and append a space before calling this function.
|
|
403
|
+
//
|
|
404
|
+
// Our objective is accurate parsing (ULP of 0 or 1) at high speed.
|
|
405
|
+
static really_inline bool parse_number(const uint8_t *const buf, ParsedJson &pj,
|
|
406
|
+
const uint32_t offset,
|
|
407
|
+
bool found_minus) {
|
|
408
|
+
#ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes
|
|
409
|
+
// useful to skip parsing
|
|
410
|
+
pj.write_tape_s64(0); // always write zero
|
|
411
|
+
return true; // always succeeds
|
|
412
|
+
#else
|
|
413
|
+
const char *p = reinterpret_cast<const char *>(buf + offset);
|
|
414
|
+
bool negative = false;
|
|
415
|
+
if (found_minus) {
|
|
416
|
+
++p;
|
|
417
|
+
negative = true;
|
|
418
|
+
if (!is_integer(*p)) { // a negative sign must be followed by an integer
|
|
419
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
420
|
+
found_invalid_number(buf + offset);
|
|
421
|
+
#endif
|
|
422
|
+
return false;
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
const char *const start_digits = p;
|
|
426
|
+
|
|
427
|
+
uint64_t i; // an unsigned int avoids signed overflows (which are bad)
|
|
428
|
+
if (*p == '0') { // 0 cannot be followed by an integer
|
|
429
|
+
++p;
|
|
430
|
+
if (is_not_structural_or_whitespace_or_exponent_or_decimal(*p)) {
|
|
431
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
432
|
+
found_invalid_number(buf + offset);
|
|
433
|
+
#endif
|
|
434
|
+
return false;
|
|
435
|
+
}
|
|
436
|
+
i = 0;
|
|
437
|
+
} else {
|
|
438
|
+
if (!(is_integer(*p))) { // must start with an integer
|
|
439
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
440
|
+
found_invalid_number(buf + offset);
|
|
441
|
+
#endif
|
|
442
|
+
return false;
|
|
443
|
+
}
|
|
444
|
+
unsigned char digit = *p - '0';
|
|
445
|
+
i = digit;
|
|
446
|
+
p++;
|
|
447
|
+
// the is_made_of_eight_digits_fast routine is unlikely to help here because
|
|
448
|
+
// we rarely see large integer parts like 123456789
|
|
449
|
+
while (is_integer(*p)) {
|
|
450
|
+
digit = *p - '0';
|
|
451
|
+
// a multiplication by 10 is cheaper than an arbitrary integer
|
|
452
|
+
// multiplication
|
|
453
|
+
i = 10 * i + digit; // might overflow, we will handle the overflow later
|
|
454
|
+
++p;
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
int64_t exponent = 0;
|
|
458
|
+
bool is_float = false;
|
|
459
|
+
if ('.' == *p) {
|
|
460
|
+
is_float = true; // At this point we know that we have a float
|
|
461
|
+
// we continue with the fiction that we have an integer. If the
|
|
462
|
+
// floating point number is representable as x * 10^z for some integer
|
|
463
|
+
// z that fits in 53 bits, then we will be able to convert back the
|
|
464
|
+
// integer into a float in a lossless manner.
|
|
465
|
+
++p;
|
|
466
|
+
const char *const first_after_period = p;
|
|
467
|
+
if (is_integer(*p)) {
|
|
468
|
+
unsigned char digit = *p - '0';
|
|
469
|
+
++p;
|
|
470
|
+
i = i * 10 + digit; // might overflow + multiplication by 10 is likely
|
|
471
|
+
// cheaper than arbitrary mult.
|
|
472
|
+
// we will handle the overflow later
|
|
473
|
+
} else {
|
|
474
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
475
|
+
found_invalid_number(buf + offset);
|
|
476
|
+
#endif
|
|
477
|
+
return false;
|
|
478
|
+
}
|
|
479
|
+
#ifdef SWAR_NUMBER_PARSING
|
|
480
|
+
// this helps if we have lots of decimals!
|
|
481
|
+
// this turns out to be frequent enough.
|
|
482
|
+
if (is_made_of_eight_digits_fast(p)) {
|
|
483
|
+
i = i * 100000000 + parse_eight_digits_unrolled(p);
|
|
484
|
+
p += 8;
|
|
485
|
+
}
|
|
486
|
+
#endif
|
|
487
|
+
while (is_integer(*p)) {
|
|
488
|
+
unsigned char digit = *p - '0';
|
|
489
|
+
++p;
|
|
490
|
+
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
|
|
491
|
+
// because we fall back to parse_float later.
|
|
492
|
+
}
|
|
493
|
+
exponent = first_after_period - p;
|
|
494
|
+
}
|
|
495
|
+
int digit_count =
|
|
496
|
+
p - start_digits - 1; // used later to guard against overflows
|
|
497
|
+
int64_t exp_number = 0; // exponential part
|
|
498
|
+
if (('e' == *p) || ('E' == *p)) {
|
|
499
|
+
is_float = true;
|
|
500
|
+
++p;
|
|
501
|
+
bool neg_exp = false;
|
|
502
|
+
if ('-' == *p) {
|
|
503
|
+
neg_exp = true;
|
|
504
|
+
++p;
|
|
505
|
+
} else if ('+' == *p) {
|
|
506
|
+
++p;
|
|
507
|
+
}
|
|
508
|
+
if (!is_integer(*p)) {
|
|
509
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
510
|
+
found_invalid_number(buf + offset);
|
|
511
|
+
#endif
|
|
512
|
+
return false;
|
|
513
|
+
}
|
|
514
|
+
unsigned char digit = *p - '0';
|
|
515
|
+
exp_number = digit;
|
|
516
|
+
p++;
|
|
517
|
+
if (is_integer(*p)) {
|
|
518
|
+
digit = *p - '0';
|
|
519
|
+
exp_number = 10 * exp_number + digit;
|
|
520
|
+
++p;
|
|
521
|
+
}
|
|
522
|
+
if (is_integer(*p)) {
|
|
523
|
+
digit = *p - '0';
|
|
524
|
+
exp_number = 10 * exp_number + digit;
|
|
525
|
+
++p;
|
|
526
|
+
}
|
|
527
|
+
while (is_integer(*p)) {
|
|
528
|
+
if (exp_number > 0x100000000) { // we need to check for overflows
|
|
529
|
+
// we refuse to parse this
|
|
530
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
531
|
+
found_invalid_number(buf + offset);
|
|
532
|
+
#endif
|
|
533
|
+
return false;
|
|
534
|
+
}
|
|
535
|
+
digit = *p - '0';
|
|
536
|
+
exp_number = 10 * exp_number + digit;
|
|
537
|
+
++p;
|
|
538
|
+
}
|
|
539
|
+
exponent += (neg_exp ? -exp_number : exp_number);
|
|
540
|
+
}
|
|
541
|
+
if (is_float) {
|
|
542
|
+
uint64_t power_index = 308 + exponent;
|
|
543
|
+
if (unlikely((digit_count >= 19))) { // this is uncommon
|
|
544
|
+
// It is possible that the integer had an overflow.
|
|
545
|
+
// We have to handle the case where we have 0.0000somenumber.
|
|
546
|
+
const char *start = start_digits;
|
|
547
|
+
while ((*start == '0') || (*start == '.')) {
|
|
548
|
+
start++;
|
|
549
|
+
}
|
|
550
|
+
// we over-decrement by one when there is a '.'
|
|
551
|
+
digit_count -= (start - start_digits);
|
|
552
|
+
if (digit_count >= 19) {
|
|
553
|
+
// Ok, chances are good that we had an overflow!
|
|
554
|
+
// this is almost never going to get called!!!
|
|
555
|
+
// we start anew, going slowly!!!
|
|
556
|
+
return parse_float(buf, pj, offset, found_minus);
|
|
557
|
+
}
|
|
558
|
+
}
|
|
559
|
+
if (unlikely((power_index > 2 * 308))) { // this is uncommon!!!
|
|
560
|
+
// this is almost never going to get called!!!
|
|
561
|
+
// we start anew, going slowly!!!
|
|
562
|
+
return parse_float(buf, pj, offset, found_minus);
|
|
563
|
+
}
|
|
564
|
+
double factor = power_of_ten[power_index];
|
|
565
|
+
factor = negative ? -factor : factor;
|
|
566
|
+
double d = i * factor;
|
|
567
|
+
pj.write_tape_double(d);
|
|
568
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
569
|
+
found_float(d, buf + offset);
|
|
570
|
+
#endif
|
|
571
|
+
} else {
|
|
572
|
+
if (unlikely(digit_count >= 18)) { // this is uncommon!!!
|
|
573
|
+
// there is a good chance that we had an overflow, so we need
|
|
574
|
+
// to recover: we parse the whole thing again.
|
|
575
|
+
return parse_large_integer(buf, pj, offset, found_minus);
|
|
576
|
+
}
|
|
577
|
+
i = negative ? 0 - i : i;
|
|
578
|
+
pj.write_tape_s64(i);
|
|
579
|
+
#ifdef JSON_TEST_NUMBERS // for unit testing
|
|
580
|
+
found_integer(i, buf + offset);
|
|
581
|
+
#endif
|
|
582
|
+
}
|
|
583
|
+
return is_structural_or_whitespace(*p);
|
|
584
|
+
#endif // SIMDJSON_SKIPNUMBERPARSING
|
|
585
|
+
}
|
|
586
|
+
} // simdjson
|
|
587
|
+
#endif
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
#ifndef SIMDJSON_PADDING_STRING_H
|
|
2
|
+
#define SIMDJSON_PADDING_STRING_H
|
|
3
|
+
#include "simdjson/portability.h"
|
|
4
|
+
#include <cstring>
|
|
5
|
+
#include <memory>
|
|
6
|
+
|
|
7
|
+
namespace simdjson {
|
|
8
|
+
// low-level function to allocate memory with padding so we can read past the
|
|
9
|
+
// "length" bytes safely. if you must provide a pointer to some data, create it
|
|
10
|
+
// with this function: length is the max. size in bytes of the string. The caller is
|
|
11
|
+
// responsible to free the memory (free(...))
|
|
12
|
+
char *allocate_padded_buffer(size_t length);
|
|
13
|
+
|
|
14
|
+
// Simple string with padded allocation.
|
|
15
|
+
// We deliberately forbid copies, users should rely on swap or move
|
|
16
|
+
// constructors.
|
|
17
|
+
class padded_string {
|
|
18
|
+
public:
|
|
19
|
+
explicit padded_string() noexcept : viable_size(0), data_ptr(nullptr) {}
|
|
20
|
+
explicit padded_string(size_t length) noexcept
|
|
21
|
+
: viable_size(length), data_ptr(allocate_padded_buffer(length)) {
|
|
22
|
+
|
|
23
|
+
if (data_ptr != nullptr)
|
|
24
|
+
data_ptr[length] = '\0'; // easier when you need a c_str
|
|
25
|
+
}
|
|
26
|
+
explicit padded_string(char *data, size_t length) noexcept
|
|
27
|
+
: viable_size(length), data_ptr(allocate_padded_buffer(length)) {
|
|
28
|
+
if (data_ptr != nullptr) {
|
|
29
|
+
memcpy(data_ptr, data, length);
|
|
30
|
+
data_ptr[length] = '\0'; // easier when you need a c_str
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
padded_string(std::string s) noexcept
|
|
34
|
+
: viable_size(s.size()), data_ptr(allocate_padded_buffer(s.size())) {
|
|
35
|
+
if (data_ptr != nullptr) {
|
|
36
|
+
memcpy(data_ptr, s.data(), s.size());
|
|
37
|
+
data_ptr[s.size()] = '\0'; // easier when you need a c_str
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
padded_string(padded_string &&o) noexcept
|
|
41
|
+
: viable_size(o.viable_size), data_ptr(o.data_ptr) {
|
|
42
|
+
o.data_ptr = nullptr; // we take ownership
|
|
43
|
+
}
|
|
44
|
+
void swap(padded_string &o) {
|
|
45
|
+
size_t tmp_viable_size = viable_size;
|
|
46
|
+
char *tmp_data_ptr = data_ptr;
|
|
47
|
+
viable_size = o.viable_size;
|
|
48
|
+
data_ptr = o.data_ptr;
|
|
49
|
+
o.data_ptr = tmp_data_ptr;
|
|
50
|
+
o.viable_size = tmp_viable_size;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
~padded_string() { aligned_free_char(data_ptr); }
|
|
54
|
+
|
|
55
|
+
size_t size() const { return viable_size; }
|
|
56
|
+
|
|
57
|
+
size_t length() const { return viable_size; }
|
|
58
|
+
|
|
59
|
+
char *data() const { return data_ptr; }
|
|
60
|
+
|
|
61
|
+
private:
|
|
62
|
+
padded_string &operator=(const padded_string &o) = delete;
|
|
63
|
+
padded_string(const padded_string &o) = delete;
|
|
64
|
+
|
|
65
|
+
size_t viable_size;
|
|
66
|
+
char *data_ptr;
|
|
67
|
+
};
|
|
68
|
+
} // namespace simdjson
|
|
69
|
+
|
|
70
|
+
#endif
|