simdjson 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clang-format +5 -0
- data/.gitignore +14 -0
- data/.gitmodules +3 -0
- data/.rubocop.yml +9 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +32 -0
- data/benchmark/apache_builds.json +4421 -0
- data/benchmark/demo.json +15 -0
- data/benchmark/github_events.json +1390 -0
- data/benchmark/run_benchmark.rb +30 -0
- data/ext/simdjson/extconf.rb +22 -0
- data/ext/simdjson/simdjson.cpp +76 -0
- data/ext/simdjson/simdjson.hpp +6 -0
- data/lib/simdjson/version.rb +3 -0
- data/lib/simdjson.rb +2 -0
- data/simdjson.gemspec +35 -0
- data/vendor/.gitkeep +0 -0
- data/vendor/simdjson/AUTHORS +3 -0
- data/vendor/simdjson/CMakeLists.txt +63 -0
- data/vendor/simdjson/CONTRIBUTORS +27 -0
- data/vendor/simdjson/Dockerfile +10 -0
- data/vendor/simdjson/LICENSE +201 -0
- data/vendor/simdjson/Makefile +203 -0
- data/vendor/simdjson/Notes.md +85 -0
- data/vendor/simdjson/README.md +581 -0
- data/vendor/simdjson/amalgamation.sh +158 -0
- data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
- data/vendor/simdjson/benchmark/benchmark.h +223 -0
- data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
- data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
- data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
- data/vendor/simdjson/benchmark/parse.cpp +393 -0
- data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
- data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
- data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
- data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
- data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/gbps.png +0 -0
- data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
- data/vendor/simdjson/images/halvarflake.png +0 -0
- data/vendor/simdjson/images/logo.png +0 -0
- data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
- data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
- data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
- data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
- data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
- data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
- data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
- data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
- data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
- data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
- data/vendor/simdjson/include/simdjson/portability.h +172 -0
- data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
- data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
- data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
- data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
- data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
- data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
- data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
- data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
- data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
- data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
- data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
- data/vendor/simdjson/scripts/issue150.sh +14 -0
- data/vendor/simdjson/scripts/javascript/README.md +3 -0
- data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
- data/vendor/simdjson/scripts/minifier.sh +11 -0
- data/vendor/simdjson/scripts/parseandstat.sh +24 -0
- data/vendor/simdjson/scripts/parser.sh +11 -0
- data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
- data/vendor/simdjson/scripts/plotparse.sh +98 -0
- data/vendor/simdjson/scripts/selectparser.sh +11 -0
- data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
- data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
- data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
- data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
- data/vendor/simdjson/scripts/testjson2json.sh +99 -0
- data/vendor/simdjson/scripts/transitions/Makefile +10 -0
- data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
- data/vendor/simdjson/singleheader/README.md +1 -0
- data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
- data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
- data/vendor/simdjson/singleheader/simdjson.h +39692 -0
- data/vendor/simdjson/src/CMakeLists.txt +67 -0
- data/vendor/simdjson/src/jsonioutil.cpp +35 -0
- data/vendor/simdjson/src/jsonminifier.cpp +285 -0
- data/vendor/simdjson/src/jsonparser.cpp +91 -0
- data/vendor/simdjson/src/parsedjson.cpp +323 -0
- data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
- data/vendor/simdjson/src/simdjson.cpp +30 -0
- data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
- data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
- data/vendor/simdjson/style/clang-format-check.sh +25 -0
- data/vendor/simdjson/style/clang-format.sh +25 -0
- data/vendor/simdjson/style/run-clang-format.py +326 -0
- data/vendor/simdjson/tape.md +134 -0
- data/vendor/simdjson/tests/CMakeLists.txt +25 -0
- data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
- data/vendor/simdjson/tests/basictests.cpp +75 -0
- data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
- data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
- data/vendor/simdjson/tests/pointercheck.cpp +38 -0
- data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
- data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
- data/vendor/simdjson/tools/CMakeLists.txt +3 -0
- data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
- data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
- data/vendor/simdjson/tools/json2json.cpp +112 -0
- data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
- data/vendor/simdjson/tools/jsonstats.cpp +143 -0
- data/vendor/simdjson/tools/minify.cpp +21 -0
- data/vendor/simdjson/tools/release.py +125 -0
- data/vendor/simdjson/windows/dirent_portable.h +1043 -0
- metadata +273 -0
@@ -0,0 +1,75 @@
|
|
1
|
+
#include <cassert>
|
2
|
+
#include <cinttypes>
|
3
|
+
#include <cstdio>
|
4
|
+
#include <cstdlib>
|
5
|
+
#include <cstring>
|
6
|
+
#include <iostream>
|
7
|
+
#include <string>
|
8
|
+
#include <vector>
|
9
|
+
|
10
|
+
#include "simdjson/jsonparser.h"
|
11
|
+
|
12
|
+
// returns true if successful
|
13
|
+
bool skyprophet_test() {
|
14
|
+
const size_t n_records = 100000;
|
15
|
+
std::vector<std::string> data;
|
16
|
+
char buf[1024];
|
17
|
+
for (size_t i = 0; i < n_records; ++i) {
|
18
|
+
auto n = sprintf(buf,
|
19
|
+
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
|
20
|
+
"\"school\": {\"id\": %zu, \"name\": \"school%zu\"}}",
|
21
|
+
i, i, (i % 2) ? "male" : "female", i % 10, i % 10);
|
22
|
+
data.emplace_back(std::string(buf, n));
|
23
|
+
}
|
24
|
+
for (size_t i = 0; i < n_records; ++i) {
|
25
|
+
auto n = sprintf(buf, "{\"counter\": %f, \"array\": [%s]}", i * 3.1416,
|
26
|
+
(i % 2) ? "true" : "false");
|
27
|
+
data.emplace_back(std::string(buf, n));
|
28
|
+
}
|
29
|
+
for (size_t i = 0; i < n_records; ++i) {
|
30
|
+
auto n = sprintf(buf, "{\"number\": %e}", i * 10000.31321321);
|
31
|
+
data.emplace_back(std::string(buf, n));
|
32
|
+
}
|
33
|
+
data.emplace_back(std::string("true"));
|
34
|
+
data.emplace_back(std::string("false"));
|
35
|
+
data.emplace_back(std::string("null"));
|
36
|
+
data.emplace_back(std::string("0.1"));
|
37
|
+
size_t maxsize = 0;
|
38
|
+
for (auto &s : data) {
|
39
|
+
if (maxsize < s.size())
|
40
|
+
maxsize = s.size();
|
41
|
+
}
|
42
|
+
simdjson::ParsedJson pj;
|
43
|
+
if (!pj.allocate_capacity(maxsize)) {
|
44
|
+
printf("allocation failure in skyprophet_test\n");
|
45
|
+
return false;
|
46
|
+
}
|
47
|
+
size_t counter = 0;
|
48
|
+
for (auto &rec : data) {
|
49
|
+
if ((counter % 10000) == 0) {
|
50
|
+
printf(".");
|
51
|
+
fflush(NULL);
|
52
|
+
}
|
53
|
+
counter++;
|
54
|
+
auto ok1 = json_parse(rec.c_str(), rec.length(), pj);
|
55
|
+
if (ok1 != 0 || !pj.is_valid()) {
|
56
|
+
printf("Something is wrong in skyprophet_test: %s.\n", rec.c_str());
|
57
|
+
return false;
|
58
|
+
}
|
59
|
+
auto ok2 = json_parse(rec, pj);
|
60
|
+
if (ok2 != 0 || !pj.is_valid()) {
|
61
|
+
printf("Something is wrong in skyprophet_test: %s.\n", rec.c_str());
|
62
|
+
return false;
|
63
|
+
}
|
64
|
+
}
|
65
|
+
printf("\n");
|
66
|
+
return true;
|
67
|
+
}
|
68
|
+
|
69
|
+
int main() {
|
70
|
+
std::cout << "Running basic tests." << std::endl;
|
71
|
+
if (!skyprophet_test())
|
72
|
+
return EXIT_FAILURE;
|
73
|
+
std::cout << "Basic tests are ok." << std::endl;
|
74
|
+
return EXIT_SUCCESS;
|
75
|
+
}
|
@@ -0,0 +1,136 @@
|
|
1
|
+
#include <cassert>
|
2
|
+
#include <cstring>
|
3
|
+
#ifndef _MSC_VER
|
4
|
+
#include <dirent.h>
|
5
|
+
#include <unistd.h>
|
6
|
+
#else
|
7
|
+
// Microsoft can't be bothered to provide standard utils.
|
8
|
+
#include <dirent_portable.h>
|
9
|
+
#endif
|
10
|
+
#include <cinttypes>
|
11
|
+
|
12
|
+
#include <cstdio>
|
13
|
+
#include <cstdlib>
|
14
|
+
|
15
|
+
#include "simdjson/jsonparser.h"
|
16
|
+
|
17
|
+
/**
|
18
|
+
* Does the file name end with the given extension?
|
19
|
+
*/
|
20
|
+
static bool has_extension(const char *filename, const char *extension) {
  // The candidate suffix starts at the last '.' of the name, dot included.
  const char *last_dot = strrchr(filename, '.');
  if (last_dot == nullptr) {
    return false; // no '.' anywhere, hence nothing to compare
  }
  return strcmp(last_dot, extension) == 0;
}
|
24
|
+
|
25
|
+
// Reports whether `str` begins with the prefix `pre`.
bool starts_with(const char *pre, const char *str) {
  const size_t prefix_len = strlen(pre);
  if (strlen(str) < prefix_len) {
    return false; // str is too short to hold the whole prefix
  }
  return strncmp(pre, str, prefix_len) == 0;
}
|
29
|
+
|
30
|
+
// Reports whether `pre` occurs anywhere inside `str` (note the argument
// order: the needle comes first, the text being searched second).
bool contains(const char *pre, const char *str) {
  const char *hit = strstr(str, pre);
  return hit != nullptr;
}
|
33
|
+
|
34
|
+
bool validate(const char *dirname) {
|
35
|
+
bool everything_fine = true;
|
36
|
+
const char *extension = ".json";
|
37
|
+
size_t dirlen = strlen(dirname);
|
38
|
+
struct dirent **entry_list;
|
39
|
+
int c = scandir(dirname, &entry_list, nullptr, alphasort);
|
40
|
+
if (c < 0) {
|
41
|
+
fprintf(stderr, "error accessing %s \n", dirname);
|
42
|
+
return false;
|
43
|
+
}
|
44
|
+
if (c == 0) {
|
45
|
+
printf("nothing in dir %s \n", dirname);
|
46
|
+
return false;
|
47
|
+
}
|
48
|
+
bool *is_file_as_expected = new bool[c];
|
49
|
+
for (int i = 0; i < c; i++) {
|
50
|
+
is_file_as_expected[i] = true;
|
51
|
+
}
|
52
|
+
size_t how_many = 0;
|
53
|
+
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
|
54
|
+
for (int i = 0; i < c; i++) {
|
55
|
+
const char *name = entry_list[i]->d_name;
|
56
|
+
if (has_extension(name, extension)) {
|
57
|
+
printf("validating: file %s ", name);
|
58
|
+
fflush(nullptr);
|
59
|
+
size_t filelen = strlen(name);
|
60
|
+
char *fullpath = static_cast<char *>(malloc(dirlen + filelen + 1 + 1));
|
61
|
+
strcpy(fullpath, dirname);
|
62
|
+
if (needsep) {
|
63
|
+
fullpath[dirlen] = '/';
|
64
|
+
strcpy(fullpath + dirlen + 1, name);
|
65
|
+
} else {
|
66
|
+
strcpy(fullpath + dirlen, name);
|
67
|
+
}
|
68
|
+
simdjson::padded_string p;
|
69
|
+
try {
|
70
|
+
simdjson::get_corpus(fullpath).swap(p);
|
71
|
+
} catch (const std::exception &) {
|
72
|
+
std::cerr << "Could not load the file " << fullpath << std::endl;
|
73
|
+
return EXIT_FAILURE;
|
74
|
+
}
|
75
|
+
simdjson::ParsedJson pj;
|
76
|
+
bool allocok = pj.allocate_capacity(p.size(), 1024);
|
77
|
+
if (!allocok) {
|
78
|
+
std::cerr << "can't allocate memory" << std::endl;
|
79
|
+
return false;
|
80
|
+
}
|
81
|
+
++how_many;
|
82
|
+
const int parse_res = json_parse(p, pj);
|
83
|
+
printf("%s\n", parse_res == 0 ? "ok" : "invalid");
|
84
|
+
if (contains("EXCLUDE", name)) {
|
85
|
+
// skipping
|
86
|
+
how_many--;
|
87
|
+
} else if (starts_with("pass", name) && parse_res != 0) {
|
88
|
+
is_file_as_expected[i] = false;
|
89
|
+
printf("warning: file %s should pass but it fails. Error is: %s\n",
|
90
|
+
name, simdjson::error_message(parse_res).data());
|
91
|
+
everything_fine = false;
|
92
|
+
} else if (starts_with("fail", name) && parse_res == 0) {
|
93
|
+
is_file_as_expected[i] = false;
|
94
|
+
printf("warning: file %s should fail but it passes.\n", name);
|
95
|
+
everything_fine = false;
|
96
|
+
}
|
97
|
+
free(fullpath);
|
98
|
+
}
|
99
|
+
}
|
100
|
+
printf("%zu files checked.\n", how_many);
|
101
|
+
if (everything_fine) {
|
102
|
+
printf("All ok!\n");
|
103
|
+
} else {
|
104
|
+
fprintf(stderr,
|
105
|
+
"There were problems! Consider reviewing the following files:\n");
|
106
|
+
for (int i = 0; i < c; i++) {
|
107
|
+
if (!is_file_as_expected[i]) {
|
108
|
+
fprintf(stderr, "%s \n", entry_list[i]->d_name);
|
109
|
+
}
|
110
|
+
}
|
111
|
+
}
|
112
|
+
for (int i = 0; i < c; ++i) {
|
113
|
+
free(entry_list[i]);
|
114
|
+
}
|
115
|
+
free(entry_list);
|
116
|
+
delete[] is_file_as_expected;
|
117
|
+
return everything_fine;
|
118
|
+
}
|
119
|
+
|
120
|
+
int main(int argc, char *argv[]) {
|
121
|
+
if (argc != 2) {
|
122
|
+
std::cerr << "Usage: " << argv[0] << " <directorywithjsonfiles>"
|
123
|
+
<< std::endl;
|
124
|
+
#ifndef SIMDJSON_TEST_DATA_DIR
|
125
|
+
std::cout
|
126
|
+
<< "We are going to assume you mean to use the 'jsonchecker' directory."
|
127
|
+
<< std::endl;
|
128
|
+
return validate("jsonchecker/") ? EXIT_SUCCESS : EXIT_FAILURE;
|
129
|
+
#else
|
130
|
+
std::cout << "We are going to assume you mean to use the '"
|
131
|
+
<< SIMDJSON_TEST_DATA_DIR << "' directory." << std::endl;
|
132
|
+
return validate(SIMDJSON_TEST_DATA_DIR) ? EXIT_SUCCESS : EXIT_FAILURE;
|
133
|
+
#endif
|
134
|
+
}
|
135
|
+
return validate(argv[1]) ? EXIT_SUCCESS : EXIT_FAILURE;
|
136
|
+
}
|
@@ -0,0 +1,224 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <cstring>
|
3
|
+
#include <dirent.h>
|
4
|
+
#include <inttypes.h>
|
5
|
+
#include <math.h>
|
6
|
+
#include <stdbool.h>
|
7
|
+
#include <stdio.h>
|
8
|
+
#include <stdlib.h>
|
9
|
+
|
10
|
+
#ifndef JSON_TEST_NUMBERS
|
11
|
+
#define JSON_TEST_NUMBERS
|
12
|
+
#endif
|
13
|
+
|
14
|
+
#include "simdjson/common_defs.h"
|
15
|
+
|
16
|
+
// ulp distance
|
17
|
+
// Marc B. Reynolds, 2016-2019
|
18
|
+
// Public Domain under http://unlicense.org, see link for details.
|
19
|
+
// adapted by D. Lemire
|
20
|
+
inline uint32_t f32_ulp_dist(float a, float b) {
  // Work on the raw 32-bit patterns of both values.
  uint32_t bits_a;
  uint32_t bits_b;
  memcpy(&bits_a, &a, sizeof(bits_a));
  memcpy(&bits_b, &b, sizeof(bits_b));
  // The top bit of the XOR is set exactly when the sign bits differ.
  const bool sign_bits_differ = (int32_t)(bits_b ^ bits_a) < 0;
  if (sign_bits_differ) {
    return bits_a + bits_b + 0x80000000;
  }
  // Same sign: the distance is the absolute difference of the patterns.
  const uint32_t forward = bits_a - bits_b;
  if ((int32_t)forward >= 0) {
    return forward;
  }
  return bits_b - bits_a;
}
|
28
|
+
|
29
|
+
// ulp distance
|
30
|
+
// Marc B. Reynolds, 2016-2019
|
31
|
+
// Public Domain under http://unlicense.org, see link for details.
|
32
|
+
// adapted by D. Lemire
|
33
|
+
// Distance between a and b in units in the last place, computed from the raw
// 64-bit patterns of the two doubles.
inline uint64_t f64_ulp_dist(double a, double b) {
  uint64_t ua, ub;
  memcpy(&ua, &a, sizeof(ua));
  memcpy(&ub, &b, sizeof(ub));
  // Same sign: the distance is the absolute difference of the bit patterns.
  if ((int64_t)(ub ^ ua) >= 0)
    return (int64_t)(ua - ub) >= 0 ? (ua - ub) : (ub - ua);
  // Opposite signs: exactly one operand has bit 63 set, so adding 2^63
  // (mod 2^64) cancels it and leaves the sum of the two magnitudes. The
  // constant must be the 64-bit sign bit; the 32-bit constant used by
  // f32_ulp_dist gives a wrong result here.
  return ua + ub + 0x8000000000000000ULL;
}
|
41
|
+
|
42
|
+
// Bit set of PARSE_* flags accumulated by the found_* hooks below.
int parse_error;
// Path of the file currently being parsed; only used in diagnostics.
char *fullpath;
// Flags OR-ed into parse_error. Each one needs its own non-zero bit: with the
// default enumerator values PARSE_WARNING would be 0 and
// `parse_error |= PARSE_WARNING` would record nothing.
enum { PARSE_WARNING = 1, PARSE_ERROR = 2 };

// Per-file tallies of the numbers reported through the found_* hooks.
size_t float_count;
size_t int_count;
size_t invalid_count;

// strings that start with these should not be parsed as numbers
const char *really_bad[] = {"013}", "0x14", "0e]", "0e+]", "0e+-1]"};
|
52
|
+
|
53
|
+
// Reports whether the first strlen(pre) characters of str equal pre.
bool starts_with(const char *pre, const char *str) {
  const size_t prefix_len = strlen(pre);
  const int diff = strncmp(pre, str, prefix_len);
  return diff == 0;
}
|
57
|
+
|
58
|
+
bool is_in_bad_list(const char *buf) {
|
59
|
+
if (buf[0] != '0')
|
60
|
+
return false;
|
61
|
+
for (size_t i = 0; i < sizeof(really_bad) / sizeof(really_bad[0]); i++)
|
62
|
+
if (starts_with(really_bad[i], buf))
|
63
|
+
return true;
|
64
|
+
return false;
|
65
|
+
}
|
66
|
+
|
67
|
+
void found_invalid_number(const uint8_t *buf) {
|
68
|
+
invalid_count++;
|
69
|
+
char *endptr;
|
70
|
+
double expected = strtod((const char *)buf, &endptr);
|
71
|
+
if (endptr != (const char *)buf) {
|
72
|
+
if (!is_in_bad_list((const char *)buf)) {
|
73
|
+
printf("Warning: found_invalid_number %.32s whereas strtod parses it to "
|
74
|
+
"%f, ",
|
75
|
+
buf, expected);
|
76
|
+
printf(" while parsing %s \n", fullpath);
|
77
|
+
parse_error |= PARSE_WARNING;
|
78
|
+
}
|
79
|
+
}
|
80
|
+
}
|
81
|
+
|
82
|
+
void found_integer(int64_t result, const uint8_t *buf) {
|
83
|
+
int_count++;
|
84
|
+
char *endptr;
|
85
|
+
long long expected = strtoll((const char *)buf, &endptr, 10);
|
86
|
+
if ((endptr == (const char *)buf) || (expected != result)) {
|
87
|
+
fprintf(stderr, "Error: parsed %" PRId64 " out of %.32s, ", result, buf);
|
88
|
+
fprintf(stderr, " while parsing %s \n", fullpath);
|
89
|
+
parse_error |= PARSE_ERROR;
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
93
|
+
void found_float(double result, const uint8_t *buf) {
|
94
|
+
char *endptr;
|
95
|
+
float_count++;
|
96
|
+
double expected = strtod((const char *)buf, &endptr);
|
97
|
+
if (endptr == (const char *)buf) {
|
98
|
+
fprintf(stderr,
|
99
|
+
"parsed %f from %.32s whereas strtod refuses to parse a float, ",
|
100
|
+
result, buf);
|
101
|
+
fprintf(stderr, " while parsing %s \n", fullpath);
|
102
|
+
parse_error |= PARSE_ERROR;
|
103
|
+
}
|
104
|
+
if (fpclassify(expected) != fpclassify(result)) {
|
105
|
+
fprintf(stderr,
|
106
|
+
"floats not in the same category expected: %f observed: %f \n",
|
107
|
+
expected, result);
|
108
|
+
fprintf(stderr, "%.32s\n", buf);
|
109
|
+
parse_error |= PARSE_ERROR;
|
110
|
+
return;
|
111
|
+
}
|
112
|
+
// we want to get some reasonable relative accuracy
|
113
|
+
uint64_t ULP = f64_ulp_dist(expected, result);
|
114
|
+
if (f64_ulp_dist(expected, result) > 1) {
|
115
|
+
fprintf(stderr, "parsed %.128e from \n", result);
|
116
|
+
fprintf(stderr, " %.32s whereas strtod gives\n", buf);
|
117
|
+
fprintf(stderr, " %.128e,", expected);
|
118
|
+
fprintf(stderr, " while parsing %s \n", fullpath);
|
119
|
+
fprintf(stderr, " =========== ULP: %u,", (unsigned int)ULP);
|
120
|
+
parse_error |= PARSE_ERROR;
|
121
|
+
}
|
122
|
+
}
|
123
|
+
|
124
|
+
#include "simdjson/jsonparser.h"
|
125
|
+
#include "src/stage2_build_tape.cpp"
|
126
|
+
|
127
|
+
/**
|
128
|
+
* Does the file name end with the given extension?
|
129
|
+
*/
|
130
|
+
static bool has_extension(const char *filename, const char *extension) {
  // Compare everything from the last '.' onwards with the wanted extension.
  const char *suffix = strrchr(filename, '.');
  if (suffix == nullptr) {
    return false; // the name contains no '.'
  }
  return strcmp(suffix, extension) == 0;
}
|
134
|
+
|
135
|
+
bool validate(const char *dirname) {
|
136
|
+
parse_error = 0;
|
137
|
+
size_t total_count = 0;
|
138
|
+
const char *extension = ".json";
|
139
|
+
size_t dirlen = strlen(dirname);
|
140
|
+
struct dirent **entry_list;
|
141
|
+
int c = scandir(dirname, &entry_list, 0, alphasort);
|
142
|
+
if (c < 0) {
|
143
|
+
printf("error accessing %s \n", dirname);
|
144
|
+
return false;
|
145
|
+
}
|
146
|
+
if (c == 0) {
|
147
|
+
printf("nothing in dir %s \n", dirname);
|
148
|
+
return false;
|
149
|
+
}
|
150
|
+
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
|
151
|
+
for (int i = 0; i < c; i++) {
|
152
|
+
const char *name = entry_list[i]->d_name;
|
153
|
+
if (has_extension(name, extension)) {
|
154
|
+
size_t filelen = strlen(name);
|
155
|
+
fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
|
156
|
+
strcpy(fullpath, dirname);
|
157
|
+
if (needsep) {
|
158
|
+
fullpath[dirlen] = '/';
|
159
|
+
strcpy(fullpath + dirlen + 1, name);
|
160
|
+
} else {
|
161
|
+
strcpy(fullpath + dirlen, name);
|
162
|
+
}
|
163
|
+
simdjson::padded_string p;
|
164
|
+
try {
|
165
|
+
simdjson::get_corpus(fullpath).swap(p);
|
166
|
+
} catch (const std::exception &e) {
|
167
|
+
std::cout << "Could not load the file " << fullpath << std::endl;
|
168
|
+
return EXIT_FAILURE;
|
169
|
+
}
|
170
|
+
// terrible hack but just to get it working
|
171
|
+
simdjson::ParsedJson pj;
|
172
|
+
bool allocok = pj.allocate_capacity(p.size(), 1024);
|
173
|
+
if (!allocok) {
|
174
|
+
std::cerr << "can't allocate memory" << std::endl;
|
175
|
+
return false;
|
176
|
+
}
|
177
|
+
float_count = 0;
|
178
|
+
int_count = 0;
|
179
|
+
invalid_count = 0;
|
180
|
+
total_count += float_count + int_count + invalid_count;
|
181
|
+
bool isok = json_parse(p, pj);
|
182
|
+
if (int_count + float_count + invalid_count > 0) {
|
183
|
+
printf("File %40s %s --- integers: %10zu floats: %10zu invalid: %10zu "
|
184
|
+
"total numbers: %10zu \n",
|
185
|
+
name, isok ? " is valid " : " is not valid ", int_count,
|
186
|
+
float_count, invalid_count,
|
187
|
+
int_count + float_count + invalid_count);
|
188
|
+
}
|
189
|
+
free(fullpath);
|
190
|
+
}
|
191
|
+
}
|
192
|
+
if ((parse_error & PARSE_ERROR) != 0) {
|
193
|
+
printf("NUMBER PARSING FAILS?\n");
|
194
|
+
} else {
|
195
|
+
printf("All ok.\n");
|
196
|
+
}
|
197
|
+
for (int i = 0; i < c; ++i)
|
198
|
+
free(entry_list[i]);
|
199
|
+
free(entry_list);
|
200
|
+
return ((parse_error & PARSE_ERROR) == 0);
|
201
|
+
}
|
202
|
+
|
203
|
+
int main(int argc, char *argv[]) {
|
204
|
+
if (argc != 2) {
|
205
|
+
std::cerr << "Usage: " << argv[0] << " <directorywithjsonfiles>"
|
206
|
+
<< std::endl;
|
207
|
+
#if defined(SIMDJSON_TEST_DATA_DIR) && defined(SIMDJSON_BENCHMARK_DATA_DIR)
|
208
|
+
std::cout << "We are going to assume you mean to use the '"
|
209
|
+
<< SIMDJSON_TEST_DATA_DIR << "' and '"
|
210
|
+
<< SIMDJSON_BENCHMARK_DATA_DIR << "'directories." << std::endl;
|
211
|
+
return validate(SIMDJSON_TEST_DATA_DIR) &&
|
212
|
+
validate(SIMDJSON_BENCHMARK_DATA_DIR)
|
213
|
+
? EXIT_SUCCESS
|
214
|
+
: EXIT_FAILURE;
|
215
|
+
#else
|
216
|
+
std::cout << "We are going to assume you mean to use the 'jsonchecker' and "
|
217
|
+
"'jsonexamples' directories."
|
218
|
+
<< std::endl;
|
219
|
+
return validate("jsonchecker/") && validate("jsonexamples/") ? EXIT_SUCCESS
|
220
|
+
: EXIT_FAILURE;
|
221
|
+
#endif
|
222
|
+
}
|
223
|
+
return validate(argv[1]) ? EXIT_SUCCESS : EXIT_FAILURE;
|
224
|
+
}
|
@@ -0,0 +1,38 @@
|
|
1
|
+
#include <iostream>
|
2
|
+
|
3
|
+
#include "simdjson/jsonparser.h"
|
4
|
+
#include "simdjson/parsedjson.h"
|
5
|
+
|
6
|
+
// Exercises ParsedJson::Iterator::move_to with four JSON pointers against one
// small document. Every expectation is checked explicitly rather than through
// assert(), so the side-effecting calls (allocate_capacity, move_to) still run
// and failures are still detected when the file is compiled with NDEBUG.
// Returns 0 when every expectation holds and 1 on the first failure.
int main() {
  // {"/~01abc": [0, {"\\\" 0": ["value0", "value1"]}]}
  std::string json =
      "{\"/~01abc\": [0, {\"\\\\\\\" 0\": [\"value0\", \"value1\"]}]}";
  // Prints a diagnostic when `ok` is false and hands `ok` back so that calls
  // can be chained with || below.
  const auto expect = [](bool ok, const char *what) {
    if (!ok) {
      std::cerr << "pointercheck failed: " << what << std::endl;
    }
    return ok;
  };
  simdjson::ParsedJson pj;
  if (!expect(pj.allocate_capacity(json.length()), "allocate_capacity")) {
    return 1;
  }
  simdjson::json_parse(json.c_str(), json.length(), pj);
  if (!expect(pj.is_valid(), "document should be valid")) {
    return 1;
  }
  simdjson::ParsedJson::Iterator it(pj);

  // valid JSON String Representation pointer
  std::string pointer1("/~1~001abc/1/\\\\\\\" 0/0");
  if (!expect(it.move_to(pointer1.c_str(), pointer1.length()),
              "pointer1 should resolve") ||
      !expect(it.is_string(), "pointer1 should reach a string") ||
      !expect(it.get_string() == std::string("value0"),
              "pointer1 should reach \"value0\"")) {
    return 1;
  }

  // valid URI Fragment Identifier Representation pointer
  std::string pointer2("#/~1~001abc/1/%x5C%x22%x200/1");
  if (!expect(it.move_to(pointer2.c_str(), pointer2.length()),
              "pointer2 should resolve") ||
      !expect(it.is_string(), "pointer2 should reach a string") ||
      !expect(it.get_string() == std::string("value1"),
              "pointer2 should reach \"value1\"")) {
    return 1;
  }

  // invalid pointer with leading 0 in index
  std::string pointer3("#/~1~001abc/01");
  if (!expect(!it.move_to(pointer3.c_str(), pointer3.length()),
              "pointer3 should be rejected") ||
      // after the failed move the iterator is expected to be where it was
      !expect(it.is_string(), "iterator should still be on a string") ||
      !expect(it.get_string() == std::string("value1"),
              "iterator should still be on \"value1\"")) {
    return 1;
  }

  // "the (nonexistent) member after the last array element"
  std::string pointer4("/~1~001abc/-");
  if (!expect(it.move_to(pointer4.c_str(), pointer4.length()),
              "pointer4 should resolve") ||
      !expect(it.get_type() == ']', "pointer4 should reach ']'")) {
    return 1;
  }
  return 0;
}
|
@@ -0,0 +1,22 @@
|
|
1
|
+
#include "../singleheader/simdjson.h"
|
2
|
+
#include <iostream>
|
3
|
+
|
4
|
+
using namespace simdjson;
|
5
|
+
|
6
|
+
int main() {
|
7
|
+
const char *filename = JSON_TEST_PATH;
|
8
|
+
padded_string p = get_corpus(filename);
|
9
|
+
ParsedJson pj = build_parsed_json(p); // do the parsing
|
10
|
+
if (!pj.is_valid()) {
|
11
|
+
return EXIT_FAILURE;
|
12
|
+
}
|
13
|
+
if (!pj.allocate_capacity(p.size())) {
|
14
|
+
return EXIT_FAILURE;
|
15
|
+
}
|
16
|
+
const int res = json_parse(p, pj);
|
17
|
+
if (res) {
|
18
|
+
std::cerr << error_message(res) << std::endl;
|
19
|
+
return EXIT_FAILURE;
|
20
|
+
}
|
21
|
+
return EXIT_SUCCESS;
|
22
|
+
}
|