simdjson 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clang-format +5 -0
- data/.gitignore +14 -0
- data/.gitmodules +3 -0
- data/.rubocop.yml +9 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +32 -0
- data/benchmark/apache_builds.json +4421 -0
- data/benchmark/demo.json +15 -0
- data/benchmark/github_events.json +1390 -0
- data/benchmark/run_benchmark.rb +30 -0
- data/ext/simdjson/extconf.rb +22 -0
- data/ext/simdjson/simdjson.cpp +76 -0
- data/ext/simdjson/simdjson.hpp +6 -0
- data/lib/simdjson/version.rb +3 -0
- data/lib/simdjson.rb +2 -0
- data/simdjson.gemspec +35 -0
- data/vendor/.gitkeep +0 -0
- data/vendor/simdjson/AUTHORS +3 -0
- data/vendor/simdjson/CMakeLists.txt +63 -0
- data/vendor/simdjson/CONTRIBUTORS +27 -0
- data/vendor/simdjson/Dockerfile +10 -0
- data/vendor/simdjson/LICENSE +201 -0
- data/vendor/simdjson/Makefile +203 -0
- data/vendor/simdjson/Notes.md +85 -0
- data/vendor/simdjson/README.md +581 -0
- data/vendor/simdjson/amalgamation.sh +158 -0
- data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
- data/vendor/simdjson/benchmark/benchmark.h +223 -0
- data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
- data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
- data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
- data/vendor/simdjson/benchmark/parse.cpp +393 -0
- data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
- data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
- data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
- data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
- data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/gbps.png +0 -0
- data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
- data/vendor/simdjson/images/halvarflake.png +0 -0
- data/vendor/simdjson/images/logo.png +0 -0
- data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
- data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
- data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
- data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
- data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
- data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
- data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
- data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
- data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
- data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
- data/vendor/simdjson/include/simdjson/portability.h +172 -0
- data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
- data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
- data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
- data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
- data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
- data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
- data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
- data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
- data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
- data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
- data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
- data/vendor/simdjson/scripts/issue150.sh +14 -0
- data/vendor/simdjson/scripts/javascript/README.md +3 -0
- data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
- data/vendor/simdjson/scripts/minifier.sh +11 -0
- data/vendor/simdjson/scripts/parseandstat.sh +24 -0
- data/vendor/simdjson/scripts/parser.sh +11 -0
- data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
- data/vendor/simdjson/scripts/plotparse.sh +98 -0
- data/vendor/simdjson/scripts/selectparser.sh +11 -0
- data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
- data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
- data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
- data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
- data/vendor/simdjson/scripts/testjson2json.sh +99 -0
- data/vendor/simdjson/scripts/transitions/Makefile +10 -0
- data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
- data/vendor/simdjson/singleheader/README.md +1 -0
- data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
- data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
- data/vendor/simdjson/singleheader/simdjson.h +39692 -0
- data/vendor/simdjson/src/CMakeLists.txt +67 -0
- data/vendor/simdjson/src/jsonioutil.cpp +35 -0
- data/vendor/simdjson/src/jsonminifier.cpp +285 -0
- data/vendor/simdjson/src/jsonparser.cpp +91 -0
- data/vendor/simdjson/src/parsedjson.cpp +323 -0
- data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
- data/vendor/simdjson/src/simdjson.cpp +30 -0
- data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
- data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
- data/vendor/simdjson/style/clang-format-check.sh +25 -0
- data/vendor/simdjson/style/clang-format.sh +25 -0
- data/vendor/simdjson/style/run-clang-format.py +326 -0
- data/vendor/simdjson/tape.md +134 -0
- data/vendor/simdjson/tests/CMakeLists.txt +25 -0
- data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
- data/vendor/simdjson/tests/basictests.cpp +75 -0
- data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
- data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
- data/vendor/simdjson/tests/pointercheck.cpp +38 -0
- data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
- data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
- data/vendor/simdjson/tools/CMakeLists.txt +3 -0
- data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
- data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
- data/vendor/simdjson/tools/json2json.cpp +112 -0
- data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
- data/vendor/simdjson/tools/jsonstats.cpp +143 -0
- data/vendor/simdjson/tools/minify.cpp +21 -0
- data/vendor/simdjson/tools/release.py +125 -0
- data/vendor/simdjson/windows/dirent_portable.h +1043 -0
- metadata +273 -0
@@ -0,0 +1,347 @@
|
|
1
|
+
#include "simdjson/jsonparser.h"
|
2
|
+
#include <algorithm>
|
3
|
+
#include <unistd.h>
|
4
|
+
#include <vector>
|
5
|
+
|
6
|
+
#include "benchmark.h"
|
7
|
+
// #define RAPIDJSON_SSE2 // bad for performance
|
8
|
+
// #define RAPIDJSON_SSE42 // bad for performance
|
9
|
+
#include "rapidjson/document.h"
|
10
|
+
#include "rapidjson/reader.h"
|
11
|
+
#include "rapidjson/stringbuffer.h"
|
12
|
+
#include "rapidjson/writer.h"
|
13
|
+
|
14
|
+
#include "sajson.h"
|
15
|
+
|
16
|
+
using namespace rapidjson;
|
17
|
+
|
18
|
+
// Returns true when the two NUL-terminated C strings have identical contents.
bool equals(const char *s1, const char *s2) {
  const int comparison = strcmp(s1, s2);
  return comparison == 0;
}
|
19
|
+
|
20
|
+
// Sorts v in ascending order and removes repeated values, in place.
void remove_duplicates(std::vector<int64_t> &v) {
  std::sort(v.begin(), v.end());
  // std::unique packs the distinct values at the front; trim the leftover
  // tail in the same statement.
  v.erase(std::unique(v.begin(), v.end()), v.end());
}
|
25
|
+
|
26
|
+
// Writes every element of v to std::cout, each followed by a single space,
// then ends the line (and flushes) with std::endl.
void print_vec(const std::vector<int64_t> &v) {
  for (auto it = v.begin(); it != v.end(); ++it) {
    std::cout << *it << " ";
  }
  std::cout << std::endl;
}
|
32
|
+
|
33
|
+
void simdjson_scan(std::vector<int64_t> &answer,
|
34
|
+
simdjson::ParsedJson::Iterator &i) {
|
35
|
+
while (i.move_forward()) {
|
36
|
+
if (i.get_scope_type() == '{') {
|
37
|
+
bool found_user = (i.get_string_length() == 4) &&
|
38
|
+
(memcmp(i.get_string(), "user", 4) == 0);
|
39
|
+
i.move_to_value();
|
40
|
+
if (found_user) {
|
41
|
+
if (i.is_object() && i.move_to_key("id", 2)) {
|
42
|
+
if (i.is_integer()) {
|
43
|
+
answer.push_back(i.get_integer());
|
44
|
+
}
|
45
|
+
i.up();
|
46
|
+
}
|
47
|
+
}
|
48
|
+
}
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
__attribute__((noinline)) std::vector<int64_t>
|
53
|
+
simdjson_just_dom(simdjson::ParsedJson &pj) {
|
54
|
+
std::vector<int64_t> answer;
|
55
|
+
simdjson::ParsedJson::Iterator i(pj);
|
56
|
+
simdjson_scan(answer, i);
|
57
|
+
remove_duplicates(answer);
|
58
|
+
return answer;
|
59
|
+
}
|
60
|
+
|
61
|
+
__attribute__((noinline)) std::vector<int64_t>
|
62
|
+
simdjson_compute_stats(const simdjson::padded_string &p) {
|
63
|
+
std::vector<int64_t> answer;
|
64
|
+
simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
|
65
|
+
if (!pj.is_valid()) {
|
66
|
+
return answer;
|
67
|
+
}
|
68
|
+
simdjson::ParsedJson::Iterator i(pj);
|
69
|
+
simdjson_scan(answer, i);
|
70
|
+
remove_duplicates(answer);
|
71
|
+
return answer;
|
72
|
+
}
|
73
|
+
|
74
|
+
__attribute__((noinline)) bool
|
75
|
+
simdjson_just_parse(const simdjson::padded_string &p) {
|
76
|
+
simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
|
77
|
+
bool answer = !pj.is_valid();
|
78
|
+
return answer;
|
79
|
+
}
|
80
|
+
|
81
|
+
// Recursively visits every value below node and appends to answer the "id"
// of each object stored under a "user" key. Integer ids are recorded as-is;
// ids stored as doubles are truncated to int64_t.
void sajson_traverse(std::vector<int64_t> &answer, const sajson::value &node) {
  using namespace sajson;
  const auto kind = node.get_type();

  if (kind == TYPE_ARRAY) {
    const auto count = node.get_length();
    for (size_t pos = 0; pos < count; ++pos) {
      sajson_traverse(answer, node.get_array_element(pos));
    }
    return;
  }

  if (kind == TYPE_OBJECT) {
    const auto count = node.get_length();
    // Every member is visited regardless of key lookups, because nested
    // values may themselves contain "user" objects.
    for (size_t pos = 0; pos < count; ++pos) {
      const auto key = node.get_object_key(pos);
      const bool key_is_user =
          (key.length() == 4) && (memcmp(key.data(), "user", 4) == 0);
      const auto member = node.get_object_value(pos);
      if (key_is_user && member.get_type() == TYPE_OBJECT) {
        // find_object_key yields an index; anything at or past the member
        // count is treated as "not found".
        const auto member_count = member.get_length();
        const auto id_slot = member.find_object_key(sajson::string("id", 2));
        if (id_slot < member_count) {
          const auto id_value = member.get_object_value(id_slot);
          const auto id_kind = id_value.get_type();
          if (id_kind == TYPE_INTEGER) {
            answer.push_back(id_value.get_integer_value());
          } else if (id_kind == TYPE_DOUBLE) {
            answer.push_back(static_cast<int64_t>(id_value.get_double_value()));
          }
        }
      }
      sajson_traverse(answer, member);
    }
    return;
  }

  // Leaf values carry nothing to collect; any other tag is a logic error.
  switch (kind) {
  case TYPE_NULL:
  case TYPE_FALSE:
  case TYPE_TRUE:
  case TYPE_STRING:
  case TYPE_DOUBLE:
  case TYPE_INTEGER:
    break;
  default:
    assert(false && "unknown node type");
  }
}
|
132
|
+
|
133
|
+
__attribute__((noinline)) std::vector<int64_t>
|
134
|
+
sasjon_just_dom(sajson::document &d) {
|
135
|
+
std::vector<int64_t> answer;
|
136
|
+
sajson_traverse(answer, d.get_root());
|
137
|
+
remove_duplicates(answer);
|
138
|
+
return answer;
|
139
|
+
}
|
140
|
+
|
141
|
+
__attribute__((noinline)) std::vector<int64_t>
|
142
|
+
sasjon_compute_stats(const simdjson::padded_string &p) {
|
143
|
+
std::vector<int64_t> answer;
|
144
|
+
char *buffer = (char *)malloc(p.size());
|
145
|
+
memcpy(buffer, p.data(), p.size());
|
146
|
+
auto d = sajson::parse(sajson::dynamic_allocation(),
|
147
|
+
sajson::mutable_string_view(p.size(), buffer));
|
148
|
+
if (!d.is_valid()) {
|
149
|
+
free(buffer);
|
150
|
+
return answer;
|
151
|
+
}
|
152
|
+
sajson_traverse(answer, d.get_root());
|
153
|
+
free(buffer);
|
154
|
+
remove_duplicates(answer);
|
155
|
+
return answer;
|
156
|
+
}
|
157
|
+
|
158
|
+
__attribute__((noinline)) bool
|
159
|
+
sasjon_just_parse(const simdjson::padded_string &p) {
|
160
|
+
char *buffer = (char *)malloc(p.size());
|
161
|
+
memcpy(buffer, p.data(), p.size());
|
162
|
+
auto d = sajson::parse(sajson::dynamic_allocation(),
|
163
|
+
sajson::mutable_string_view(p.size(), buffer));
|
164
|
+
bool answer = !d.is_valid();
|
165
|
+
free(buffer);
|
166
|
+
return answer;
|
167
|
+
}
|
168
|
+
|
169
|
+
// Recursively visits every value below v and appends to answer the "id" of
// each object stored under a "user" key.
//
// Only ids that RapidJSON reports as representable in a signed 64-bit integer
// are recorded: GetInt64() must not be called on doubles or out-of-range
// unsigned values, so the guard is IsInt64() rather than a bare kNumberType
// check. This also matches the simdjson path, which records integers only.
void rapid_traverse(std::vector<int64_t> &answer, const rapidjson::Value &v) {
  switch (v.GetType()) {
  case kObjectType:
    for (Value::ConstMemberIterator m = v.MemberBegin(); m != v.MemberEnd();
         ++m) {
      bool found_user = (m->name.GetStringLength() == 4) &&
                        (memcmp(m->name.GetString(), "user", 4) == 0);
      if (found_user) {
        const rapidjson::Value &child = m->value;
        if (child.GetType() == kObjectType) {
          // Linear scan of the user object's members for the "id" key.
          for (Value::ConstMemberIterator k = child.MemberBegin();
               k != child.MemberEnd(); ++k) {
            if (equals(k->name.GetString(), "id")) {
              const rapidjson::Value &val = k->value;
              if (val.IsInt64()) {
                answer.push_back(val.GetInt64());
              }
            }
          }
        }
      }
      // Always descend: nested values may contain further "user" objects.
      rapid_traverse(answer, m->value);
    }
    break;
  case kArrayType:
    for (Value::ConstValueIterator i = v.Begin(); i != v.End(); ++i) {
      rapid_traverse(answer, *i);
    }
    break;
  case kNullType:
  case kFalseType:
  case kTrueType:
  case kStringType:
  case kNumberType:
  default:
    // Leaf values carry nothing to collect.
    break;
  }
}
|
208
|
+
|
209
|
+
__attribute__((noinline)) std::vector<int64_t>
|
210
|
+
rapid_just_dom(rapidjson::Document &d) {
|
211
|
+
std::vector<int64_t> answer;
|
212
|
+
rapid_traverse(answer, d);
|
213
|
+
remove_duplicates(answer);
|
214
|
+
return answer;
|
215
|
+
}
|
216
|
+
|
217
|
+
__attribute__((noinline)) std::vector<int64_t>
|
218
|
+
rapid_compute_stats(const simdjson::padded_string &p) {
|
219
|
+
std::vector<int64_t> answer;
|
220
|
+
char *buffer = (char *)malloc(p.size() + 1);
|
221
|
+
memcpy(buffer, p.data(), p.size());
|
222
|
+
buffer[p.size()] = '\0';
|
223
|
+
rapidjson::Document d;
|
224
|
+
d.ParseInsitu<kParseValidateEncodingFlag>(buffer);
|
225
|
+
if (d.HasParseError()) {
|
226
|
+
free(buffer);
|
227
|
+
return answer;
|
228
|
+
}
|
229
|
+
rapid_traverse(answer, d);
|
230
|
+
free(buffer);
|
231
|
+
remove_duplicates(answer);
|
232
|
+
return answer;
|
233
|
+
}
|
234
|
+
|
235
|
+
__attribute__((noinline)) bool
|
236
|
+
rapid_just_parse(const simdjson::padded_string &p) {
|
237
|
+
char *buffer = (char *)malloc(p.size() + 1);
|
238
|
+
memcpy(buffer, p.data(), p.size());
|
239
|
+
buffer[p.size()] = '\0';
|
240
|
+
rapidjson::Document d;
|
241
|
+
d.ParseInsitu<kParseValidateEncodingFlag>(buffer);
|
242
|
+
bool answer = d.HasParseError();
|
243
|
+
free(buffer);
|
244
|
+
return answer;
|
245
|
+
}
|
246
|
+
|
247
|
+
int main(int argc, char *argv[]) {
|
248
|
+
bool verbose = false;
|
249
|
+
bool just_data = false;
|
250
|
+
|
251
|
+
int c;
|
252
|
+
while ((c = getopt(argc, argv, "vt")) != -1)
|
253
|
+
switch (c) {
|
254
|
+
case 't':
|
255
|
+
just_data = true;
|
256
|
+
break;
|
257
|
+
case 'v':
|
258
|
+
verbose = true;
|
259
|
+
break;
|
260
|
+
default:
|
261
|
+
abort();
|
262
|
+
}
|
263
|
+
if (optind >= argc) {
|
264
|
+
std::cerr
|
265
|
+
<< "Using different parsers, we compute the content statistics of "
|
266
|
+
"JSON documents."
|
267
|
+
<< std::endl;
|
268
|
+
std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
|
269
|
+
std::cerr << "Or " << argv[0] << " -v <jsonfile>" << std::endl;
|
270
|
+
exit(1);
|
271
|
+
}
|
272
|
+
const char *filename = argv[optind];
|
273
|
+
if (optind + 1 < argc) {
|
274
|
+
std::cerr << "warning: ignoring everything after " << argv[optind + 1]
|
275
|
+
<< std::endl;
|
276
|
+
}
|
277
|
+
simdjson::padded_string p;
|
278
|
+
try {
|
279
|
+
simdjson::get_corpus(filename).swap(p);
|
280
|
+
} catch (const std::exception &e) { // caught by reference to base
|
281
|
+
std::cout << "Could not load the file " << filename << std::endl;
|
282
|
+
return EXIT_FAILURE;
|
283
|
+
}
|
284
|
+
|
285
|
+
if (verbose) {
|
286
|
+
std::cout << "Input has ";
|
287
|
+
if (p.size() > 1024 * 1024)
|
288
|
+
std::cout << p.size() / (1024 * 1024) << " MB ";
|
289
|
+
else if (p.size() > 1024)
|
290
|
+
std::cout << p.size() / 1024 << " KB ";
|
291
|
+
else
|
292
|
+
std::cout << p.size() << " B ";
|
293
|
+
std::cout << std::endl;
|
294
|
+
}
|
295
|
+
std::vector<int64_t> s1 = simdjson_compute_stats(p);
|
296
|
+
if (verbose) {
|
297
|
+
printf("simdjson: ");
|
298
|
+
print_vec(s1);
|
299
|
+
}
|
300
|
+
std::vector<int64_t> s2 = rapid_compute_stats(p);
|
301
|
+
if (verbose) {
|
302
|
+
printf("rapid: ");
|
303
|
+
print_vec(s2);
|
304
|
+
}
|
305
|
+
std::vector<int64_t> s3 = sasjon_compute_stats(p);
|
306
|
+
if (verbose) {
|
307
|
+
printf("sasjon: ");
|
308
|
+
print_vec(s3);
|
309
|
+
}
|
310
|
+
assert(s1 == s2);
|
311
|
+
assert(s1 == s3);
|
312
|
+
size_t size = s1.size();
|
313
|
+
|
314
|
+
int repeat = 500;
|
315
|
+
int volume = p.size();
|
316
|
+
if (just_data) {
|
317
|
+
printf(
|
318
|
+
"name cycles_per_byte cycles_per_byte_err gb_per_s gb_per_s_err \n");
|
319
|
+
}
|
320
|
+
BEST_TIME("simdjson ", simdjson_compute_stats(p).size(), size, , repeat,
|
321
|
+
volume, !just_data);
|
322
|
+
BEST_TIME("rapid ", rapid_compute_stats(p).size(), size, , repeat, volume,
|
323
|
+
!just_data);
|
324
|
+
BEST_TIME("sasjon ", sasjon_compute_stats(p).size(), size, , repeat, volume,
|
325
|
+
!just_data);
|
326
|
+
BEST_TIME("simdjson (just parse) ", simdjson_just_parse(p), false, , repeat,
|
327
|
+
volume, !just_data);
|
328
|
+
BEST_TIME("rapid (just parse) ", rapid_just_parse(p), false, , repeat,
|
329
|
+
volume, !just_data);
|
330
|
+
BEST_TIME("sasjon (just parse) ", sasjon_just_parse(p), false, , repeat,
|
331
|
+
volume, !just_data);
|
332
|
+
simdjson::ParsedJson dsimdjson = simdjson::build_parsed_json(p);
|
333
|
+
BEST_TIME("simdjson (just dom) ", simdjson_just_dom(dsimdjson).size(), size,
|
334
|
+
, repeat, volume, !just_data);
|
335
|
+
char *buffer = (char *)malloc(p.size());
|
336
|
+
memcpy(buffer, p.data(), p.size());
|
337
|
+
rapidjson::Document drapid;
|
338
|
+
drapid.ParseInsitu<kParseValidateEncodingFlag>(buffer);
|
339
|
+
BEST_TIME("rapid (just dom) ", rapid_just_dom(drapid).size(), size, , repeat,
|
340
|
+
volume, !just_data);
|
341
|
+
memcpy(buffer, p.data(), p.size());
|
342
|
+
auto dsasjon = sajson::parse(sajson::dynamic_allocation(),
|
343
|
+
sajson::mutable_string_view(p.size(), buffer));
|
344
|
+
BEST_TIME("sasjon (just dom) ", sasjon_just_dom(dsasjon).size(), size, ,
|
345
|
+
repeat, volume, !just_data);
|
346
|
+
free(buffer);
|
347
|
+
}
|
@@ -0,0 +1,93 @@
|
|
1
|
+
// https://github.com/WojciechMula/toys/blob/master/000helpers/linux-perf-events.h
|
2
|
+
#pragma once
|
3
|
+
#ifdef __linux__
|
4
|
+
|
5
|
+
#include <asm/unistd.h> // for __NR_perf_event_open
|
6
|
+
#include <linux/perf_event.h> // for perf event constants
|
7
|
+
#include <sys/ioctl.h> // for ioctl
|
8
|
+
#include <unistd.h> // for syscall
|
9
|
+
|
10
|
+
#include <cerrno> // for errno
|
11
|
+
#include <cstring> // for memset
|
12
|
+
#include <stdexcept>
|
13
|
+
|
14
|
+
#include <iostream>
|
15
|
+
#include <vector>
|
16
|
+
|
17
|
+
// Thin wrapper around Linux perf_event_open(2) that opens one counter per
// entry of a configuration vector as a single event group and reads them all
// back together.
//
// Usage: construct with the event configs, call start(), run the code under
// measurement, then call end(results); results[k] receives the count for the
// k-th config.
//
// Errors are reported once on std::cerr (first failure only); afterwards the
// object keeps running silently with `working` set to false.
//
// NOTE(review): the destructor closes file descriptors, so copying an
// instance would close them twice. Treat the type as non-copyable.
template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
  int fd;       // descriptor used for the group-wide ioctl/read calls
  bool working; // false once any error has been reported
  perf_event_attr attribs;
  int num_events;
  std::vector<uint64_t> temp_result_vec; // raw read() payload
  std::vector<uint64_t> ids;             // kernel ids, one per event
  std::vector<int> fds; // every descriptor successfully opened

public:
  // Opens one perf event per entry of config_vec. The first event opened
  // becomes the group leader for the ones that follow.
  explicit LinuxEvents(std::vector<int> config_vec) : fd(-1), working(true) {
    memset(&attribs, 0, sizeof(attribs));
    attribs.type = TYPE;
    attribs.size = sizeof(attribs);
    attribs.disabled = 1;
    attribs.exclude_kernel = 1;
    attribs.exclude_hv = 1;

    attribs.sample_period = 0;
    attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
    const int pid = 0;  // the current process
    const int cpu = -1; // all CPUs
    const unsigned long flags = 0;

    int group = -1; // no group
    num_events = config_vec.size();
    ids.resize(config_vec.size());
    fds.reserve(config_vec.size());
    uint32_t i = 0;
    for (auto config : config_vec) {
      attribs.config = config;
      fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
      if (fd == -1) {
        report_error("perf_event_open");
        ++i; // keep ids[] aligned with config_vec
        continue;
      }
      fds.push_back(fd);
      ioctl(fd, PERF_EVENT_IOC_ID, &ids[i++]);
      if (group == -1) {
        group = fd;
      }
    }

    // With PERF_FORMAT_GROUP | PERF_FORMAT_ID a read yields one leading
    // word followed by a (value, id) pair per event.
    temp_result_vec.resize(num_events * 2 + 1);
  }

  // Closes every descriptor this object opened (and nothing else).
  ~LinuxEvents() {
    for (int opened : fds) {
      close(opened);
    }
  }

  // Resets and enables all counters of the group.
  inline void start() {
    if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
      report_error("ioctl(PERF_EVENT_IOC_RESET)");
    }

    if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
      report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
    }
  }

  // Disables the counters and copies their values into results, in the order
  // the configs were given. results is grown if it has fewer slots than
  // there are events; existing extra slots are left untouched.
  inline void end(std::vector<unsigned long long> &results) {
    if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
      report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
    }

    if (read(fd, temp_result_vec.data(),
             temp_result_vec.size() * sizeof(uint64_t)) == -1) {
      report_error("read");
    }
    const size_t needed = temp_result_vec.size() / 2;
    if (results.size() < needed) {
      results.resize(needed);
    }
    // our actual results are in slots 1,3,5, ... of this structure
    // we really should be checking our ids obtained earlier to be safe
    for (uint32_t i = 1; i < temp_result_vec.size(); i += 2) {
      results[i / 2] = temp_result_vec[i];
    }
  }

private:
  // Prints "<context>: <strerror(errno)>" for the first error only, then
  // marks the object as not working.
  void report_error(const std::string &context) {
    if (working)
      std::cerr << (context + ": " + std::string(strerror(errno))) << std::endl;
    working = false;
  }
};
|
93
|
+
#endif
|
@@ -0,0 +1,181 @@
|
|
1
|
+
#include <iostream>
|
2
|
+
#include <unistd.h>
|
3
|
+
|
4
|
+
#include "benchmark.h"
|
5
|
+
#include "simdjson/jsonioutil.h"
|
6
|
+
#include "simdjson/jsonminifier.h"
|
7
|
+
#include "simdjson/jsonparser.h"
|
8
|
+
|
9
|
+
// #define RAPIDJSON_SSE2 // bad
|
10
|
+
// #define RAPIDJSON_SSE42 // bad
|
11
|
+
#include "rapidjson/document.h"
|
12
|
+
#include "rapidjson/reader.h" // you have to check in the submodule
|
13
|
+
#include "rapidjson/stringbuffer.h"
|
14
|
+
#include "rapidjson/writer.h"
|
15
|
+
#include "sajson.h"
|
16
|
+
|
17
|
+
using namespace simdjson;
|
18
|
+
using namespace rapidjson;
|
19
|
+
|
20
|
+
std::string rapid_stringme_insitu(char *json) {
|
21
|
+
Document d;
|
22
|
+
d.ParseInsitu(json);
|
23
|
+
if (d.HasParseError()) {
|
24
|
+
std::cerr << "problem!" << std::endl;
|
25
|
+
return ""; // should do something
|
26
|
+
}
|
27
|
+
StringBuffer buffer;
|
28
|
+
Writer<StringBuffer> writer(buffer);
|
29
|
+
d.Accept(writer);
|
30
|
+
return buffer.GetString();
|
31
|
+
}
|
32
|
+
|
33
|
+
std::string rapid_stringme(char *json) {
|
34
|
+
Document d;
|
35
|
+
d.Parse(json);
|
36
|
+
if (d.HasParseError()) {
|
37
|
+
std::cerr << "problem!" << std::endl;
|
38
|
+
return ""; // should do something
|
39
|
+
}
|
40
|
+
StringBuffer buffer;
|
41
|
+
Writer<StringBuffer> writer(buffer);
|
42
|
+
d.Accept(writer);
|
43
|
+
return buffer.GetString();
|
44
|
+
}
|
45
|
+
|
46
|
+
int main(int argc, char *argv[]) {
|
47
|
+
int c;
|
48
|
+
bool verbose = false;
|
49
|
+
bool just_data = false;
|
50
|
+
|
51
|
+
while ((c = getopt(argc, argv, "vt")) != -1)
|
52
|
+
switch (c) {
|
53
|
+
case 't':
|
54
|
+
just_data = true;
|
55
|
+
break;
|
56
|
+
case 'v':
|
57
|
+
verbose = true;
|
58
|
+
break;
|
59
|
+
default:
|
60
|
+
abort();
|
61
|
+
}
|
62
|
+
if (optind >= argc) {
|
63
|
+
std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
|
64
|
+
exit(1);
|
65
|
+
}
|
66
|
+
const char *filename = argv[optind];
|
67
|
+
simdjson::padded_string p;
|
68
|
+
try {
|
69
|
+
simdjson::get_corpus(filename).swap(p);
|
70
|
+
} catch (const std::exception &e) { // caught by reference to base
|
71
|
+
std::cout << "Could not load the file " << filename << std::endl;
|
72
|
+
return EXIT_FAILURE;
|
73
|
+
}
|
74
|
+
if (verbose) {
|
75
|
+
std::cout << "Input has ";
|
76
|
+
if (p.size() > 1024 * 1024)
|
77
|
+
std::cout << p.size() / (1024 * 1024) << " MB ";
|
78
|
+
else if (p.size() > 1024)
|
79
|
+
std::cout << p.size() / 1024 << " KB ";
|
80
|
+
else
|
81
|
+
std::cout << p.size() << " B ";
|
82
|
+
std::cout << std::endl;
|
83
|
+
}
|
84
|
+
char *buffer = simdjson::allocate_padded_buffer(p.size() + 1);
|
85
|
+
memcpy(buffer, p.data(), p.size());
|
86
|
+
buffer[p.size()] = '\0';
|
87
|
+
|
88
|
+
int repeat = 50;
|
89
|
+
int volume = p.size();
|
90
|
+
if (just_data) {
|
91
|
+
printf(
|
92
|
+
"name cycles_per_byte cycles_per_byte_err gb_per_s gb_per_s_err \n");
|
93
|
+
}
|
94
|
+
size_t strlength = rapid_stringme((char *)p.data()).size();
|
95
|
+
if (verbose)
|
96
|
+
std::cout << "input length is " << p.size() << " stringified length is "
|
97
|
+
<< strlength << std::endl;
|
98
|
+
BEST_TIME_NOCHECK("despacing with RapidJSON",
|
99
|
+
rapid_stringme((char *)p.data()), , repeat, volume,
|
100
|
+
!just_data);
|
101
|
+
BEST_TIME_NOCHECK(
|
102
|
+
"despacing with RapidJSON Insitu", rapid_stringme_insitu((char *)buffer),
|
103
|
+
memcpy(buffer, p.data(), p.size()), repeat, volume, !just_data);
|
104
|
+
memcpy(buffer, p.data(), p.size());
|
105
|
+
|
106
|
+
size_t outlength = simdjson::json_minify((const uint8_t *)buffer, p.size(),
|
107
|
+
(uint8_t *)buffer);
|
108
|
+
if (verbose)
|
109
|
+
std::cout << "json_minify length is " << outlength << std::endl;
|
110
|
+
|
111
|
+
uint8_t *cbuffer = (uint8_t *)buffer;
|
112
|
+
BEST_TIME("json_minify", simdjson::json_minify(cbuffer, p.size(), cbuffer),
|
113
|
+
outlength, memcpy(buffer, p.data(), p.size()), repeat, volume,
|
114
|
+
!just_data);
|
115
|
+
printf("minisize = %zu, original size = %zu (minified down to %.2f percent "
|
116
|
+
"of original) \n",
|
117
|
+
outlength, p.size(), outlength * 100.0 / p.size());
|
118
|
+
|
119
|
+
/***
|
120
|
+
* Is it worth it to minify before parsing?
|
121
|
+
***/
|
122
|
+
rapidjson::Document d;
|
123
|
+
BEST_TIME("RapidJSON Insitu orig", d.ParseInsitu(buffer).HasParseError(),
|
124
|
+
false, memcpy(buffer, p.data(), p.size()), repeat, volume,
|
125
|
+
!just_data);
|
126
|
+
|
127
|
+
char *mini_buffer = simdjson::allocate_padded_buffer(p.size() + 1);
|
128
|
+
size_t minisize = simdjson::json_minify((const uint8_t *)p.data(), p.size(),
|
129
|
+
(uint8_t *)mini_buffer);
|
130
|
+
mini_buffer[minisize] = '\0';
|
131
|
+
|
132
|
+
BEST_TIME("RapidJSON Insitu despaced", d.ParseInsitu(buffer).HasParseError(),
|
133
|
+
false, memcpy(buffer, mini_buffer, p.size()), repeat, volume,
|
134
|
+
!just_data);
|
135
|
+
|
136
|
+
size_t ast_buffer_size = p.size() * 2;
|
137
|
+
size_t *ast_buffer = (size_t *)malloc(ast_buffer_size * sizeof(size_t));
|
138
|
+
|
139
|
+
BEST_TIME(
|
140
|
+
"sajson orig",
|
141
|
+
sajson::parse(sajson::bounded_allocation(ast_buffer, ast_buffer_size),
|
142
|
+
sajson::mutable_string_view(p.size(), buffer))
|
143
|
+
.is_valid(),
|
144
|
+
true, memcpy(buffer, p.data(), p.size()), repeat, volume, !just_data);
|
145
|
+
|
146
|
+
BEST_TIME(
|
147
|
+
"sajson despaced",
|
148
|
+
sajson::parse(sajson::bounded_allocation(ast_buffer, ast_buffer_size),
|
149
|
+
sajson::mutable_string_view(minisize, buffer))
|
150
|
+
.is_valid(),
|
151
|
+
true, memcpy(buffer, mini_buffer, p.size()), repeat, volume, !just_data);
|
152
|
+
|
153
|
+
simdjson::ParsedJson pj;
|
154
|
+
bool is_alloc_ok = pj.allocate_capacity(p.size(), 1024);
|
155
|
+
if (!is_alloc_ok) {
|
156
|
+
fprintf(stderr, "failed to allocate memory\n");
|
157
|
+
return EXIT_FAILURE;
|
158
|
+
}
|
159
|
+
bool automated_reallocation = false;
|
160
|
+
BEST_TIME("simdjson orig",
|
161
|
+
simdjson::json_parse((const uint8_t *)buffer, p.size(), pj,
|
162
|
+
automated_reallocation),
|
163
|
+
true, memcpy(buffer, p.data(), p.size()), repeat, volume,
|
164
|
+
!just_data);
|
165
|
+
|
166
|
+
simdjson::ParsedJson pj2;
|
167
|
+
bool is_alloc_ok2 = pj2.allocate_capacity(p.size(), 1024);
|
168
|
+
if (!is_alloc_ok2) {
|
169
|
+
fprintf(stderr, "failed to allocate memory\n");
|
170
|
+
return EXIT_FAILURE;
|
171
|
+
}
|
172
|
+
automated_reallocation = false;
|
173
|
+
BEST_TIME("simdjson despaced",
|
174
|
+
simdjson::json_parse((const uint8_t *)buffer, minisize, pj2,
|
175
|
+
automated_reallocation),
|
176
|
+
true, memcpy(buffer, mini_buffer, p.size()), repeat, volume,
|
177
|
+
!just_data);
|
178
|
+
free(buffer);
|
179
|
+
free(ast_buffer);
|
180
|
+
free(mini_buffer);
|
181
|
+
}
|