simdjson 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. checksums.yaml +7 -0
  2. data/.clang-format +5 -0
  3. data/.gitignore +14 -0
  4. data/.gitmodules +3 -0
  5. data/.rubocop.yml +9 -0
  6. data/.travis.yml +7 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +39 -0
  10. data/Rakefile +32 -0
  11. data/benchmark/apache_builds.json +4421 -0
  12. data/benchmark/demo.json +15 -0
  13. data/benchmark/github_events.json +1390 -0
  14. data/benchmark/run_benchmark.rb +30 -0
  15. data/ext/simdjson/extconf.rb +22 -0
  16. data/ext/simdjson/simdjson.cpp +76 -0
  17. data/ext/simdjson/simdjson.hpp +6 -0
  18. data/lib/simdjson/version.rb +3 -0
  19. data/lib/simdjson.rb +2 -0
  20. data/simdjson.gemspec +35 -0
  21. data/vendor/.gitkeep +0 -0
  22. data/vendor/simdjson/AUTHORS +3 -0
  23. data/vendor/simdjson/CMakeLists.txt +63 -0
  24. data/vendor/simdjson/CONTRIBUTORS +27 -0
  25. data/vendor/simdjson/Dockerfile +10 -0
  26. data/vendor/simdjson/LICENSE +201 -0
  27. data/vendor/simdjson/Makefile +203 -0
  28. data/vendor/simdjson/Notes.md +85 -0
  29. data/vendor/simdjson/README.md +581 -0
  30. data/vendor/simdjson/amalgamation.sh +158 -0
  31. data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
  32. data/vendor/simdjson/benchmark/benchmark.h +223 -0
  33. data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
  34. data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
  35. data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
  36. data/vendor/simdjson/benchmark/parse.cpp +393 -0
  37. data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
  38. data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
  39. data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
  40. data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
  41. data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
  42. data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
  43. data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
  44. data/vendor/simdjson/doc/gbps.png +0 -0
  45. data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
  46. data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
  47. data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
  48. data/vendor/simdjson/images/halvarflake.png +0 -0
  49. data/vendor/simdjson/images/logo.png +0 -0
  50. data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
  51. data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
  52. data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
  53. data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
  54. data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
  55. data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
  56. data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
  57. data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
  58. data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
  59. data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
  60. data/vendor/simdjson/include/simdjson/portability.h +172 -0
  61. data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
  62. data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
  63. data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
  64. data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
  65. data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
  66. data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
  67. data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
  68. data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
  69. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
  70. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
  71. data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
  72. data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
  73. data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
  74. data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
  75. data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
  76. data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
  77. data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
  78. data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
  79. data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
  80. data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
  81. data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
  82. data/vendor/simdjson/scripts/issue150.sh +14 -0
  83. data/vendor/simdjson/scripts/javascript/README.md +3 -0
  84. data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
  85. data/vendor/simdjson/scripts/minifier.sh +11 -0
  86. data/vendor/simdjson/scripts/parseandstat.sh +24 -0
  87. data/vendor/simdjson/scripts/parser.sh +11 -0
  88. data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
  89. data/vendor/simdjson/scripts/plotparse.sh +98 -0
  90. data/vendor/simdjson/scripts/selectparser.sh +11 -0
  91. data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
  92. data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
  93. data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
  94. data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
  95. data/vendor/simdjson/scripts/testjson2json.sh +99 -0
  96. data/vendor/simdjson/scripts/transitions/Makefile +10 -0
  97. data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
  98. data/vendor/simdjson/singleheader/README.md +1 -0
  99. data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
  100. data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
  101. data/vendor/simdjson/singleheader/simdjson.h +39692 -0
  102. data/vendor/simdjson/src/CMakeLists.txt +67 -0
  103. data/vendor/simdjson/src/jsonioutil.cpp +35 -0
  104. data/vendor/simdjson/src/jsonminifier.cpp +285 -0
  105. data/vendor/simdjson/src/jsonparser.cpp +91 -0
  106. data/vendor/simdjson/src/parsedjson.cpp +323 -0
  107. data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
  108. data/vendor/simdjson/src/simdjson.cpp +30 -0
  109. data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
  110. data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
  111. data/vendor/simdjson/style/clang-format-check.sh +25 -0
  112. data/vendor/simdjson/style/clang-format.sh +25 -0
  113. data/vendor/simdjson/style/run-clang-format.py +326 -0
  114. data/vendor/simdjson/tape.md +134 -0
  115. data/vendor/simdjson/tests/CMakeLists.txt +25 -0
  116. data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
  117. data/vendor/simdjson/tests/basictests.cpp +75 -0
  118. data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
  119. data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
  120. data/vendor/simdjson/tests/pointercheck.cpp +38 -0
  121. data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
  122. data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
  123. data/vendor/simdjson/tools/CMakeLists.txt +3 -0
  124. data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
  125. data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
  126. data/vendor/simdjson/tools/json2json.cpp +112 -0
  127. data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
  128. data/vendor/simdjson/tools/jsonstats.cpp +143 -0
  129. data/vendor/simdjson/tools/minify.cpp +21 -0
  130. data/vendor/simdjson/tools/release.py +125 -0
  131. data/vendor/simdjson/windows/dirent_portable.h +1043 -0
  132. metadata +273 -0
@@ -0,0 +1,347 @@
1
+ #include "simdjson/jsonparser.h"
2
+ #include <algorithm>
3
+ #include <unistd.h>
4
+ #include <vector>
5
+
6
+ #include "benchmark.h"
7
+ // #define RAPIDJSON_SSE2 // bad for performance
8
+ // #define RAPIDJSON_SSE42 // bad for performance
9
+ #include "rapidjson/document.h"
10
+ #include "rapidjson/reader.h"
11
+ #include "rapidjson/stringbuffer.h"
12
+ #include "rapidjson/writer.h"
13
+
14
+ #include "sajson.h"
15
+
16
+ using namespace rapidjson;
17
+
18
/// Returns true when the two NUL-terminated C strings hold the same bytes.
bool equals(const char *s1, const char *s2) {
  const int ordering = strcmp(s1, s2);
  return ordering == 0;
}
19
+
20
/// Sorts `v` in ascending order and drops repeated values in place.
void remove_duplicates(std::vector<int64_t> &v) {
  std::sort(std::begin(v), std::end(v));
  // std::unique compacts the distinct values to the front and returns the
  // start of the leftover tail, which is then erased.
  v.erase(std::unique(std::begin(v), std::end(v)), std::end(v));
}
25
+
26
/// Writes every element of `v` to std::cout, each followed by a single
/// space, then ends the line (and flushes) with std::endl.
void print_vec(const std::vector<int64_t> &v) {
  for (auto it = v.begin(); it != v.end(); ++it) {
    std::cout << *it << " ";
  }
  std::cout << std::endl;
}
32
+
33
+ void simdjson_scan(std::vector<int64_t> &answer,
34
+ simdjson::ParsedJson::Iterator &i) {
35
+ while (i.move_forward()) {
36
+ if (i.get_scope_type() == '{') {
37
+ bool found_user = (i.get_string_length() == 4) &&
38
+ (memcmp(i.get_string(), "user", 4) == 0);
39
+ i.move_to_value();
40
+ if (found_user) {
41
+ if (i.is_object() && i.move_to_key("id", 2)) {
42
+ if (i.is_integer()) {
43
+ answer.push_back(i.get_integer());
44
+ }
45
+ i.up();
46
+ }
47
+ }
48
+ }
49
+ }
50
+ }
51
+
52
+ __attribute__((noinline)) std::vector<int64_t>
53
+ simdjson_just_dom(simdjson::ParsedJson &pj) {
54
+ std::vector<int64_t> answer;
55
+ simdjson::ParsedJson::Iterator i(pj);
56
+ simdjson_scan(answer, i);
57
+ remove_duplicates(answer);
58
+ return answer;
59
+ }
60
+
61
+ __attribute__((noinline)) std::vector<int64_t>
62
+ simdjson_compute_stats(const simdjson::padded_string &p) {
63
+ std::vector<int64_t> answer;
64
+ simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
65
+ if (!pj.is_valid()) {
66
+ return answer;
67
+ }
68
+ simdjson::ParsedJson::Iterator i(pj);
69
+ simdjson_scan(answer, i);
70
+ remove_duplicates(answer);
71
+ return answer;
72
+ }
73
+
74
+ __attribute__((noinline)) bool
75
+ simdjson_just_parse(const simdjson::padded_string &p) {
76
+ simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
77
+ bool answer = !pj.is_valid();
78
+ return answer;
79
+ }
80
+
81
+ void sajson_traverse(std::vector<int64_t> &answer, const sajson::value &node) {
82
+ using namespace sajson;
83
+ switch (node.get_type()) {
84
+ case TYPE_ARRAY: {
85
+ auto length = node.get_length();
86
+ for (size_t i = 0; i < length; ++i) {
87
+ sajson_traverse(answer, node.get_array_element(i));
88
+ }
89
+ break;
90
+ }
91
+ case TYPE_OBJECT: {
92
+ auto length = node.get_length();
93
+ // sajson has O(log n) find_object_key, but we still visit each node anyhow
94
+ // because we need to visit all values.
95
+ for (auto i = 0u; i < length; ++i) {
96
+ auto key = node.get_object_key(i); // expected: sajson::string
97
+ bool found_user =
98
+ (key.length() == 4) && (memcmp(key.data(), "user", 4) == 0);
99
+ if (found_user) { // found a user!!!
100
+ auto user_value = node.get_object_value(i); // get the value
101
+ if (user_value.get_type() ==
102
+ TYPE_OBJECT) { // the value should be an object
103
+ // now we know that we only need one value
104
+ auto user_value_length = user_value.get_length();
105
+ auto right_index =
106
+ user_value.find_object_key(sajson::string("id", 2));
107
+ if (right_index < user_value_length) {
108
+ auto v = user_value.get_object_value(right_index);
109
+ if (v.get_type() == TYPE_INTEGER) { // check that it is an integer
110
+ answer.push_back(v.get_integer_value()); // record it!
111
+ } else if (v.get_type() == TYPE_DOUBLE) {
112
+ answer.push_back((int64_t)v.get_double_value()); // record it!
113
+ }
114
+ }
115
+ }
116
+ }
117
+ sajson_traverse(answer, node.get_object_value(i));
118
+ }
119
+ break;
120
+ }
121
+ case TYPE_NULL:
122
+ case TYPE_FALSE:
123
+ case TYPE_TRUE:
124
+ case TYPE_STRING:
125
+ case TYPE_DOUBLE:
126
+ case TYPE_INTEGER:
127
+ break;
128
+ default:
129
+ assert(false && "unknown node type");
130
+ }
131
+ }
132
+
133
+ __attribute__((noinline)) std::vector<int64_t>
134
+ sasjon_just_dom(sajson::document &d) {
135
+ std::vector<int64_t> answer;
136
+ sajson_traverse(answer, d.get_root());
137
+ remove_duplicates(answer);
138
+ return answer;
139
+ }
140
+
141
+ __attribute__((noinline)) std::vector<int64_t>
142
+ sasjon_compute_stats(const simdjson::padded_string &p) {
143
+ std::vector<int64_t> answer;
144
+ char *buffer = (char *)malloc(p.size());
145
+ memcpy(buffer, p.data(), p.size());
146
+ auto d = sajson::parse(sajson::dynamic_allocation(),
147
+ sajson::mutable_string_view(p.size(), buffer));
148
+ if (!d.is_valid()) {
149
+ free(buffer);
150
+ return answer;
151
+ }
152
+ sajson_traverse(answer, d.get_root());
153
+ free(buffer);
154
+ remove_duplicates(answer);
155
+ return answer;
156
+ }
157
+
158
+ __attribute__((noinline)) bool
159
+ sasjon_just_parse(const simdjson::padded_string &p) {
160
+ char *buffer = (char *)malloc(p.size());
161
+ memcpy(buffer, p.data(), p.size());
162
+ auto d = sajson::parse(sajson::dynamic_allocation(),
163
+ sajson::mutable_string_view(p.size(), buffer));
164
+ bool answer = !d.is_valid();
165
+ free(buffer);
166
+ return answer;
167
+ }
168
+
169
+ void rapid_traverse(std::vector<int64_t> &answer, const rapidjson::Value &v) {
170
+ switch (v.GetType()) {
171
+ case kObjectType:
172
+ for (Value::ConstMemberIterator m = v.MemberBegin(); m != v.MemberEnd();
173
+ ++m) {
174
+ bool found_user = (m->name.GetStringLength() == 4) &&
175
+ (memcmp(m->name.GetString(), "user", 4) == 0);
176
+ if (found_user) {
177
+ const rapidjson::Value &child = m->value;
178
+ if (child.GetType() == kObjectType) {
179
+ for (Value::ConstMemberIterator k = child.MemberBegin();
180
+ k != child.MemberEnd(); ++k) {
181
+ if (equals(k->name.GetString(), "id")) {
182
+ const rapidjson::Value &val = k->value;
183
+ if (val.GetType() == kNumberType) {
184
+ answer.push_back(val.GetInt64());
185
+ }
186
+ }
187
+ }
188
+ }
189
+ }
190
+ rapid_traverse(answer, m->value);
191
+ }
192
+ break;
193
+ case kArrayType:
194
+ for (Value::ConstValueIterator i = v.Begin(); i != v.End();
195
+ ++i) { // v.Size();
196
+ rapid_traverse(answer, *i);
197
+ }
198
+ break;
199
+ case kNullType:
200
+ case kFalseType:
201
+ case kTrueType:
202
+ case kStringType:
203
+ case kNumberType:
204
+ default:
205
+ break;
206
+ }
207
+ }
208
+
209
+ __attribute__((noinline)) std::vector<int64_t>
210
+ rapid_just_dom(rapidjson::Document &d) {
211
+ std::vector<int64_t> answer;
212
+ rapid_traverse(answer, d);
213
+ remove_duplicates(answer);
214
+ return answer;
215
+ }
216
+
217
+ __attribute__((noinline)) std::vector<int64_t>
218
+ rapid_compute_stats(const simdjson::padded_string &p) {
219
+ std::vector<int64_t> answer;
220
+ char *buffer = (char *)malloc(p.size() + 1);
221
+ memcpy(buffer, p.data(), p.size());
222
+ buffer[p.size()] = '\0';
223
+ rapidjson::Document d;
224
+ d.ParseInsitu<kParseValidateEncodingFlag>(buffer);
225
+ if (d.HasParseError()) {
226
+ free(buffer);
227
+ return answer;
228
+ }
229
+ rapid_traverse(answer, d);
230
+ free(buffer);
231
+ remove_duplicates(answer);
232
+ return answer;
233
+ }
234
+
235
+ __attribute__((noinline)) bool
236
+ rapid_just_parse(const simdjson::padded_string &p) {
237
+ char *buffer = (char *)malloc(p.size() + 1);
238
+ memcpy(buffer, p.data(), p.size());
239
+ buffer[p.size()] = '\0';
240
+ rapidjson::Document d;
241
+ d.ParseInsitu<kParseValidateEncodingFlag>(buffer);
242
+ bool answer = d.HasParseError();
243
+ free(buffer);
244
+ return answer;
245
+ }
246
+
247
+ int main(int argc, char *argv[]) {
248
+ bool verbose = false;
249
+ bool just_data = false;
250
+
251
+ int c;
252
+ while ((c = getopt(argc, argv, "vt")) != -1)
253
+ switch (c) {
254
+ case 't':
255
+ just_data = true;
256
+ break;
257
+ case 'v':
258
+ verbose = true;
259
+ break;
260
+ default:
261
+ abort();
262
+ }
263
+ if (optind >= argc) {
264
+ std::cerr
265
+ << "Using different parsers, we compute the content statistics of "
266
+ "JSON documents."
267
+ << std::endl;
268
+ std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
269
+ std::cerr << "Or " << argv[0] << " -v <jsonfile>" << std::endl;
270
+ exit(1);
271
+ }
272
+ const char *filename = argv[optind];
273
+ if (optind + 1 < argc) {
274
+ std::cerr << "warning: ignoring everything after " << argv[optind + 1]
275
+ << std::endl;
276
+ }
277
+ simdjson::padded_string p;
278
+ try {
279
+ simdjson::get_corpus(filename).swap(p);
280
+ } catch (const std::exception &e) { // caught by reference to base
281
+ std::cout << "Could not load the file " << filename << std::endl;
282
+ return EXIT_FAILURE;
283
+ }
284
+
285
+ if (verbose) {
286
+ std::cout << "Input has ";
287
+ if (p.size() > 1024 * 1024)
288
+ std::cout << p.size() / (1024 * 1024) << " MB ";
289
+ else if (p.size() > 1024)
290
+ std::cout << p.size() / 1024 << " KB ";
291
+ else
292
+ std::cout << p.size() << " B ";
293
+ std::cout << std::endl;
294
+ }
295
+ std::vector<int64_t> s1 = simdjson_compute_stats(p);
296
+ if (verbose) {
297
+ printf("simdjson: ");
298
+ print_vec(s1);
299
+ }
300
+ std::vector<int64_t> s2 = rapid_compute_stats(p);
301
+ if (verbose) {
302
+ printf("rapid: ");
303
+ print_vec(s2);
304
+ }
305
+ std::vector<int64_t> s3 = sasjon_compute_stats(p);
306
+ if (verbose) {
307
+ printf("sasjon: ");
308
+ print_vec(s3);
309
+ }
310
+ assert(s1 == s2);
311
+ assert(s1 == s3);
312
+ size_t size = s1.size();
313
+
314
+ int repeat = 500;
315
+ int volume = p.size();
316
+ if (just_data) {
317
+ printf(
318
+ "name cycles_per_byte cycles_per_byte_err gb_per_s gb_per_s_err \n");
319
+ }
320
+ BEST_TIME("simdjson ", simdjson_compute_stats(p).size(), size, , repeat,
321
+ volume, !just_data);
322
+ BEST_TIME("rapid ", rapid_compute_stats(p).size(), size, , repeat, volume,
323
+ !just_data);
324
+ BEST_TIME("sasjon ", sasjon_compute_stats(p).size(), size, , repeat, volume,
325
+ !just_data);
326
+ BEST_TIME("simdjson (just parse) ", simdjson_just_parse(p), false, , repeat,
327
+ volume, !just_data);
328
+ BEST_TIME("rapid (just parse) ", rapid_just_parse(p), false, , repeat,
329
+ volume, !just_data);
330
+ BEST_TIME("sasjon (just parse) ", sasjon_just_parse(p), false, , repeat,
331
+ volume, !just_data);
332
+ simdjson::ParsedJson dsimdjson = simdjson::build_parsed_json(p);
333
+ BEST_TIME("simdjson (just dom) ", simdjson_just_dom(dsimdjson).size(), size,
334
+ , repeat, volume, !just_data);
335
+ char *buffer = (char *)malloc(p.size());
336
+ memcpy(buffer, p.data(), p.size());
337
+ rapidjson::Document drapid;
338
+ drapid.ParseInsitu<kParseValidateEncodingFlag>(buffer);
339
+ BEST_TIME("rapid (just dom) ", rapid_just_dom(drapid).size(), size, , repeat,
340
+ volume, !just_data);
341
+ memcpy(buffer, p.data(), p.size());
342
+ auto dsasjon = sajson::parse(sajson::dynamic_allocation(),
343
+ sajson::mutable_string_view(p.size(), buffer));
344
+ BEST_TIME("sasjon (just dom) ", sasjon_just_dom(dsasjon).size(), size, ,
345
+ repeat, volume, !just_data);
346
+ free(buffer);
347
+ }
@@ -0,0 +1,93 @@
1
+ // https://github.com/WojciechMula/toys/blob/master/000helpers/linux-perf-events.h
2
+ #pragma once
3
+ #ifdef __linux__
4
+
5
+ #include <asm/unistd.h> // for __NR_perf_event_open
6
+ #include <linux/perf_event.h> // for perf event constants
7
+ #include <sys/ioctl.h> // for ioctl
8
+ #include <unistd.h> // for syscall
9
+
10
+ #include <cerrno> // for errno
11
+ #include <cstring> // for memset
12
+ #include <stdexcept>
13
+
14
+ #include <iostream>
15
+ #include <vector>
16
+
17
/// Opens a group of Linux perf events for the calling process and reads
/// their counters around a measured region.
///
/// Usage: construct with the list of event config values for event type
/// TYPE, call start() before the measured code and end(results) after it.
/// Errors are reported once on std::cerr; after the first report further
/// errors are silent.
///
/// NOTE: an instance owns file descriptors that its destructor closes, so
/// copying an instance would close the same descriptors twice.
template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
  int fd;       // result of the most recent perf_event_open; -1 if none/failed
  bool working; // false once an error has been reported
  perf_event_attr attribs;
  int num_events;
  std::vector<uint64_t> temp_result_vec; // raw read() buffer
  std::vector<uint64_t> ids;
  std::vector<int> open_fds; // every descriptor that must be closed

public:
  /// Opens one event per entry of `config_vec`; the first opened event acts
  /// as the group for the following ones.
  explicit LinuxEvents(std::vector<int> config_vec) : fd(-1), working(true) {
    memset(&attribs, 0, sizeof(attribs));
    attribs.type = TYPE;
    attribs.size = sizeof(attribs);
    attribs.disabled = 1;
    attribs.exclude_kernel = 1;
    attribs.exclude_hv = 1;

    attribs.sample_period = 0;
    attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
    const int pid = 0;  // the current process
    const int cpu = -1; // all CPUs
    const unsigned long flags = 0;

    int group = -1; // no group
    num_events = config_vec.size();
    ids.resize(config_vec.size());
    open_fds.reserve(config_vec.size());
    uint32_t i = 0;
    for (auto config : config_vec) {
      attribs.config = config;
      fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
      if (fd == -1) {
        report_error("perf_event_open");
      } else {
        // Remember every descriptor: `fd` is overwritten on the next
        // iteration, and each one has to be closed eventually.
        open_fds.push_back(fd);
        ioctl(fd, PERF_EVENT_IOC_ID, &ids[i]);
      }
      ++i;
      if (group == -1) {
        group = fd;
      }
    }

    // One leading slot plus two slots per event (see end()).
    temp_result_vec.resize(num_events * 2 + 1);
  }

  /// Closes every descriptor that was opened successfully. Nothing is closed
  /// when no event was opened, so descriptor 0 is never touched by accident.
  ~LinuxEvents() {
    for (int descriptor : open_fds) {
      close(descriptor);
    }
  }

  /// Resets and enables the counters of the group.
  inline void start() {
    if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
      report_error("ioctl(PERF_EVENT_IOC_RESET)");
    }

    if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
      report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
    }
  }

  /// Disables the counters and stores one value per event in `results`
  /// (results[k] is the k-th event). `results` is grown when it is too small
  /// to hold them.
  inline void end(std::vector<unsigned long long> &results) {
    if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
      report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
    }

    if (read(fd, temp_result_vec.data(), temp_result_vec.size() * 8) == -1) {
      report_error("read");
    }
    // The loop below writes indices 0 .. (temp_result_vec.size() / 2) - 1.
    const size_t needed = temp_result_vec.size() / 2;
    if (results.size() < needed) {
      results.resize(needed);
    }
    // our actual results are in slots 1,3,5, ... of this structure
    // we really should be checking our ids obtained earlier to be safe
    for (uint32_t i = 1; i < temp_result_vec.size(); i += 2) {
      results[i / 2] = temp_result_vec[i];
    }
  }

private:
  /// Prints "<context>: <strerror(errno)>" on the first error only, then
  /// marks the object as not working.
  void report_error(const std::string &context) {
    if (working)
      std::cerr << (context + ": " + std::string(strerror(errno))) << std::endl;
    working = false;
  }
};
93
+ #endif
@@ -0,0 +1,181 @@
1
+ #include <iostream>
2
+ #include <unistd.h>
3
+
4
+ #include "benchmark.h"
5
+ #include "simdjson/jsonioutil.h"
6
+ #include "simdjson/jsonminifier.h"
7
+ #include "simdjson/jsonparser.h"
8
+
9
+ // #define RAPIDJSON_SSE2 // bad
10
+ // #define RAPIDJSON_SSE42 // bad
11
+ #include "rapidjson/document.h"
12
+ #include "rapidjson/reader.h" // you have to check in the submodule
13
+ #include "rapidjson/stringbuffer.h"
14
+ #include "rapidjson/writer.h"
15
+ #include "sajson.h"
16
+
17
+ using namespace simdjson;
18
+ using namespace rapidjson;
19
+
20
+ std::string rapid_stringme_insitu(char *json) {
21
+ Document d;
22
+ d.ParseInsitu(json);
23
+ if (d.HasParseError()) {
24
+ std::cerr << "problem!" << std::endl;
25
+ return ""; // should do something
26
+ }
27
+ StringBuffer buffer;
28
+ Writer<StringBuffer> writer(buffer);
29
+ d.Accept(writer);
30
+ return buffer.GetString();
31
+ }
32
+
33
+ std::string rapid_stringme(char *json) {
34
+ Document d;
35
+ d.Parse(json);
36
+ if (d.HasParseError()) {
37
+ std::cerr << "problem!" << std::endl;
38
+ return ""; // should do something
39
+ }
40
+ StringBuffer buffer;
41
+ Writer<StringBuffer> writer(buffer);
42
+ d.Accept(writer);
43
+ return buffer.GetString();
44
+ }
45
+
46
+ int main(int argc, char *argv[]) {
47
+ int c;
48
+ bool verbose = false;
49
+ bool just_data = false;
50
+
51
+ while ((c = getopt(argc, argv, "vt")) != -1)
52
+ switch (c) {
53
+ case 't':
54
+ just_data = true;
55
+ break;
56
+ case 'v':
57
+ verbose = true;
58
+ break;
59
+ default:
60
+ abort();
61
+ }
62
+ if (optind >= argc) {
63
+ std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
64
+ exit(1);
65
+ }
66
+ const char *filename = argv[optind];
67
+ simdjson::padded_string p;
68
+ try {
69
+ simdjson::get_corpus(filename).swap(p);
70
+ } catch (const std::exception &e) { // caught by reference to base
71
+ std::cout << "Could not load the file " << filename << std::endl;
72
+ return EXIT_FAILURE;
73
+ }
74
+ if (verbose) {
75
+ std::cout << "Input has ";
76
+ if (p.size() > 1024 * 1024)
77
+ std::cout << p.size() / (1024 * 1024) << " MB ";
78
+ else if (p.size() > 1024)
79
+ std::cout << p.size() / 1024 << " KB ";
80
+ else
81
+ std::cout << p.size() << " B ";
82
+ std::cout << std::endl;
83
+ }
84
+ char *buffer = simdjson::allocate_padded_buffer(p.size() + 1);
85
+ memcpy(buffer, p.data(), p.size());
86
+ buffer[p.size()] = '\0';
87
+
88
+ int repeat = 50;
89
+ int volume = p.size();
90
+ if (just_data) {
91
+ printf(
92
+ "name cycles_per_byte cycles_per_byte_err gb_per_s gb_per_s_err \n");
93
+ }
94
+ size_t strlength = rapid_stringme((char *)p.data()).size();
95
+ if (verbose)
96
+ std::cout << "input length is " << p.size() << " stringified length is "
97
+ << strlength << std::endl;
98
+ BEST_TIME_NOCHECK("despacing with RapidJSON",
99
+ rapid_stringme((char *)p.data()), , repeat, volume,
100
+ !just_data);
101
+ BEST_TIME_NOCHECK(
102
+ "despacing with RapidJSON Insitu", rapid_stringme_insitu((char *)buffer),
103
+ memcpy(buffer, p.data(), p.size()), repeat, volume, !just_data);
104
+ memcpy(buffer, p.data(), p.size());
105
+
106
+ size_t outlength = simdjson::json_minify((const uint8_t *)buffer, p.size(),
107
+ (uint8_t *)buffer);
108
+ if (verbose)
109
+ std::cout << "json_minify length is " << outlength << std::endl;
110
+
111
+ uint8_t *cbuffer = (uint8_t *)buffer;
112
+ BEST_TIME("json_minify", simdjson::json_minify(cbuffer, p.size(), cbuffer),
113
+ outlength, memcpy(buffer, p.data(), p.size()), repeat, volume,
114
+ !just_data);
115
+ printf("minisize = %zu, original size = %zu (minified down to %.2f percent "
116
+ "of original) \n",
117
+ outlength, p.size(), outlength * 100.0 / p.size());
118
+
119
+ /***
120
+ * Is it worth it to minify before parsing?
121
+ ***/
122
+ rapidjson::Document d;
123
+ BEST_TIME("RapidJSON Insitu orig", d.ParseInsitu(buffer).HasParseError(),
124
+ false, memcpy(buffer, p.data(), p.size()), repeat, volume,
125
+ !just_data);
126
+
127
+ char *mini_buffer = simdjson::allocate_padded_buffer(p.size() + 1);
128
+ size_t minisize = simdjson::json_minify((const uint8_t *)p.data(), p.size(),
129
+ (uint8_t *)mini_buffer);
130
+ mini_buffer[minisize] = '\0';
131
+
132
+ BEST_TIME("RapidJSON Insitu despaced", d.ParseInsitu(buffer).HasParseError(),
133
+ false, memcpy(buffer, mini_buffer, p.size()), repeat, volume,
134
+ !just_data);
135
+
136
+ size_t ast_buffer_size = p.size() * 2;
137
+ size_t *ast_buffer = (size_t *)malloc(ast_buffer_size * sizeof(size_t));
138
+
139
+ BEST_TIME(
140
+ "sajson orig",
141
+ sajson::parse(sajson::bounded_allocation(ast_buffer, ast_buffer_size),
142
+ sajson::mutable_string_view(p.size(), buffer))
143
+ .is_valid(),
144
+ true, memcpy(buffer, p.data(), p.size()), repeat, volume, !just_data);
145
+
146
+ BEST_TIME(
147
+ "sajson despaced",
148
+ sajson::parse(sajson::bounded_allocation(ast_buffer, ast_buffer_size),
149
+ sajson::mutable_string_view(minisize, buffer))
150
+ .is_valid(),
151
+ true, memcpy(buffer, mini_buffer, p.size()), repeat, volume, !just_data);
152
+
153
+ simdjson::ParsedJson pj;
154
+ bool is_alloc_ok = pj.allocate_capacity(p.size(), 1024);
155
+ if (!is_alloc_ok) {
156
+ fprintf(stderr, "failed to allocate memory\n");
157
+ return EXIT_FAILURE;
158
+ }
159
+ bool automated_reallocation = false;
160
+ BEST_TIME("simdjson orig",
161
+ simdjson::json_parse((const uint8_t *)buffer, p.size(), pj,
162
+ automated_reallocation),
163
+ true, memcpy(buffer, p.data(), p.size()), repeat, volume,
164
+ !just_data);
165
+
166
+ simdjson::ParsedJson pj2;
167
+ bool is_alloc_ok2 = pj2.allocate_capacity(p.size(), 1024);
168
+ if (!is_alloc_ok2) {
169
+ fprintf(stderr, "failed to allocate memory\n");
170
+ return EXIT_FAILURE;
171
+ }
172
+ automated_reallocation = false;
173
+ BEST_TIME("simdjson despaced",
174
+ simdjson::json_parse((const uint8_t *)buffer, minisize, pj2,
175
+ automated_reallocation),
176
+ true, memcpy(buffer, mini_buffer, p.size()), repeat, volume,
177
+ !just_data);
178
+ free(buffer);
179
+ free(ast_buffer);
180
+ free(mini_buffer);
181
+ }