simdjson 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (132) hide show
  1. checksums.yaml +7 -0
  2. data/.clang-format +5 -0
  3. data/.gitignore +14 -0
  4. data/.gitmodules +3 -0
  5. data/.rubocop.yml +9 -0
  6. data/.travis.yml +7 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +39 -0
  10. data/Rakefile +32 -0
  11. data/benchmark/apache_builds.json +4421 -0
  12. data/benchmark/demo.json +15 -0
  13. data/benchmark/github_events.json +1390 -0
  14. data/benchmark/run_benchmark.rb +30 -0
  15. data/ext/simdjson/extconf.rb +22 -0
  16. data/ext/simdjson/simdjson.cpp +76 -0
  17. data/ext/simdjson/simdjson.hpp +6 -0
  18. data/lib/simdjson/version.rb +3 -0
  19. data/lib/simdjson.rb +2 -0
  20. data/simdjson.gemspec +35 -0
  21. data/vendor/.gitkeep +0 -0
  22. data/vendor/simdjson/AUTHORS +3 -0
  23. data/vendor/simdjson/CMakeLists.txt +63 -0
  24. data/vendor/simdjson/CONTRIBUTORS +27 -0
  25. data/vendor/simdjson/Dockerfile +10 -0
  26. data/vendor/simdjson/LICENSE +201 -0
  27. data/vendor/simdjson/Makefile +203 -0
  28. data/vendor/simdjson/Notes.md +85 -0
  29. data/vendor/simdjson/README.md +581 -0
  30. data/vendor/simdjson/amalgamation.sh +158 -0
  31. data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
  32. data/vendor/simdjson/benchmark/benchmark.h +223 -0
  33. data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
  34. data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
  35. data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
  36. data/vendor/simdjson/benchmark/parse.cpp +393 -0
  37. data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
  38. data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
  39. data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
  40. data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
  41. data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
  42. data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
  43. data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
  44. data/vendor/simdjson/doc/gbps.png +0 -0
  45. data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
  46. data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
  47. data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
  48. data/vendor/simdjson/images/halvarflake.png +0 -0
  49. data/vendor/simdjson/images/logo.png +0 -0
  50. data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
  51. data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
  52. data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
  53. data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
  54. data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
  55. data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
  56. data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
  57. data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
  58. data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
  59. data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
  60. data/vendor/simdjson/include/simdjson/portability.h +172 -0
  61. data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
  62. data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
  63. data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
  64. data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
  65. data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
  66. data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
  67. data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
  68. data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
  69. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
  70. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
  71. data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
  72. data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
  73. data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
  74. data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
  75. data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
  76. data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
  77. data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
  78. data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
  79. data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
  80. data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
  81. data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
  82. data/vendor/simdjson/scripts/issue150.sh +14 -0
  83. data/vendor/simdjson/scripts/javascript/README.md +3 -0
  84. data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
  85. data/vendor/simdjson/scripts/minifier.sh +11 -0
  86. data/vendor/simdjson/scripts/parseandstat.sh +24 -0
  87. data/vendor/simdjson/scripts/parser.sh +11 -0
  88. data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
  89. data/vendor/simdjson/scripts/plotparse.sh +98 -0
  90. data/vendor/simdjson/scripts/selectparser.sh +11 -0
  91. data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
  92. data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
  93. data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
  94. data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
  95. data/vendor/simdjson/scripts/testjson2json.sh +99 -0
  96. data/vendor/simdjson/scripts/transitions/Makefile +10 -0
  97. data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
  98. data/vendor/simdjson/singleheader/README.md +1 -0
  99. data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
  100. data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
  101. data/vendor/simdjson/singleheader/simdjson.h +39692 -0
  102. data/vendor/simdjson/src/CMakeLists.txt +67 -0
  103. data/vendor/simdjson/src/jsonioutil.cpp +35 -0
  104. data/vendor/simdjson/src/jsonminifier.cpp +285 -0
  105. data/vendor/simdjson/src/jsonparser.cpp +91 -0
  106. data/vendor/simdjson/src/parsedjson.cpp +323 -0
  107. data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
  108. data/vendor/simdjson/src/simdjson.cpp +30 -0
  109. data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
  110. data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
  111. data/vendor/simdjson/style/clang-format-check.sh +25 -0
  112. data/vendor/simdjson/style/clang-format.sh +25 -0
  113. data/vendor/simdjson/style/run-clang-format.py +326 -0
  114. data/vendor/simdjson/tape.md +134 -0
  115. data/vendor/simdjson/tests/CMakeLists.txt +25 -0
  116. data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
  117. data/vendor/simdjson/tests/basictests.cpp +75 -0
  118. data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
  119. data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
  120. data/vendor/simdjson/tests/pointercheck.cpp +38 -0
  121. data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
  122. data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
  123. data/vendor/simdjson/tools/CMakeLists.txt +3 -0
  124. data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
  125. data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
  126. data/vendor/simdjson/tools/json2json.cpp +112 -0
  127. data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
  128. data/vendor/simdjson/tools/jsonstats.cpp +143 -0
  129. data/vendor/simdjson/tools/minify.cpp +21 -0
  130. data/vendor/simdjson/tools/release.py +125 -0
  131. data/vendor/simdjson/windows/dirent_portable.h +1043 -0
  132. metadata +273 -0
@@ -0,0 +1,298 @@
1
+ #include "simdjson/jsonparser.h"
2
+ #ifndef _MSC_VER
3
+ #include "linux-perf-events.h"
4
+ #include <unistd.h>
5
+ #ifdef __linux__
6
+ #include <libgen.h>
7
+ #endif //__linux__
8
+ #endif // _MSC_VER
9
+
10
+ #include <memory>
11
+
12
+ #include "benchmark.h"
13
+
14
+ // #define RAPIDJSON_SSE2 // bad for performance
15
+ // #define RAPIDJSON_SSE42 // bad for performance
16
+ #include "rapidjson/document.h"
17
+ #include "rapidjson/reader.h"
18
+ #include "rapidjson/stringbuffer.h"
19
+ #include "rapidjson/writer.h"
20
+
21
+ #include "sajson.h"
22
+
23
+ #include <nlohmann/json.hpp>
24
+ using json = nlohmann::json;
25
+
26
+ #ifdef ALLPARSER
27
+
28
+ #include "fastjson.cpp"
29
+ #include "fastjson_dom.cpp"
30
+ #include "gason.cpp"
31
+
32
+ #include "json11.cpp"
33
+ extern "C" {
34
+ #include "cJSON.c"
35
+ #include "cJSON.h"
36
+ #include "jsmn.c"
37
+ #include "jsmn.h"
38
+ #include "ujdecode.h"
39
+ #include "ultrajsondec.c"
40
+ }
41
+
42
+ #include "jsoncpp.cpp"
43
+ #include "json/json.h"
44
+
45
+ #endif
46
+
47
+ using namespace rapidjson;
48
+
49
+ #ifdef ALLPARSER
50
+ // fastjson has a tricky interface
51
+ void on_json_error(void *, const fastjson::ErrorContext &ec) {
52
+ // std::cerr<<"ERROR: "<<ec.mesg<<std::endl;
53
+ }
54
+ bool fastjson_parse(const char *input) {
55
+ fastjson::Token token;
56
+ fastjson::dom::Chunk chunk;
57
+ return fastjson::dom::parse_string(input, &token, &chunk, 0, &on_json_error,
58
+ NULL);
59
+ }
60
+ // end of fastjson stuff
61
+ #endif
62
+
63
+ int main(int argc, char *argv[]) {
64
+ bool verbose = false;
65
+ bool just_data = false;
66
+ int c;
67
+ while ((c = getopt(argc, argv, "vt")) != -1)
68
+ switch (c) {
69
+ case 't':
70
+ just_data = true;
71
+ break;
72
+ case 'v':
73
+ verbose = true;
74
+ break;
75
+ default:
76
+ abort();
77
+ }
78
+ if (optind >= argc) {
79
+ std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
80
+ std::cerr << "Or " << argv[0] << " -v <jsonfile>" << std::endl;
81
+ std::cerr << "The '-t' flag outputs a table. " << std::endl;
82
+ exit(1);
83
+ }
84
+ const char *filename = argv[optind];
85
+ if (optind + 1 < argc) {
86
+ std::cerr << "warning: ignoring everything after " << argv[optind + 1]
87
+ << std::endl;
88
+ }
89
+ simdjson::padded_string p;
90
+ try {
91
+ simdjson::get_corpus(filename).swap(p);
92
+ } catch (const std::exception &e) { // caught by reference to base
93
+ std::cout << "Could not load the file " << filename << std::endl;
94
+ return EXIT_FAILURE;
95
+ }
96
+
97
+ if (verbose) {
98
+ std::cout << "Input has ";
99
+ if (p.size() > 1024 * 1024)
100
+ std::cout << p.size() / (1024 * 1024) << " MB ";
101
+ else if (p.size() > 1024)
102
+ std::cout << p.size() / 1024 << " KB ";
103
+ else
104
+ std::cout << p.size() << " B ";
105
+ std::cout << std::endl;
106
+ }
107
+ simdjson::ParsedJson pj;
108
+ bool allocok = pj.allocate_capacity(p.size(), 1024);
109
+
110
+ if (!allocok) {
111
+ std::cerr << "can't allocate memory" << std::endl;
112
+ return EXIT_FAILURE;
113
+ }
114
+ int repeat = (p.size() < 1 * 1000 * 1000 ? 1000 : 10);
115
+ int volume = p.size();
116
+ if (just_data) {
117
+ printf("%-42s %20s %20s %20s %20s \n", "name", "cycles_per_byte",
118
+ "cycles_per_byte_err", "gb_per_s", "gb_per_s_err");
119
+ }
120
+ if (!just_data)
121
+ BEST_TIME("simdjson (dynamic mem) ", build_parsed_json(p).is_valid(), true,
122
+ , repeat, volume, !just_data);
123
+ // (static alloc)
124
+ BEST_TIME("simdjson ", json_parse(p, pj), simdjson::SUCCESS, , repeat, volume,
125
+ !just_data);
126
+
127
+ rapidjson::Document d;
128
+
129
+ char *buffer = (char *)malloc(p.size() + 1);
130
+ memcpy(buffer, p.data(), p.size());
131
+ buffer[p.size()] = '\0';
132
+ #ifndef ALLPARSER
133
+ if (!just_data)
134
+ #endif
135
+ BEST_TIME("RapidJSON ",
136
+ d.Parse<kParseValidateEncodingFlag>((const char *)buffer)
137
+ .HasParseError(),
138
+ false, memcpy(buffer, p.data(), p.size()), repeat, volume,
139
+ !just_data);
140
+ BEST_TIME("RapidJSON (insitu)",
141
+ d.ParseInsitu<kParseValidateEncodingFlag>(buffer).HasParseError(),
142
+ false,
143
+ memcpy(buffer, p.data(), p.size()) && (buffer[p.size()] = '\0'),
144
+ repeat, volume, !just_data);
145
+ #ifndef ALLPARSER
146
+ if (!just_data)
147
+ #endif
148
+ BEST_TIME("sajson (dynamic mem)",
149
+ sajson::parse(sajson::dynamic_allocation(),
150
+ sajson::mutable_string_view(p.size(), buffer))
151
+ .is_valid(),
152
+ true, memcpy(buffer, p.data(), p.size()), repeat, volume,
153
+ !just_data);
154
+
155
+ size_t ast_buffer_size = p.size();
156
+ size_t *ast_buffer = (size_t *)malloc(ast_buffer_size * sizeof(size_t));
157
+ // (static alloc, insitu)
158
+ BEST_TIME(
159
+ "sajson",
160
+ sajson::parse(sajson::bounded_allocation(ast_buffer, ast_buffer_size),
161
+ sajson::mutable_string_view(p.size(), buffer))
162
+ .is_valid(),
163
+ true, memcpy(buffer, p.data(), p.size()), repeat, volume, !just_data);
164
+ size_t expected = json::parse(p.data(), p.data() + p.size()).size();
165
+ BEST_TIME("nlohmann-json", json::parse(buffer, buffer + p.size()).size(),
166
+ expected, memcpy(buffer, p.data(), p.size()), repeat, volume,
167
+ !just_data);
168
+
169
+ #ifdef ALLPARSER
170
+ std::string json11err;
171
+ BEST_TIME("dropbox (json11) ",
172
+ ((json11::Json::parse(buffer, json11err).is_null()) ||
173
+ (!json11err.empty())),
174
+ false, memcpy(buffer, p.data(), p.size()), repeat, volume,
175
+ !just_data);
176
+
177
+ BEST_TIME("fastjson ", fastjson_parse(buffer), true,
178
+ memcpy(buffer, p.data(), p.size()), repeat, volume, !just_data);
179
+ JsonValue value;
180
+ JsonAllocator allocator;
181
+ char *endptr;
182
+ BEST_TIME("gason ", jsonParse(buffer, &endptr, &value, allocator),
183
+ JSON_OK, memcpy(buffer, p.data(), p.size()), repeat, volume,
184
+ !just_data);
185
+ void *state;
186
+ BEST_TIME("ultrajson ",
187
+ (UJDecode(buffer, p.size(), NULL, &state) == NULL), false,
188
+ memcpy(buffer, p.data(), p.size()), repeat, volume, !just_data);
189
+
190
+ {
191
+ std::unique_ptr<jsmntok_t[]> tokens =
192
+ std::make_unique<jsmntok_t[]>(p.size());
193
+ jsmn_parser parser;
194
+ jsmn_init(&parser);
195
+ memcpy(buffer, p.data(), p.size());
196
+ buffer[p.size()] = '\0';
197
+ BEST_TIME(
198
+ "jsmn ",
199
+ (jsmn_parse(&parser, buffer, p.size(), tokens.get(), p.size()) > 0),
200
+ true, jsmn_init(&parser), repeat, volume, !just_data);
201
+ }
202
+ memcpy(buffer, p.data(), p.size());
203
+ buffer[p.size()] = '\0';
204
+ cJSON *tree = cJSON_Parse(buffer);
205
+ BEST_TIME("cJSON ", ((tree = cJSON_Parse(buffer)) != NULL), true,
206
+ cJSON_Delete(tree), repeat, volume, !just_data);
207
+ cJSON_Delete(tree);
208
+
209
+ Json::CharReaderBuilder b;
210
+ Json::CharReader *json_cpp_reader = b.newCharReader();
211
+ Json::Value root;
212
+ Json::String errs;
213
+ BEST_TIME("jsoncpp ",
214
+ json_cpp_reader->parse(buffer, buffer + volume, &root, &errs), true,
215
+ , repeat, volume, !just_data);
216
+ delete json_cpp_reader;
217
+ #endif
218
+ if (!just_data)
219
+ BEST_TIME("memcpy ",
220
+ (memcpy(buffer, p.data(), p.size()) == buffer), true, , repeat,
221
+ volume, !just_data);
222
+ #ifdef __linux__
223
+ if (!just_data) {
224
+ printf("\n \n <doing additional analysis with performance counters (Linux "
225
+ "only)>\n");
226
+ std::vector<int> evts;
227
+ evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
228
+ evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
229
+ evts.push_back(PERF_COUNT_HW_BRANCH_MISSES);
230
+ evts.push_back(PERF_COUNT_HW_CACHE_REFERENCES);
231
+ evts.push_back(PERF_COUNT_HW_CACHE_MISSES);
232
+ LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
233
+ std::vector<unsigned long long> results;
234
+ std::vector<unsigned long long> stats;
235
+ results.resize(evts.size());
236
+ stats.resize(evts.size());
237
+ std::fill(stats.begin(), stats.end(), 0); // unnecessary
238
+ for (int i = 0; i < repeat; i++) {
239
+ unified.start();
240
+ if (json_parse(p, pj) != simdjson::SUCCESS)
241
+ printf("bug\n");
242
+ unified.end(results);
243
+ std::transform(stats.begin(), stats.end(), results.begin(), stats.begin(),
244
+ std::plus<unsigned long long>());
245
+ }
246
+ printf("simdjson : cycles %10.0f instructions %10.0f branchmisses %10.0f "
247
+ "cacheref %10.0f cachemisses %10.0f bytespercachemiss %10.0f "
248
+ "inspercycle %10.1f insperbyte %10.1f\n",
249
+ stats[0] * 1.0 / repeat, stats[1] * 1.0 / repeat,
250
+ stats[2] * 1.0 / repeat, stats[3] * 1.0 / repeat,
251
+ stats[4] * 1.0 / repeat, volume * repeat * 1.0 / stats[2],
252
+ stats[1] * 1.0 / stats[0], stats[1] * 1.0 / (volume * repeat));
253
+
254
+ std::fill(stats.begin(), stats.end(), 0);
255
+ for (int i = 0; i < repeat; i++) {
256
+ memcpy(buffer, p.data(), p.size());
257
+ buffer[p.size()] = '\0';
258
+ unified.start();
259
+ if (d.ParseInsitu<kParseValidateEncodingFlag>(buffer).HasParseError() !=
260
+ false)
261
+ printf("bug\n");
262
+ unified.end(results);
263
+ std::transform(stats.begin(), stats.end(), results.begin(), stats.begin(),
264
+ std::plus<unsigned long long>());
265
+ }
266
+ printf("RapidJSON: cycles %10.0f instructions %10.0f branchmisses %10.0f "
267
+ "cacheref %10.0f cachemisses %10.0f bytespercachemiss %10.0f "
268
+ "inspercycle %10.1f insperbyte %10.1f\n",
269
+ stats[0] * 1.0 / repeat, stats[1] * 1.0 / repeat,
270
+ stats[2] * 1.0 / repeat, stats[3] * 1.0 / repeat,
271
+ stats[4] * 1.0 / repeat, volume * repeat * 1.0 / stats[2],
272
+ stats[1] * 1.0 / stats[0], stats[1] * 1.0 / (volume * repeat));
273
+
274
+ std::fill(stats.begin(), stats.end(), 0); // unnecessary
275
+ for (int i = 0; i < repeat; i++) {
276
+ memcpy(buffer, p.data(), p.size());
277
+ unified.start();
278
+ if (sajson::parse(sajson::bounded_allocation(ast_buffer, ast_buffer_size),
279
+ sajson::mutable_string_view(p.size(), buffer))
280
+ .is_valid() != true)
281
+ printf("bug\n");
282
+ unified.end(results);
283
+ std::transform(stats.begin(), stats.end(), results.begin(), stats.begin(),
284
+ std::plus<unsigned long long>());
285
+ }
286
+ printf("sajson : cycles %10.0f instructions %10.0f branchmisses %10.0f "
287
+ "cacheref %10.0f cachemisses %10.0f bytespercachemiss %10.0f "
288
+ "inspercycle %10.1f insperbyte %10.1f\n",
289
+ stats[0] * 1.0 / repeat, stats[1] * 1.0 / repeat,
290
+ stats[2] * 1.0 / repeat, stats[3] * 1.0 / repeat,
291
+ stats[4] * 1.0 / repeat, volume * repeat * 1.0 / stats[2],
292
+ stats[1] * 1.0 / stats[0], stats[1] * 1.0 / (volume * repeat));
293
+ }
294
+ #endif // __linux__
295
+
296
+ free(ast_buffer);
297
+ free(buffer);
298
+ }
@@ -0,0 +1,208 @@
1
+ #include <iostream>
2
+ #ifndef _MSC_VER
3
+ #include <unistd.h>
4
+ #endif
5
+ #include "simdjson/jsonioutil.h"
6
+ #include "simdjson/jsonparser.h"
7
+ #ifdef __linux__
8
+ #include "linux-perf-events.h"
9
+ #endif
10
+
11
// Returns how many of the first `length` bytes of `input` have their most
// significant bit set, i.e. are outside the 7-bit ASCII range.
size_t count_nonasciibytes(const uint8_t *input, size_t length) {
  size_t high_bit_bytes = 0;
  const uint8_t *const end = input + length;
  for (const uint8_t *cursor = input; cursor != end; ++cursor) {
    if ((*cursor & 0x80) != 0) {
      ++high_bit_bytes;
    }
  }
  return high_bit_bytes;
}
18
+
19
// Returns the number of backslash characters among the first `length` bytes
// of `input`.
size_t count_backslash(const uint8_t *input, size_t length) {
  size_t backslashes = 0;
  size_t remaining = length;
  const uint8_t *cursor = input;
  while (remaining > 0) {
    if (*cursor == '\\') {
      ++backslashes;
    }
    ++cursor;
    --remaining;
  }
  return backslashes;
}
26
+
27
// Summary statistics about one JSON document, as filled in by
// simdjson_compute_stats(). Every member has a default initializer so that an
// instance never carries indeterminate values, including when it is returned
// for a document that failed to parse.
struct stat_s {
  size_t integer_count{0};            // tape entries of type 'l'
  size_t float_count{0};              // tape entries of type 'd'
  size_t string_count{0};             // tape entries of type '"'
  size_t backslash_count{0};          // '\\' bytes in the raw input
  size_t non_ascii_byte_count{0};     // raw input bytes with the high bit set
  size_t object_count{0};             // tape entries of type '{'
  size_t array_count{0};              // tape entries of type '['
  size_t null_count{0};               // tape entries of type 'n'
  size_t true_count{0};               // tape entries of type 't'
  size_t false_count{0};              // tape entries of type 'f'
  size_t byte_count{0};               // size of the raw input in bytes
  size_t structural_indexes_count{0}; // parser's n_structural_indexes
  bool valid{false};                  // whether the document parsed
};

using stat_t = struct stat_s;
44
+
45
+ stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
46
+ stat_t answer;
47
+ simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
48
+ answer.valid = pj.is_valid();
49
+ if (!answer.valid) {
50
+ return answer;
51
+ }
52
+ answer.backslash_count =
53
+ count_backslash(reinterpret_cast<const uint8_t *>(p.data()), p.size());
54
+ answer.non_ascii_byte_count = count_nonasciibytes(
55
+ reinterpret_cast<const uint8_t *>(p.data()), p.size());
56
+ answer.byte_count = p.size();
57
+ answer.integer_count = 0;
58
+ answer.float_count = 0;
59
+ answer.object_count = 0;
60
+ answer.array_count = 0;
61
+ answer.null_count = 0;
62
+ answer.true_count = 0;
63
+ answer.false_count = 0;
64
+ answer.string_count = 0;
65
+ answer.structural_indexes_count = pj.n_structural_indexes;
66
+ size_t tape_idx = 0;
67
+ uint64_t tape_val = pj.tape[tape_idx++];
68
+ uint8_t type = (tape_val >> 56);
69
+ size_t how_many = 0;
70
+ assert(type == 'r');
71
+ how_many = tape_val & JSON_VALUE_MASK;
72
+ for (; tape_idx < how_many; tape_idx++) {
73
+ tape_val = pj.tape[tape_idx];
74
+ // uint64_t payload = tape_val & JSON_VALUE_MASK;
75
+ type = (tape_val >> 56);
76
+ switch (type) {
77
+ case 'l': // we have a long int
78
+ answer.integer_count++;
79
+ tape_idx++; // skipping the integer
80
+ break;
81
+ case 'd': // we have a double
82
+ answer.float_count++;
83
+ tape_idx++; // skipping the double
84
+ break;
85
+ case 'n': // we have a null
86
+ answer.null_count++;
87
+ break;
88
+ case 't': // we have a true
89
+ answer.true_count++;
90
+ break;
91
+ case 'f': // we have a false
92
+ answer.false_count++;
93
+ break;
94
+ case '{': // we have an object
95
+ answer.object_count++;
96
+ break;
97
+ case '}': // we end an object
98
+ break;
99
+ case '[': // we start an array
100
+ answer.array_count++;
101
+ break;
102
+ case ']': // we end an array
103
+ break;
104
+ case '"': // we have a string
105
+ answer.string_count++;
106
+ break;
107
+ default:
108
+ break; // ignore
109
+ }
110
+ }
111
+ return answer;
112
+ }
113
+
114
+ int main(int argc, char *argv[]) {
115
+ #ifndef _MSC_VER
116
+ int c;
117
+ while ((c = getopt(argc, argv, "")) != -1) {
118
+ switch (c) {
119
+
120
+ default:
121
+ abort();
122
+ }
123
+ }
124
+ #else
125
+ int optind = 1;
126
+ #endif
127
+ if (optind >= argc) {
128
+ std::cerr << "Reads json, prints stats. " << std::endl;
129
+ std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
130
+
131
+ exit(1);
132
+ }
133
+ const char *filename = argv[optind];
134
+ if (optind + 1 < argc) {
135
+ std::cerr << "warning: ignoring everything after " << argv[optind + 1]
136
+ << std::endl;
137
+ }
138
+ simdjson::padded_string p;
139
+ try {
140
+ simdjson::get_corpus(filename).swap(p);
141
+ } catch (const std::exception &) { // caught by reference to base
142
+ std::cerr << "Could not load the file " << filename << std::endl;
143
+ return EXIT_FAILURE;
144
+ }
145
+ stat_t s = simdjson_compute_stats(p);
146
+ if (!s.valid) {
147
+ std::cerr << "not a valid JSON" << std::endl;
148
+ return EXIT_FAILURE;
149
+ }
150
+
151
+ printf("# integer_count float_count string_count backslash_count "
152
+ "non_ascii_byte_count object_count array_count null_count true_count "
153
+ "false_count byte_count structural_indexes_count ");
154
+ #ifdef __linux__
155
+ printf(" stage1_cycle_count stage1_instruction_count stage2_cycle_count "
156
+ " stage2_instruction_count stage3_cycle_count "
157
+ "stage3_instruction_count ");
158
+ #else
159
+ printf("(you are not under linux, so perf counters are disaabled)");
160
+ #endif
161
+ printf("\n");
162
+ printf("%zu %zu %zu %zu %zu %zu %zu %zu %zu %zu %zu %zu ", s.integer_count,
163
+ s.float_count, s.string_count, s.backslash_count,
164
+ s.non_ascii_byte_count, s.object_count, s.array_count, s.null_count,
165
+ s.true_count, s.false_count, s.byte_count, s.structural_indexes_count);
166
+ #ifdef __linux__
167
+ simdjson::ParsedJson pj;
168
+ bool allocok = pj.allocate_capacity(p.size());
169
+ if (!allocok) {
170
+ std::cerr << "failed to allocate memory" << std::endl;
171
+ return EXIT_FAILURE;
172
+ }
173
+ const uint32_t iterations = p.size() < 1 * 1000 * 1000 ? 1000 : 50;
174
+ std::vector<int> evts;
175
+ evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
176
+ evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
177
+ LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
178
+ unsigned long cy1 = 0, cy2 = 0;
179
+ unsigned long cl1 = 0, cl2 = 0;
180
+ std::vector<unsigned long long> results;
181
+ results.resize(evts.size());
182
+ for (uint32_t i = 0; i < iterations; i++) {
183
+ unified.start();
184
+ // The default template is simdjson::Architecture::NATIVE.
185
+ bool isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) ==
186
+ simdjson::SUCCESS);
187
+ unified.end(results);
188
+
189
+ cy1 += results[0];
190
+ cl1 += results[1];
191
+
192
+ unified.start();
193
+ isok =
194
+ isok && (simdjson::SUCCESS == unified_machine(p.data(), p.size(), pj));
195
+ unified.end(results);
196
+
197
+ cy2 += results[0];
198
+ cl2 += results[1];
199
+ if (!isok) {
200
+ std::cerr << "failure?" << std::endl;
201
+ }
202
+ }
203
+ printf("%f %f %f %f ", cy1 * 1.0 / iterations, cl1 * 1.0 / iterations,
204
+ cy2 * 1.0 / iterations, cl2 * 1.0 / iterations);
205
+ #endif // __linux__
206
+ printf("\n");
207
+ return EXIT_SUCCESS;
208
+ }