simdjson 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. checksums.yaml +7 -0
  2. data/.clang-format +5 -0
  3. data/.gitignore +14 -0
  4. data/.gitmodules +3 -0
  5. data/.rubocop.yml +9 -0
  6. data/.travis.yml +7 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +39 -0
  10. data/Rakefile +32 -0
  11. data/benchmark/apache_builds.json +4421 -0
  12. data/benchmark/demo.json +15 -0
  13. data/benchmark/github_events.json +1390 -0
  14. data/benchmark/run_benchmark.rb +30 -0
  15. data/ext/simdjson/extconf.rb +22 -0
  16. data/ext/simdjson/simdjson.cpp +76 -0
  17. data/ext/simdjson/simdjson.hpp +6 -0
  18. data/lib/simdjson/version.rb +3 -0
  19. data/lib/simdjson.rb +2 -0
  20. data/simdjson.gemspec +35 -0
  21. data/vendor/.gitkeep +0 -0
  22. data/vendor/simdjson/AUTHORS +3 -0
  23. data/vendor/simdjson/CMakeLists.txt +63 -0
  24. data/vendor/simdjson/CONTRIBUTORS +27 -0
  25. data/vendor/simdjson/Dockerfile +10 -0
  26. data/vendor/simdjson/LICENSE +201 -0
  27. data/vendor/simdjson/Makefile +203 -0
  28. data/vendor/simdjson/Notes.md +85 -0
  29. data/vendor/simdjson/README.md +581 -0
  30. data/vendor/simdjson/amalgamation.sh +158 -0
  31. data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
  32. data/vendor/simdjson/benchmark/benchmark.h +223 -0
  33. data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
  34. data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
  35. data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
  36. data/vendor/simdjson/benchmark/parse.cpp +393 -0
  37. data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
  38. data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
  39. data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
  40. data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
  41. data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
  42. data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
  43. data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
  44. data/vendor/simdjson/doc/gbps.png +0 -0
  45. data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
  46. data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
  47. data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
  48. data/vendor/simdjson/images/halvarflake.png +0 -0
  49. data/vendor/simdjson/images/logo.png +0 -0
  50. data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
  51. data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
  52. data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
  53. data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
  54. data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
  55. data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
  56. data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
  57. data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
  58. data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
  59. data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
  60. data/vendor/simdjson/include/simdjson/portability.h +172 -0
  61. data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
  62. data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
  63. data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
  64. data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
  65. data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
  66. data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
  67. data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
  68. data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
  69. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
  70. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
  71. data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
  72. data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
  73. data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
  74. data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
  75. data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
  76. data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
  77. data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
  78. data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
  79. data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
  80. data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
  81. data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
  82. data/vendor/simdjson/scripts/issue150.sh +14 -0
  83. data/vendor/simdjson/scripts/javascript/README.md +3 -0
  84. data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
  85. data/vendor/simdjson/scripts/minifier.sh +11 -0
  86. data/vendor/simdjson/scripts/parseandstat.sh +24 -0
  87. data/vendor/simdjson/scripts/parser.sh +11 -0
  88. data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
  89. data/vendor/simdjson/scripts/plotparse.sh +98 -0
  90. data/vendor/simdjson/scripts/selectparser.sh +11 -0
  91. data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
  92. data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
  93. data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
  94. data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
  95. data/vendor/simdjson/scripts/testjson2json.sh +99 -0
  96. data/vendor/simdjson/scripts/transitions/Makefile +10 -0
  97. data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
  98. data/vendor/simdjson/singleheader/README.md +1 -0
  99. data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
  100. data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
  101. data/vendor/simdjson/singleheader/simdjson.h +39692 -0
  102. data/vendor/simdjson/src/CMakeLists.txt +67 -0
  103. data/vendor/simdjson/src/jsonioutil.cpp +35 -0
  104. data/vendor/simdjson/src/jsonminifier.cpp +285 -0
  105. data/vendor/simdjson/src/jsonparser.cpp +91 -0
  106. data/vendor/simdjson/src/parsedjson.cpp +323 -0
  107. data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
  108. data/vendor/simdjson/src/simdjson.cpp +30 -0
  109. data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
  110. data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
  111. data/vendor/simdjson/style/clang-format-check.sh +25 -0
  112. data/vendor/simdjson/style/clang-format.sh +25 -0
  113. data/vendor/simdjson/style/run-clang-format.py +326 -0
  114. data/vendor/simdjson/tape.md +134 -0
  115. data/vendor/simdjson/tests/CMakeLists.txt +25 -0
  116. data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
  117. data/vendor/simdjson/tests/basictests.cpp +75 -0
  118. data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
  119. data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
  120. data/vendor/simdjson/tests/pointercheck.cpp +38 -0
  121. data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
  122. data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
  123. data/vendor/simdjson/tools/CMakeLists.txt +3 -0
  124. data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
  125. data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
  126. data/vendor/simdjson/tools/json2json.cpp +112 -0
  127. data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
  128. data/vendor/simdjson/tools/jsonstats.cpp +143 -0
  129. data/vendor/simdjson/tools/minify.cpp +21 -0
  130. data/vendor/simdjson/tools/release.py +125 -0
  131. data/vendor/simdjson/windows/dirent_portable.h +1043 -0
  132. metadata +273 -0
@@ -0,0 +1,56 @@
1
+ #ifndef SIMDJSON_STRINGPARSING_ARM64_H
2
+ #define SIMDJSON_STRINGPARSING_ARM64_H
3
+
4
+ #include "simdjson/stringparsing.h"
5
+ #include "simdjson/stringparsing_macros.h"
6
+
7
+ #ifdef IS_ARM64
8
+ namespace simdjson {
9
+ template <>
10
+ really_inline parse_string_helper
11
+ find_bs_bits_and_quote_bits<Architecture::ARM64>(const uint8_t *src,
12
+ uint8_t *dst) {
13
+ // Copies 32 bytes from src to dst (two 16-byte loads/stores). The loads can read
14
+ // up to 31 bytes beyond the buffer size, hence the SIMDJSON_PADDING check below.
15
+ static_assert(2 * sizeof(uint8x16_t) - 1 <= SIMDJSON_PADDING);
16
+ uint8x16_t v0 = vld1q_u8(src);
17
+ uint8x16_t v1 = vld1q_u8(src + 16);
18
+ vst1q_u8(dst, v0);
19
+ vst1q_u8(dst + 16, v1);
20
+
21
+ uint8x16_t bs_mask = vmovq_n_u8('\\'); // backslash replicated into all 16 bytes
22
+ uint8x16_t qt_mask = vmovq_n_u8('"'); // double quote replicated into all 16 bytes
23
+ const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
24
+ 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; // one distinct bit per byte within each 8-byte half
25
+ uint8x16_t cmp_bs_0 = vceqq_u8(v0, bs_mask);
26
+ uint8x16_t cmp_bs_1 = vceqq_u8(v1, bs_mask);
27
+ uint8x16_t cmp_qt_0 = vceqq_u8(v0, qt_mask);
28
+ uint8x16_t cmp_qt_1 = vceqq_u8(v1, qt_mask);
29
+
30
+ cmp_bs_0 = vandq_u8(cmp_bs_0, bit_mask); // matching bytes keep only their own bit
31
+ cmp_bs_1 = vandq_u8(cmp_bs_1, bit_mask);
32
+ cmp_qt_0 = vandq_u8(cmp_qt_0, bit_mask);
33
+ cmp_qt_1 = vandq_u8(cmp_qt_1, bit_mask);
34
+
35
+ uint8x16_t sum0 = vpaddq_u8(cmp_bs_0, cmp_bs_1); // three rounds of pairwise adds fold 8 bytes into 1
36
+ uint8x16_t sum1 = vpaddq_u8(cmp_qt_0, cmp_qt_1);
37
+ sum0 = vpaddq_u8(sum0, sum1);
38
+ sum0 = vpaddq_u8(sum0, sum0); // bytes 0-3 now carry the backslash bits, bytes 4-7 the quote bits
39
+ return {
40
+ vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 0), // bs_bits: 32-bit lane 0, one bit per input byte
41
+ vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 1) // quote_bits: 32-bit lane 1, one bit per input byte
42
+ };
43
+ }
44
+
45
+ template <>
46
+ WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER
47
+ really_inline bool
48
+ parse_string<Architecture::ARM64>(UNUSED const uint8_t *buf,
49
+ UNUSED size_t len, ParsedJson &pj,
50
+ UNUSED const uint32_t depth,
51
+ UNUSED uint32_t offset) {
52
+ PARSE_STRING(Architecture::ARM64, buf, len, pj, depth, offset); // body comes from the shared PARSE_STRING macro
53
+ }
54
+ } // namespace simdjson
55
+ #endif
56
+ #endif
@@ -0,0 +1,43 @@
1
+ #ifndef SIMDJSON_STRINGPARSING_HASWELL_H
2
+ #define SIMDJSON_STRINGPARSING_HASWELL_H
3
+
4
+ #include "simdjson/stringparsing.h"
5
+ #include "simdjson/stringparsing_macros.h"
6
+
7
+ #ifdef IS_X86_64
8
+ TARGET_HASWELL
9
+ namespace simdjson {
10
+ template <>
11
+ really_inline parse_string_helper
12
+ find_bs_bits_and_quote_bits<Architecture::HASWELL>(const uint8_t *src,
13
+ uint8_t *dst) {
14
+ // the 32-byte load below can read up to 31 bytes beyond the buffer size, but we
15
+ // require SIMDJSON_PADDING of padding
16
+ static_assert(sizeof(__m256i) - 1 <= SIMDJSON_PADDING);
17
+ __m256i v = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
18
+ // store to dest unconditionally - we can overwrite the bytes we don't like
19
+ // later
20
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), v);
21
+ auto quote_mask = _mm256_cmpeq_epi8(v, _mm256_set1_epi8('"'));
22
+ return {
23
+ static_cast<uint32_t>(_mm256_movemask_epi8(
24
+ _mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\')))), // bs_bits: one bit per input byte that equals a backslash
25
+ static_cast<uint32_t>(_mm256_movemask_epi8(quote_mask)) // quote_bits: one bit per input byte that equals a double quote
26
+ };
27
+ }
28
+
29
+ template <>
30
+ WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER
31
+ really_inline bool
32
+ parse_string<Architecture::HASWELL>(UNUSED const uint8_t *buf,
33
+ UNUSED size_t len, ParsedJson &pj,
34
+ UNUSED const uint32_t depth,
35
+ UNUSED uint32_t offset) {
36
+ PARSE_STRING(Architecture::HASWELL, buf, len, pj, depth, offset); // body comes from the shared PARSE_STRING macro
37
+ }
38
+
39
+ } // namespace simdjson
40
+ UNTARGET_REGION
41
+ #endif
42
+
43
+ #endif
@@ -0,0 +1,88 @@
1
+ #ifndef SIMDJSON_STRINGPARSING_MACROS_H
2
+ #define SIMDJSON_STRINGPARSING_MACROS_H
3
+
4
+ // We need to compile this code for multiple architectures. However, target
5
+ // attributes can be used only once by function definition. Huge macro seemed
6
+ // better than huge code duplication.
7
+ // bool PARSE_STRING(Architecture T, const uint8_t *buf, size_t len, ParsedJson
8
+ // &pj,const uint32_t depth, uint32_t offset)
9
+ #define PARSE_STRING(T, buf, len, pj, depth, offset) \
10
+ { \
11
+ pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"'); \
12
+ const uint8_t *src = \
13
+ &buf[offset + 1]; /* we know that buf at offset is a " */ \
14
+ uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t); \
15
+ const uint8_t *const start_of_string = dst; \
16
+ while (1) { \
17
+ parse_string_helper helper = find_bs_bits_and_quote_bits<T>(src, dst); \
18
+ if (((helper.bs_bits - 1) & helper.quote_bits) != 0) { \
19
+ /* we encountered quotes first. Move dst to point to quotes and exit \
20
+ */ \
21
+ \
22
+ /* find out where the quote is... */ \
23
+ uint32_t quote_dist = trailing_zeroes(helper.quote_bits); \
24
+ \
25
+ /* NULL termination is still handy if you expect all your strings to \
26
+ * be NULL terminated? */ \
27
+ /* It comes at a small cost */ \
28
+ dst[quote_dist] = 0; \
29
+ \
30
+ uint32_t str_length = (dst - start_of_string) + quote_dist; \
31
+ memcpy(pj.current_string_buf_loc, &str_length, sizeof(uint32_t)); \
32
+ /***************************** \
33
+ * Above, check for overflow in case someone has a crazy string \
34
+ * (>=4GB?) _ \
35
+ * But only add the overflow check when the document itself exceeds \
36
+ * 4GB \
37
+ * Currently unneeded because we refuse to parse docs larger or equal \
38
+ * to 4GB. \
39
+ ****************************/ \
40
+ \
41
+ /* we advance the point, accounting for the fact that we have a NULL \
42
+ * termination */ \
43
+ pj.current_string_buf_loc = dst + quote_dist + 1; \
44
+ return true; \
45
+ } \
46
+ if (((helper.quote_bits - 1) & helper.bs_bits) != 0) { \
47
+ /* find out where the backslash is */ \
48
+ uint32_t bs_dist = trailing_zeroes(helper.bs_bits); \
49
+ uint8_t escape_char = src[bs_dist + 1]; \
50
+ /* we encountered backslash first. Handle backslash */ \
51
+ if (escape_char == 'u') { \
52
+ /* move src/dst up to the start; they will be further adjusted \
53
+ within the unicode codepoint handling code. */ \
54
+ src += bs_dist; \
55
+ dst += bs_dist; \
56
+ if (!handle_unicode_codepoint(&src, &dst)) { \
57
+ return false; \
58
+ } \
59
+ } else { \
60
+ /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and \
61
+ * write bs_dist+1 characters to output \
62
+ * note this may reach beyond the part of the buffer we've actually \
63
+ * seen. I think this is ok */ \
64
+ uint8_t escape_result = escape_map[escape_char]; \
65
+ if (escape_result == 0u) { \
66
+ return false; /* bogus escape value is an error */ \
67
+ } \
68
+ dst[bs_dist] = escape_result; \
69
+ src += bs_dist + 2; \
70
+ dst += bs_dist + 1; \
71
+ } \
72
+ } else { \
73
+ /* they are the same. Since they can't co-occur, it means we \
74
+ * encountered neither. */ \
75
+ if constexpr (T == Architecture::WESTMERE) { \
76
+ src += 16; \
77
+ dst += 16; \
78
+ } else { \
79
+ src += 32; \
80
+ dst += 32; \
81
+ } \
82
+ } \
83
+ } \
84
+ /* can't be reached */ \
85
+ return true; \
86
+ }
87
+
88
+ #endif
@@ -0,0 +1,41 @@
1
+ #ifndef SIMDJSON_STRINGPARSING_WESTMERE_H
2
+ #define SIMDJSON_STRINGPARSING_WESTMERE_H
3
+
4
+ #include "simdjson/stringparsing.h"
5
+ #include "simdjson/stringparsing_macros.h"
6
+
7
+ #ifdef IS_X86_64
8
+ TARGET_WESTMERE
9
+ namespace simdjson {
10
+ template <>
11
+ really_inline parse_string_helper
12
+ find_bs_bits_and_quote_bits<Architecture::WESTMERE>(const uint8_t *src,
13
+ uint8_t *dst) {
14
+ // the 16-byte load below can read up to 15 bytes beyond the buffer size, but we
15
+ // require SIMDJSON_PADDING of padding
16
+ __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
17
+ // store to dest unconditionally - we can overwrite the bytes we don't like
18
+ // later
19
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), v);
20
+ auto quote_mask = _mm_cmpeq_epi8(v, _mm_set1_epi8('"'));
21
+ return {
22
+ static_cast<uint32_t>(
23
+ _mm_movemask_epi8(_mm_cmpeq_epi8(v, _mm_set1_epi8('\\')))), // bs_bits: one bit per input byte that equals a backslash
24
+ static_cast<uint32_t>(_mm_movemask_epi8(quote_mask)) // quote_bits: one bit per input byte that equals a double quote
25
+ };
26
+ }
27
+
28
+ template <>
29
+ WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER
30
+ really_inline bool
31
+ parse_string<Architecture::WESTMERE>(UNUSED const uint8_t *buf,
32
+ UNUSED size_t len, ParsedJson &pj,
33
+ UNUSED const uint32_t depth,
34
+ UNUSED uint32_t offset) {
35
+ PARSE_STRING(Architecture::WESTMERE, buf, len, pj, depth, offset); // body comes from the shared PARSE_STRING macro
36
+ }
37
+ } // namespace simdjson
38
+ UNTARGET_REGION
39
+ #endif
40
+
41
+ #endif
@@ -0,0 +1,4 @@
1
+ Files from https://github.com/plokhotnyuk/jsoniter-scala/tree/master/jsoniter-scala-benchmark/src/main/resources/com/github/plokhotnyuk/jsoniter_scala/benchmark
2
+
3
+ See issue "Lower performance on small files":
4
+ https://github.com/lemire/simdjson/issues/70
@@ -0,0 +1,11 @@
1
#!/bin/bash
# Builds the jsonstats tool and runs it on every *.json file in the
# jsonexamples directory of the project (the parent of this script's directory).

# Physical directory containing this script; abort if it cannot be resolved.
SCRIPTPATH="$(cd "$(dirname "$0")" && pwd -P)" || exit 1
# make and the relative paths below expect the project root as working directory.
cd "$SCRIPTPATH/.." || exit 1
# Do not run a stale or missing binary if the build fails.
make jsonstats || exit 1
echo
for i in "$SCRIPTPATH"/../jsonexamples/*.json; do
  # An unmatched glob stays literal; skip anything that is not a regular file.
  [ -f "$i" ] || continue
  echo "$i"
  "$SCRIPTPATH/../jsonstats" "$i"
  echo
done
@@ -0,0 +1,14 @@
1
#!/bin/bash
# Runs ./allparserscheckfile -m on every JSON file under
# jsonchecker/adversarial/issue150/ and stops at the first non-zero exit.
# Exit status: 0 when every invocation succeeds, 1 otherwise.

# Physical directory containing this script; abort if it cannot be resolved.
SCRIPTPATH="$(cd "$(dirname "$0")" && pwd -P)" || exit 1
# The relative paths below are resolved against the project root.
cd "$SCRIPTPATH/.." || exit 1

# No -f guard here on purpose: if the glob matches nothing, the literal pattern
# is handed to the checker, which is then expected to fail rather than let the
# script report success without having checked anything.
for i in jsonchecker/adversarial/issue150/*.json; do
  echo "$i"
  if ! ./allparserscheckfile -m "$i"; then
    echo "potential bug"
    exit 1
  fi
done

echo "Code is probably ok. All parsers agree."
exit 0
@@ -0,0 +1,3 @@
1
+ - npm install
2
+ - nodejs generatelargejson.js (or node generatelargejson.js)
3
+
@@ -0,0 +1,19 @@
1
+
2
// Writes large.json next to this script: a JSON array of user cards produced
// by faker.helpers.userCard().
var fs = require('fs');

var faker = require('faker');


// generate bigDataSet as example
var bigSet = [];
var mmax = 500000;
console.log("this may take some time...");
// NOTE(review): the counter starts at 10, so mmax - 10 entries are generated;
// kept exactly as in the original.
for (var i = 10; i < mmax; i++) {
  // Rewrite the progress line in place every 1024 entries.
  if (i % 1024 == 0) process.stdout.write("\r" + i + " entries (" + Math.round(i * 100.0 / mmax) + " percent)");
  bigSet.push(faker.helpers.userCard());
}
console.log();

fs.writeFile(__dirname + '/large.json', JSON.stringify(bigSet), function(err) {
  // Report a failed write instead of claiming success unconditionally.
  if (err) {
    console.error("could not write large.json: " + err.message);
    process.exitCode = 1;
    return;
  }
  console.log("large.json generated successfully!");
});
@@ -0,0 +1,11 @@
1
#!/bin/bash
# Builds the minifiercompetition benchmark and runs it on every *.json file in
# the jsonexamples directory of the project (the parent of this script's
# directory).

# Physical directory containing this script; abort if it cannot be resolved.
SCRIPTPATH="$(cd "$(dirname "$0")" && pwd -P)" || exit 1
# make and the relative paths below expect the project root as working directory.
cd "$SCRIPTPATH/.." || exit 1
# Do not run a stale or missing binary if the build fails.
make minifiercompetition || exit 1
echo
for i in "$SCRIPTPATH"/../jsonexamples/*.json; do
  # An unmatched glob stays literal; skip anything that is not a regular file.
  [ -f "$i" ] || continue
  echo "$i"
  "$SCRIPTPATH/../minifiercompetition" "$i"
  echo
done
@@ -0,0 +1,24 @@
1
#!/bin/bash
# Builds and runs parseandstatcompetition on every *.json file in jsonexamples,
# then builds and runs distinctuseridcompetition on jsonexamples/twitter.json.

# Physical directory containing this script; abort if it cannot be resolved.
SCRIPTPATH="$(cd "$(dirname "$0")" && pwd -P)" || exit 1
# make and the relative paths below expect the project root as working directory.
cd "$SCRIPTPATH/.." || exit 1

make parseandstatcompetition || exit 1
echo "parsing and collecting basic stats on json documents as quickly as possible"
echo
for i in "$SCRIPTPATH"/../jsonexamples/*.json; do
  # An unmatched glob stays literal; skip anything that is not a regular file.
  [ -f "$i" ] || continue
  echo "$i"
  "$SCRIPTPATH/../parseandstatcompetition" "$i"
  echo
done

make distinctuseridcompetition || exit 1
echo "parsing and finding all user.id"
echo

# Single input file, so a plain existence check replaces the one-element loop.
twitter="$SCRIPTPATH/../jsonexamples/twitter.json"
if [ -f "$twitter" ]; then
  echo "$twitter"
  # The tool receives the project-relative path, exactly as before.
  "$SCRIPTPATH/../distinctuseridcompetition" jsonexamples/twitter.json
  echo
fi
24
+
@@ -0,0 +1,11 @@
1
#!/bin/bash
# Builds the parsingcompetition benchmark and runs it on every *.json file in
# the jsonexamples directory of the project (the parent of this script's
# directory).

# Physical directory containing this script; abort if it cannot be resolved.
SCRIPTPATH="$(cd "$(dirname "$0")" && pwd -P)" || exit 1
# make and the relative paths below expect the project root as working directory.
cd "$SCRIPTPATH/.." || exit 1
# Do not run a stale or missing binary if the build fails.
make parsingcompetition || exit 1
echo
for i in "$SCRIPTPATH"/../jsonexamples/*.json; do
  # An unmatched glob stays literal; skip anything that is not a regular file.
  [ -f "$i" ] || continue
  echo "$i"
  "$SCRIPTPATH/../parsingcompetition" "$i"
  echo
done
@@ -0,0 +1,26 @@
1
#!/bin/bash
# Runs parsingcompetition and allparsingcompetition with -t on every *.json
# file in jsonexamples, stores each output as <file>.table / all<file>.table
# under $SCRIPTPATH/data/<uname -n>/, then runs gnuplot in that directory.

# Physical directory containing this script; abort if it cannot be resolved.
SCRIPTPATH="$(cd "$(dirname "$0")" && pwd -P)" || exit 1
# make and the relative paths below expect the project root as working directory.
cd "$SCRIPTPATH/.." || exit 1
datadirectory="$SCRIPTPATH/data/$(uname -n)"
mkdir -p "$datadirectory" || exit 1

make parsingcompetition allparsingcompetition || exit 1
echo "parsing (with competition)"
echo
for i in "$SCRIPTPATH"/../jsonexamples/*.json; do
  # An unmatched glob stays literal; skip anything that is not a regular file.
  [ -f "$i" ] || continue
  echo "$i"
  # e.g. twitter.json -> twitter.json.table
  shortname="$(basename "$i").table"
  "$SCRIPTPATH/../parsingcompetition" -t "$i" > "$datadirectory/$shortname"
  "$SCRIPTPATH/../allparsingcompetition" -t "$i" > "$datadirectory/all$shortname"
  echo
done

echo "see results in $datadirectory"

# NOTE(review): bar.gnuplot is looked up inside the data directory - confirm
# that it is expected to live there.
cd "$datadirectory" && gnuplot bar.gnuplot
@@ -0,0 +1,98 @@
1
#!/bin/bash
# Runs the benchmark tools on the example JSON files and renders the results
# as PDF plots with gnuplot under $SCRIPTPATH/plots/<uname -n>/.
# On Linux it additionally produces stackedperf.pdf from four builds of the
# parse tool.

command -v gnuplot > /dev/null || { echo "gnuplot is not installed" 1>&2 ; exit 1; }

# Physical directory containing this script; abort if it cannot be resolved.
SCRIPTPATH="$(cd "$(dirname "$0")" && pwd -P)" || exit 1
# make and the relative paths below expect the project root as working directory.
cd "$SCRIPTPATH/.." || exit 1
plotdirectory="$SCRIPTPATH/plots/$(uname -n)"
mkdir -p "$plotdirectory" || exit 1

os=$(uname)

#######################################
# Sorts a result table, plots it with bar.gnuplot and removes the
# intermediate files.
# Globals:   SCRIPTPATH (read)
# Arguments: $1 - path of the table file, $2 - path of the PDF to produce
#######################################
plot_table() {
  local table=$1
  local pdf=$2
  sort "$table" > "$table.table.sorted"
  gnuplot -e "filename='$table.table.sorted';name='$pdf'" "$SCRIPTPATH/bar.gnuplot"
  rm "$table"
  rm "$table.table.sorted"
}

if [ "$os" = "Linux" ]; then
  echo "You are using linux."
  echo "We are going to just parse using simdjson, and collect perf stats."

  make parse parse_noutf8validation parse_nonumberparsing parse_nostringparsing || exit 1
  # Each table starts with one empty line, as before, so that paste keeps the
  # four files aligned.
  myfile="$plotdirectory/parselinuxtable.txt"
  echo "$myfile"
  echo "" > "$myfile"

  myfilenoutf8validation="$plotdirectory/parselinuxtable_noutf8validation.txt"
  echo "$myfilenoutf8validation"
  echo "" > "$myfilenoutf8validation"

  myfilenonumberparsing="$plotdirectory/parselinuxtable_nonumberparsing.txt"
  echo "$myfilenonumberparsing"
  echo "" > "$myfilenonumberparsing"

  myfilenostringparsing="$plotdirectory/parselinuxtable_nostringparsing.txt"
  echo "$myfilenostringparsing"
  echo "" > "$myfilenostringparsing"

  for i in "$SCRIPTPATH"/../jsonexamples/*.json; do
    # An unmatched glob stays literal; skip anything that is not a regular file.
    [ -f "$i" ] || continue
    echo "$i"
    "$SCRIPTPATH/../parse" -t "$i" >> "$myfile"
    "$SCRIPTPATH/../parse_noutf8validation" -t "$i" >> "$myfilenoutf8validation"
    "$SCRIPTPATH/../parse_nonumberparsing" -t "$i" >> "$myfilenonumberparsing"
    "$SCRIPTPATH/../parse_nostringparsing" -t "$i" >> "$myfilenostringparsing"
  done
  # Merge the four tables column-wise into the first one.
  paste "$myfile" "$myfilenoutf8validation" "$myfilenonumberparsing" "$myfilenostringparsing" > "$myfile.tmp"
  mv "$myfile.tmp" "$myfile"
  rm "$myfilenoutf8validation" "$myfilenonumberparsing" "$myfilenostringparsing"
  gnuplot -e "filename='$myfile';name='$plotdirectory/stackedperf.pdf'" "$SCRIPTPATH/stackbar.gnuplot"
fi

make parsingcompetition || exit 1
echo "parsing (with competition)"
echo
for i in "$SCRIPTPATH"/../jsonexamples/*.json; do
  [ -f "$i" ] || continue
  echo "$i"
  # e.g. twitter.json -> twitter.json.table -> twitter.json.pdf
  shortname="$(basename "$i").table"
  corename="${shortname%.*}.pdf"
  "$SCRIPTPATH/../parsingcompetition" -t "$i" > "$plotdirectory/$shortname"
  plot_table "$plotdirectory/$shortname" "$plotdirectory/$corename"
  echo
done


make parseandstatcompetition || exit 1
echo "parsing and collecting basic stats on json documents as quickly as possible"
echo
for i in "$SCRIPTPATH"/../jsonexamples/*.json; do
  [ -f "$i" ] || continue
  echo "$i"
  # e.g. twitter.json -> twitter.jsonparseandstat.table -> twitter.jsonparseandstat.pdf
  shortname="$(basename "$i")parseandstat.table"
  corename="${shortname%.*}.pdf"
  "$SCRIPTPATH/../parseandstatcompetition" -t "$i" > "$plotdirectory/$shortname"
  plot_table "$plotdirectory/$shortname" "$plotdirectory/$corename"
  echo
done

make distinctuseridcompetition || exit 1
echo "parsing and finding all user.id"
echo

# Single input file, so a plain existence check replaces the one-element loop.
twitter="$SCRIPTPATH/../jsonexamples/twitter.json"
if [ -f "$twitter" ]; then
  echo "$twitter"
  shortname="$(basename "$twitter")distinctuserid.table"
  corename="${shortname%.*}.pdf"
  # The tool receives the project-relative path, exactly as before.
  "$SCRIPTPATH/../distinctuseridcompetition" -t jsonexamples/twitter.json > "$plotdirectory/$shortname"
  plot_table "$plotdirectory/$shortname" "$plotdirectory/$corename"
  echo
fi

echo "see results in $plotdirectory"
@@ -0,0 +1,11 @@
1
#!/bin/bash
# Builds the parsingcompetition benchmark and runs it on a fixed selection of
# example files: twitter, update-center, github_events and gsoc-2018.

# Physical directory containing this script; abort if it cannot be resolved.
SCRIPTPATH="$(cd "$(dirname "$0")" && pwd -P)" || exit 1
# make and the relative paths below expect the project root as working directory.
cd "$SCRIPTPATH/.." || exit 1
# Do not run a stale or missing binary if the build fails.
make parsingcompetition || exit 1
echo
for i in \
  "$SCRIPTPATH/../jsonexamples/twitter.json" \
  "$SCRIPTPATH/../jsonexamples/update-center.json" \
  "$SCRIPTPATH/../jsonexamples/github_events.json" \
  "$SCRIPTPATH/../jsonexamples/gsoc-2018.json"; do
  # A missing file must not stop the remaining files from being benchmarked.
  [ -f "$i" ] || continue
  echo "$i"
  "$SCRIPTPATH/../parsingcompetition" "$i"
  echo
done
@@ -0,0 +1,15 @@
1
#!/bin/bash
# Writes 0 to the "online" file of every CPU under /sys/devices/system/cpu
# whose number is not the first entry of some thread_siblings_list, then prints
# the first two "siblings" / "cpu cores" lines of /proc/cpuinfo.
# NOTE(review): writing to .../online presumably requires root - confirm.

# Space-separated list of the first CPU number of each thread_siblings_list
# (sed drops everything from the first non-digit onwards).
# Be careful to not skip the space at the beginning nor the end
CPUS_TO_SKIP=" $(sed 's/[^0-9].*//' /sys/devices/system/cpu/cpu*/topology/thread_siblings_list | sort -u | tr '\r\n' '  ') "


for CPU_PATH in /sys/devices/system/cpu/cpu[0-9]*; do
  # Keep only the digits of the path, i.e. the CPU number.
  CPU="${CPU_PATH//[!0-9]/}"
  # The surrounding spaces make " 1 " match CPU 1 only, not 10 or 11.
  case "$CPUS_TO_SKIP" in
    *" $CPU "*) ;;
    *) echo 0 > "$CPU_PATH/online" ;;
  esac
done

# grep -E replaces the deprecated egrep.
grep -E 'siblings|cpu cores' /proc/cpuinfo | head -2
@@ -0,0 +1,32 @@
1
#!/usr/bin/env bash
# Sets the cpufreq governor of every CPU listed in /proc/cpuinfo.
# Usage: powerpolicy.sh ondemand | performance | list
#   ondemand, performance - governor passed to cpufreq-set for each CPU
#   list                  - run cpufreq-info and exit
#
# sudo /usr/bin/cpupower frequency-set -g performance
#######
# taken from http://hbfs.wordpress.com/2013/06/18/fast-path-finding-part-ii/
# might require sudo apt-get install cpufrequtils
# invoke with performance or ondemand
# type cpufreq-info to check results, you can also verify with cat /proc/cpuinfo

case "$1" in
  ondemand)
    echo "setting up ondemand"
    ;;
  performance)
    echo "setting up for performance"
    ;;
  list)
    cpufreq-info
    exit 0
    ;;
  *)
    echo "usage: powerpolicy.sh ondemand | performance | list" >&2
    # "exit -1" is outside the valid 0-255 range; use a plain failure status.
    exit 1
    ;;
esac
policy=$1

echo "chosen policy  $policy"

# enumerate found CPUs: the value after the colon of each "processor" line
cpus=$(grep '^processor' /proc/cpuinfo | cut -d: -f 2)

# set governor for each CPU
# $cpus is a plain string, left unquoted on purpose so that it splits into one
# CPU number per iteration.
for cpu in $cpus; do
  cpufreq-set -c "$cpu" -g "$policy"
done
@@ -0,0 +1,6 @@
1
#!/bin/sh
# Exports CXX/CC as g++-7/gcc-7, then runs ./disablehyperthreading.sh and
# "./turboboost.sh on" from this script's own directory.
# NOTE(review): "on" makes turboboost.sh call enable_tb - confirm that enabling
# (rather than disabling) Turbo Boost is intended for test runs.

# dirname yields "." when the script is invoked without a directory component,
# a case where "${0%/*}" expands to the script name and cd fails.
cd "$(dirname "$0")" || exit 1
export CXX=g++-7
export CC=gcc-7
#./powerpolicy.sh performance
./disablehyperthreading.sh
./turboboost.sh on
@@ -0,0 +1,51 @@
1
+ #!/bin/sh
2
+ # stolen from https://github.com/DropD/fnc-simplex/blob/master/linux_turboboost.sh
3
+
4
+ # you might need to run sudo apt-get install msr-tools
5
+ # Toggle Turbo Boost for Ivy Bridge CPUs (should work for all newer Core)
6
+ # Requires a fairly new Linux kernel (let's say 3.0+)
7
+ # Written by Donjan Rodic, released for free use
8
+
9
+ # check current real frequency with sudo turbostat -s -i1
10
+
11
+ sudo modprobe msr
12
+
13
# all_cores FOO
# perform FOO(i) for each core i, where i runs from 0 to N-1 and N is the
# number of "core id" lines in /proc/cpuinfo
# Sets the globals NPROCS (to N-1) and i, as this script targets plain sh.
all_cores() {
  # grep -c counts the matching lines directly; fall back to 0 if it prints
  # nothing (e.g. /proc/cpuinfo is unreadable).
  NPROCS=$(grep -c "core id" /proc/cpuinfo)
  NPROCS=$((${NPROCS:-0} - 1))
  # The explicit step of 1 keeps seq from counting downwards when NPROCS is -1.
  for i in $(seq 0 1 "$NPROCS"); do
    # $1 stays unquoted so that FOO may carry leading arguments of its own.
    $1 "$i"
  done
}
22
+
23
+
24
# report Turbo Boost state on core $1
# Prints "<core>: on" when bit 38 of MSR 0x1a0 reads 0 and "<core>: off"
# otherwise (disable_tb writes a value with that bit set, enable_tb one with
# it clear). Returns 1 with a message on stderr when the register cannot be read.
read_tb() {
  ret=$(sudo rdmsr -p"$1" 0x1a0 -f 38:38) || {
    # Do not report "off" just because the read itself failed.
    echo "$1: cannot read MSR 0x1a0" >&2
    return 1
  }
  if [ "$ret" -eq 0 ]; then
    echo "$1": on
  else
    echo "$1": off
  fi
}
29
+
30
+ # enable Turbo Boost on core $1 by writing 0x850089 (bit 38 clear) to MSR 0x1a0
31
+ enable_tb() {
32
+ sudo wrmsr -p"$1" 0x1a0 0x850089 # NOTE(review): fixed value for the whole register - confirm the other bits suit the target CPU
33
+ }
34
+
35
+ # disable Turbo Boost on core $1 by writing 0x4000850089 (bit 38 set) to MSR 0x1a0
36
+ disable_tb() {
37
+ sudo wrmsr -p"$1" 0x1a0 0x4000850089 # NOTE(review): fixed value for the whole register - confirm the other bits suit the target CPU
38
+ }
39
+
40
+
41
# Dispatch on the first argument: "on" / "off" change the Turbo Boost state of
# every core and then report it, "list" only reports it.
case "$1" in
  on)
    all_cores enable_tb
    all_cores read_tb
    ;;
  off)
    all_cores disable_tb
    all_cores read_tb
    ;;
  list)
    all_cores read_tb
    ;;
  *)
    # Unknown or missing argument: report on stderr and fail instead of
    # exiting with status 0.
    echo "usage: turboboost.sh on | off | list" >&2
    exit 1
    ;;
esac