simdjson 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clang-format +5 -0
- data/.gitignore +14 -0
- data/.gitmodules +3 -0
- data/.rubocop.yml +9 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +32 -0
- data/benchmark/apache_builds.json +4421 -0
- data/benchmark/demo.json +15 -0
- data/benchmark/github_events.json +1390 -0
- data/benchmark/run_benchmark.rb +30 -0
- data/ext/simdjson/extconf.rb +22 -0
- data/ext/simdjson/simdjson.cpp +76 -0
- data/ext/simdjson/simdjson.hpp +6 -0
- data/lib/simdjson/version.rb +3 -0
- data/lib/simdjson.rb +2 -0
- data/simdjson.gemspec +35 -0
- data/vendor/.gitkeep +0 -0
- data/vendor/simdjson/AUTHORS +3 -0
- data/vendor/simdjson/CMakeLists.txt +63 -0
- data/vendor/simdjson/CONTRIBUTORS +27 -0
- data/vendor/simdjson/Dockerfile +10 -0
- data/vendor/simdjson/LICENSE +201 -0
- data/vendor/simdjson/Makefile +203 -0
- data/vendor/simdjson/Notes.md +85 -0
- data/vendor/simdjson/README.md +581 -0
- data/vendor/simdjson/amalgamation.sh +158 -0
- data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
- data/vendor/simdjson/benchmark/benchmark.h +223 -0
- data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
- data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
- data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
- data/vendor/simdjson/benchmark/parse.cpp +393 -0
- data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
- data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
- data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
- data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
- data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/gbps.png +0 -0
- data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
- data/vendor/simdjson/images/halvarflake.png +0 -0
- data/vendor/simdjson/images/logo.png +0 -0
- data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
- data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
- data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
- data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
- data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
- data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
- data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
- data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
- data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
- data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
- data/vendor/simdjson/include/simdjson/portability.h +172 -0
- data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
- data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
- data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
- data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
- data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
- data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
- data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
- data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
- data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
- data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
- data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
- data/vendor/simdjson/scripts/issue150.sh +14 -0
- data/vendor/simdjson/scripts/javascript/README.md +3 -0
- data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
- data/vendor/simdjson/scripts/minifier.sh +11 -0
- data/vendor/simdjson/scripts/parseandstat.sh +24 -0
- data/vendor/simdjson/scripts/parser.sh +11 -0
- data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
- data/vendor/simdjson/scripts/plotparse.sh +98 -0
- data/vendor/simdjson/scripts/selectparser.sh +11 -0
- data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
- data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
- data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
- data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
- data/vendor/simdjson/scripts/testjson2json.sh +99 -0
- data/vendor/simdjson/scripts/transitions/Makefile +10 -0
- data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
- data/vendor/simdjson/singleheader/README.md +1 -0
- data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
- data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
- data/vendor/simdjson/singleheader/simdjson.h +39692 -0
- data/vendor/simdjson/src/CMakeLists.txt +67 -0
- data/vendor/simdjson/src/jsonioutil.cpp +35 -0
- data/vendor/simdjson/src/jsonminifier.cpp +285 -0
- data/vendor/simdjson/src/jsonparser.cpp +91 -0
- data/vendor/simdjson/src/parsedjson.cpp +323 -0
- data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
- data/vendor/simdjson/src/simdjson.cpp +30 -0
- data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
- data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
- data/vendor/simdjson/style/clang-format-check.sh +25 -0
- data/vendor/simdjson/style/clang-format.sh +25 -0
- data/vendor/simdjson/style/run-clang-format.py +326 -0
- data/vendor/simdjson/tape.md +134 -0
- data/vendor/simdjson/tests/CMakeLists.txt +25 -0
- data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
- data/vendor/simdjson/tests/basictests.cpp +75 -0
- data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
- data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
- data/vendor/simdjson/tests/pointercheck.cpp +38 -0
- data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
- data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
- data/vendor/simdjson/tools/CMakeLists.txt +3 -0
- data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
- data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
- data/vendor/simdjson/tools/json2json.cpp +112 -0
- data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
- data/vendor/simdjson/tools/jsonstats.cpp +143 -0
- data/vendor/simdjson/tools/minify.cpp +21 -0
- data/vendor/simdjson/tools/release.py +125 -0
- data/vendor/simdjson/windows/dirent_portable.h +1043 -0
- metadata +273 -0
@@ -0,0 +1,56 @@
|
|
1
|
+
#ifndef SIMDJSON_STRINGPARSING_ARM64_H
|
2
|
+
#define SIMDJSON_STRINGPARSING_ARM64_H
|
3
|
+
|
4
|
+
#include "simdjson/stringparsing.h"
|
5
|
+
#include "simdjson/stringparsing_macros.h"
|
6
|
+
|
7
|
+
#ifdef IS_ARM64
|
8
|
+
namespace simdjson {
|
9
|
+
template <>
|
10
|
+
really_inline parse_string_helper
|
11
|
+
find_bs_bits_and_quote_bits<Architecture::ARM64>(const uint8_t *src,
|
12
|
+
uint8_t *dst) {
|
13
|
+
// this can read up to 31 bytes beyond the buffer size, but we require
|
14
|
+
// SIMDJSON_PADDING of padding
|
15
|
+
static_assert(2 * sizeof(uint8x16_t) - 1 <= SIMDJSON_PADDING);
|
16
|
+
uint8x16_t v0 = vld1q_u8(src);
|
17
|
+
uint8x16_t v1 = vld1q_u8(src + 16);
|
18
|
+
vst1q_u8(dst, v0);
|
19
|
+
vst1q_u8(dst + 16, v1);
|
20
|
+
|
21
|
+
uint8x16_t bs_mask = vmovq_n_u8('\\');
|
22
|
+
uint8x16_t qt_mask = vmovq_n_u8('"');
|
23
|
+
const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
|
24
|
+
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
|
25
|
+
uint8x16_t cmp_bs_0 = vceqq_u8(v0, bs_mask);
|
26
|
+
uint8x16_t cmp_bs_1 = vceqq_u8(v1, bs_mask);
|
27
|
+
uint8x16_t cmp_qt_0 = vceqq_u8(v0, qt_mask);
|
28
|
+
uint8x16_t cmp_qt_1 = vceqq_u8(v1, qt_mask);
|
29
|
+
|
30
|
+
cmp_bs_0 = vandq_u8(cmp_bs_0, bit_mask);
|
31
|
+
cmp_bs_1 = vandq_u8(cmp_bs_1, bit_mask);
|
32
|
+
cmp_qt_0 = vandq_u8(cmp_qt_0, bit_mask);
|
33
|
+
cmp_qt_1 = vandq_u8(cmp_qt_1, bit_mask);
|
34
|
+
|
35
|
+
uint8x16_t sum0 = vpaddq_u8(cmp_bs_0, cmp_bs_1);
|
36
|
+
uint8x16_t sum1 = vpaddq_u8(cmp_qt_0, cmp_qt_1);
|
37
|
+
sum0 = vpaddq_u8(sum0, sum1);
|
38
|
+
sum0 = vpaddq_u8(sum0, sum0);
|
39
|
+
return {
|
40
|
+
vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 0), // bs_bits
|
41
|
+
vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 1) // quote_bits
|
42
|
+
};
|
43
|
+
}
|
44
|
+
|
45
|
+
template <>
|
46
|
+
WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER
|
47
|
+
really_inline bool
|
48
|
+
parse_string<Architecture::ARM64>(UNUSED const uint8_t *buf,
|
49
|
+
UNUSED size_t len, ParsedJson &pj,
|
50
|
+
UNUSED const uint32_t depth,
|
51
|
+
UNUSED uint32_t offset) {
|
52
|
+
PARSE_STRING(Architecture::ARM64, buf, len, pj, depth, offset);
|
53
|
+
}
|
54
|
+
} // namespace simdjson
|
55
|
+
#endif
|
56
|
+
#endif
|
@@ -0,0 +1,43 @@
|
|
1
|
+
#ifndef SIMDJSON_STRINGPARSING_HASWELL_H
|
2
|
+
#define SIMDJSON_STRINGPARSING_HASWELL_H
|
3
|
+
|
4
|
+
#include "simdjson/stringparsing.h"
|
5
|
+
#include "simdjson/stringparsing_macros.h"
|
6
|
+
|
7
|
+
#ifdef IS_X86_64
|
8
|
+
TARGET_HASWELL
|
9
|
+
namespace simdjson {
|
10
|
+
template <>
|
11
|
+
really_inline parse_string_helper
|
12
|
+
find_bs_bits_and_quote_bits<Architecture::HASWELL>(const uint8_t *src,
|
13
|
+
uint8_t *dst) {
|
14
|
+
// this can read up to 31 bytes beyond the buffer size, but we require
|
15
|
+
// SIMDJSON_PADDING of padding
|
16
|
+
static_assert(sizeof(__m256i) - 1 <= SIMDJSON_PADDING);
|
17
|
+
__m256i v = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
|
18
|
+
// store to dest unconditionally - we can overwrite the bits we don't like
|
19
|
+
// later
|
20
|
+
_mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), v);
|
21
|
+
auto quote_mask = _mm256_cmpeq_epi8(v, _mm256_set1_epi8('"'));
|
22
|
+
return {
|
23
|
+
static_cast<uint32_t>(_mm256_movemask_epi8(
|
24
|
+
_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\')))), // bs_bits
|
25
|
+
static_cast<uint32_t>(_mm256_movemask_epi8(quote_mask)) // quote_bits
|
26
|
+
};
|
27
|
+
}
|
28
|
+
|
29
|
+
template <>
|
30
|
+
WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER
|
31
|
+
really_inline bool
|
32
|
+
parse_string<Architecture::HASWELL>(UNUSED const uint8_t *buf,
|
33
|
+
UNUSED size_t len, ParsedJson &pj,
|
34
|
+
UNUSED const uint32_t depth,
|
35
|
+
UNUSED uint32_t offset) {
|
36
|
+
PARSE_STRING(Architecture::HASWELL, buf, len, pj, depth, offset);
|
37
|
+
}
|
38
|
+
|
39
|
+
} // namespace simdjson
|
40
|
+
UNTARGET_REGION
|
41
|
+
#endif
|
42
|
+
|
43
|
+
#endif
|
@@ -0,0 +1,88 @@
|
|
1
|
+
#ifndef SIMDJSON_STRINGPARSING_MACROS_H
|
2
|
+
#define SIMDJSON_STRINGPARSING_MACROS_H
|
3
|
+
|
4
|
+
// We need to compile that code for multiple architectures. However, target
|
5
|
+
// attributes can be used only once by function definition. Huge macro seemed
|
6
|
+
// better than huge code duplication.
|
7
|
+
// bool PARSE_STRING(Architecture T, const uint8_t *buf, size_t len, ParsedJson
|
8
|
+
// &pj,const uint32_t depth, uint32_t offset)
|
9
|
+
#define PARSE_STRING(T, buf, len, pj, depth, offset)                           \
  {                                                                            \
    /* tape entry: offset of this string inside the string buffer */           \
    pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"');             \
    /* we know that buf at offset is a ", so the content starts right after */ \
    const uint8_t *src = &buf[offset + 1];                                     \
    /* the first sizeof(uint32_t) bytes are reserved for the string length */  \
    uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t);               \
    const uint8_t *const start_of_string = dst;                                \
    /* bytes skipped when a chunk holds neither a quote nor a backslash */     \
    constexpr size_t chunk_width = (T == Architecture::WESTMERE) ? 16 : 32;    \
    for (;;) {                                                                 \
      parse_string_helper helper = find_bs_bits_and_quote_bits<T>(src, dst);   \
      if (((helper.bs_bits - 1) & helper.quote_bits) != 0) {                   \
        /* we encountered quotes first: the string ends in this chunk */       \
        uint32_t quote_dist = trailing_zeroes(helper.quote_bits);              \
        /* NULL termination is handy for callers expecting C strings; it      \
         * comes at a small cost */                                            \
        dst[quote_dist] = 0;                                                   \
        /* No overflow check on the length: it would only matter for strings  \
         * of 4GB or more, and documents that large are refused. */            \
        uint32_t str_length = (dst - start_of_string) + quote_dist;            \
        memcpy(pj.current_string_buf_loc, &str_length, sizeof(uint32_t));      \
        /* advance past the string, accounting for the NULL terminator */      \
        pj.current_string_buf_loc = dst + quote_dist + 1;                      \
        return true;                                                           \
      }                                                                        \
      if (((helper.quote_bits - 1) & helper.bs_bits) == 0) {                   \
        /* quote and backslash can't co-occur on the same byte, so neither    \
         * was found: move on to the next chunk */                             \
        src += chunk_width;                                                    \
        dst += chunk_width;                                                    \
        continue;                                                              \
      }                                                                        \
      /* we encountered a backslash first: find out where it is */             \
      uint32_t bs_dist = trailing_zeroes(helper.bs_bits);                      \
      uint8_t escape_char = src[bs_dist + 1];                                  \
      if (escape_char == 'u') {                                                \
        /* move src/dst up to the backslash; they will be further adjusted    \
         * within the unicode codepoint handling code */                       \
        src += bs_dist;                                                        \
        dst += bs_dist;                                                        \
        if (!handle_unicode_codepoint(&src, &dst)) {                           \
          return false;                                                        \
        }                                                                      \
        continue;                                                              \
      }                                                                        \
      /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and    \
       * write bs_dist+1 characters to output. Note this may reach beyond     \
       * the part of the buffer we've actually seen. */                        \
      uint8_t escape_result = escape_map[escape_char];                         \
      if (escape_result == 0u) {                                               \
        return false; /* bogus escape value is an error */                     \
      }                                                                        \
      dst[bs_dist] = escape_result;                                            \
      src += bs_dist + 2;                                                      \
      dst += bs_dist + 1;                                                      \
    }                                                                          \
    /* can't be reached */                                                     \
    return true;                                                               \
  }
|
87
|
+
|
88
|
+
#endif
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#ifndef SIMDJSON_STRINGPARSING_WESTMERE_H
|
2
|
+
#define SIMDJSON_STRINGPARSING_WESTMERE_H
|
3
|
+
|
4
|
+
#include "simdjson/stringparsing.h"
|
5
|
+
#include "simdjson/stringparsing_macros.h"
|
6
|
+
|
7
|
+
#ifdef IS_X86_64
|
8
|
+
TARGET_WESTMERE
|
9
|
+
namespace simdjson {
|
10
|
+
template <>
|
11
|
+
really_inline parse_string_helper
|
12
|
+
find_bs_bits_and_quote_bits<Architecture::WESTMERE>(const uint8_t *src,
|
13
|
+
uint8_t *dst) {
|
14
|
+
// this can read up to 31 bytes beyond the buffer size, but we require
|
15
|
+
// SIMDJSON_PADDING of padding
|
16
|
+
__m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
|
17
|
+
// store to dest unconditionally - we can overwrite the bits we don't like
|
18
|
+
// later
|
19
|
+
_mm_storeu_si128(reinterpret_cast<__m128i *>(dst), v);
|
20
|
+
auto quote_mask = _mm_cmpeq_epi8(v, _mm_set1_epi8('"'));
|
21
|
+
return {
|
22
|
+
static_cast<uint32_t>(
|
23
|
+
_mm_movemask_epi8(_mm_cmpeq_epi8(v, _mm_set1_epi8('\\')))), // bs_bits
|
24
|
+
static_cast<uint32_t>(_mm_movemask_epi8(quote_mask)) // quote_bits
|
25
|
+
};
|
26
|
+
}
|
27
|
+
|
28
|
+
template <>
|
29
|
+
WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER
|
30
|
+
really_inline bool
|
31
|
+
parse_string<Architecture::WESTMERE>(UNUSED const uint8_t *buf,
|
32
|
+
UNUSED size_t len, ParsedJson &pj,
|
33
|
+
UNUSED const uint32_t depth,
|
34
|
+
UNUSED uint32_t offset) {
|
35
|
+
PARSE_STRING(Architecture::WESTMERE, buf, len, pj, depth, offset);
|
36
|
+
}
|
37
|
+
} // namespace simdjson
|
38
|
+
UNTARGET_REGION
|
39
|
+
#endif
|
40
|
+
|
41
|
+
#endif
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/bin/bash
# Runs ./allparserscheckfile -m on every JSON file under
# jsonchecker/adversarial/issue150/ (relative to the parent of this script's
# directory) and stops with status 1 at the first file for which the tool
# returns a non-zero status.
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
cd "$SCRIPTPATH/.." || { echo "cannot cd to $SCRIPTPATH/.." 1>&2; exit 1; }

checked=0
for i in jsonchecker/adversarial/issue150/*.json; do
  # An unmatched glob is left as the literal pattern; do not hand it to the tool.
  [ -f "$i" ] || continue
  echo "$i"
  if ! ./allparserscheckfile -m "$i"; then
    echo "potential bug"
    exit 1
  fi
  checked=$((checked + 1))
done

# Reporting success without having examined a single file would be misleading.
if [ "$checked" -eq 0 ]; then
  echo "no files found in jsonchecker/adversarial/issue150" 1>&2
  exit 1
fi

echo "Code is probably ok. All parsers agree."
exit 0
|
@@ -0,0 +1,19 @@
|
|
1
|
+
|
2
|
+
// Generates large.json next to this script: a JSON array of fake user cards
// produced by the `faker` package.
var fs = require('fs');

var faker = require('faker');


// generate bigDataSet as example
var bigSet = [];
var mmax = 500000;
console.log("this may take some time...");
// NOTE(review): the counter starts at 10, so mmax - 10 entries are generated;
// kept as-is because it is unclear whether that is intentional.
for (var i = 10; i < mmax; i++) {
  // progress line, refreshed in place every 1024 entries
  if (i % 1024 == 0) process.stdout.write("\r"+i+" entries ("+Math.round(i * 100.0 /mmax)+" percent)");
  bigSet.push(faker.helpers.userCard());
}
console.log();

fs.writeFile(__dirname + '/large.json', JSON.stringify(bigSet), function(err) {
  // Report success only when the write actually succeeded.
  if (err) {
    console.error("could not write large.json: " + err.message);
    process.exitCode = 1;
    return;
  }
  console.log("large.json generated successfully!");
});
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/bin/bash
# Builds and runs parseandstatcompetition on every file in jsonexamples/, then
# builds and runs distinctuseridcompetition on jsonexamples/twitter.json.
# Paths are resolved relative to the parent of this script's directory.
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
cd "$SCRIPTPATH/.." || { echo "cannot cd to $SCRIPTPATH/.." 1>&2; exit 1; }

# Do not benchmark a stale or missing binary if the build fails.
make parseandstatcompetition || exit 1
echo "parsing and collecting basic stats on json documents as quickly as possible"
echo
for i in "$SCRIPTPATH"/../jsonexamples/*.json; do
  # An unmatched glob is left as the literal pattern; stop in that case.
  [ -f "$i" ] || break
  echo "$i"
  "$SCRIPTPATH/../parseandstatcompetition" "$i"
  echo
done

make distinctuseridcompetition || exit 1
echo "parsing and finding all user.id"
echo

twitter="$SCRIPTPATH/../jsonexamples/twitter.json"
if [ -f "$twitter" ]; then
  echo "$twitter"
  # Pass the path that was just checked instead of a second, cwd-relative
  # spelling of the same file.
  "$SCRIPTPATH/../distinctuseridcompetition" "$twitter"
  echo
fi
|
24
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
#!/bin/bash
# Builds parsingcompetition and allparsingcompetition, runs both with -t on
# every file in jsonexamples/, and stores each output as a .table file under
# <script dir>/data/<hostname>/. Finally runs gnuplot in that directory.

SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
cd "$SCRIPTPATH/.." || { echo "cannot cd to $SCRIPTPATH/.." 1>&2; exit 1; }
datadirectory="$SCRIPTPATH/data/$(uname -n)"
mkdir -p "$datadirectory" || exit 1

# Do not collect data from a stale or missing binary if the build fails.
make parsingcompetition allparsingcompetition || exit 1
echo "parsing (with competition)"
echo
for i in "$SCRIPTPATH"/../jsonexamples/*.json; do
  # An unmatched glob is left as the literal pattern; stop in that case.
  [ -f "$i" ] || break
  echo "$i"
  # e.g. twitter.json -> twitter.json.table
  shortname="$(basename "$i").table"
  "$SCRIPTPATH/../parsingcompetition" -t "$i" > "$datadirectory/$shortname"
  "$SCRIPTPATH/../allparsingcompetition" -t "$i" > "$datadirectory/all$shortname"
  echo
done

echo "see results in $datadirectory"

# NOTE(review): bar.gnuplot is looked up inside the data directory here --
# confirm that a copy is expected to live there.
cd "$datadirectory" && gnuplot bar.gnuplot
|
@@ -0,0 +1,98 @@
|
|
1
|
+
#!/bin/bash
# Runs the benchmark executables on the files in jsonexamples/ and turns their
# -t output into PDF plots with gnuplot. Results are written to
# <script dir>/plots/<hostname>/.
[[ "$(command -v gnuplot)" ]] || { echo "gnuplot is not installed" 1>&2 ; exit 1; }

SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
cd "$SCRIPTPATH/.." || { echo "cannot cd to $SCRIPTPATH/.." 1>&2; exit 1; }
plotdirectory="$SCRIPTPATH/plots/$(uname -n)"
mkdir -p "$plotdirectory" || exit 1

os=$(uname)

#######################################
# Runs one competition executable with -t on each input file, sorts the output
# and plots it with bar.gnuplot. Intermediate table files are removed.
# Globals:   SCRIPTPATH, plotdirectory (read)
# Arguments: $1 - executable name (looked up in the parent of SCRIPTPATH)
#            $2 - suffix appended to the input's basename in output names
#            $3... - input JSON files
# Outputs:   <plotdirectory>/<basename><suffix>.pdf for every input
#######################################
plot_competition() {
  local tool=$1
  local suffix=$2
  shift 2
  local input shortname corename table sorted
  for input in "$@"; do
    # An unmatched glob is left as the literal pattern; stop in that case.
    [ -f "$input" ] || break
    echo "$input"
    shortname="$(basename "$input")${suffix}.table"
    corename="${shortname%.*}.pdf"
    table="$plotdirectory/$shortname"
    sorted="$table.table.sorted"
    "$SCRIPTPATH/../$tool" -t "$input" > "$table"
    sort "$table" > "$sorted"
    gnuplot -e "filename='$sorted';name='$plotdirectory/$corename'" "$SCRIPTPATH/bar.gnuplot"
    rm "$table" "$sorted"
    echo
  done
}

if [ "$os" = "Linux" ]; then
  echo "You are using linux."
  echo "We are going to just parse using simdjson, and collect perf stats."

  # Do not measure stale or missing binaries if the build fails.
  make parse parse_noutf8validation parse_nonumberparsing parse_nostringparsing || exit 1

  # One table per parse variant; the first entry is the plain "parse" binary.
  variants=("" _noutf8validation _nonumberparsing _nostringparsing)
  tables=()
  for v in "${variants[@]}"; do
    t="$plotdirectory/parselinuxtable$v.txt"
    echo "$t"
    echo "" > "$t"
    tables+=("$t")
  done

  for i in "$SCRIPTPATH"/../jsonexamples/*.json; do
    [ -f "$i" ] || break
    echo "$i"
    for idx in "${!variants[@]}"; do
      "$SCRIPTPATH/../parse${variants[idx]}" -t "$i" >> "${tables[idx]}"
    done
  done

  # Merge the four tables column-wise into the first one and drop the rest.
  paste "${tables[@]}" > "${tables[0]}.tmp"
  mv "${tables[0]}.tmp" "${tables[0]}"
  rm "${tables[@]:1}"
  gnuplot -e "filename='${tables[0]}';name='$plotdirectory/stackedperf.pdf'" "$SCRIPTPATH/stackbar.gnuplot"
fi

make parsingcompetition || exit 1
echo "parsing (with competition)"
echo
plot_competition parsingcompetition "" "$SCRIPTPATH"/../jsonexamples/*.json


make parseandstatcompetition || exit 1
echo "parsing and collecting basic stats on json documents as quickly as possible"
echo
plot_competition parseandstatcompetition parseandstat "$SCRIPTPATH"/../jsonexamples/*.json

make distinctuseridcompetition || exit 1
echo "parsing and finding all user.id"
echo
plot_competition distinctuseridcompetition distinctuserid "$SCRIPTPATH/../jsonexamples/twitter.json"

echo "see results in $plotdirectory"
|
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/bin/bash
# Builds parsingcompetition and runs it on a fixed selection of files from
# jsonexamples/ (relative to the parent of this script's directory).
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
cd "$SCRIPTPATH/.." || { echo "cannot cd to $SCRIPTPATH/.." 1>&2; exit 1; }
# Do not benchmark a stale or missing binary if the build fails.
make parsingcompetition || exit 1
echo
for name in twitter update-center github_events gsoc-2018; do
  i="$SCRIPTPATH/../jsonexamples/$name.json"
  # A missing file must not prevent the remaining files from being processed.
  [ -f "$i" ] || continue
  echo "$i"
  "$SCRIPTPATH/../parsingcompetition" "$i"
  echo
done
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/bin/bash
# Takes offline every CPU that is not the first entry of a
# thread_siblings_list, by writing 0 to its sysfs "online" file, then prints
# the first two "siblings" / "cpu cores" lines of /proc/cpuinfo.

# Be careful to not skip the space at the beginning nor the end
# (the list is matched below as " <cpu> ", so every number needs a space on
# both sides). Each thread_siblings_list is cut at its first non-digit, which
# leaves only its first CPU number.
CPUS_TO_SKIP=" $(sed 's/[^0-9].*//' /sys/devices/system/cpu/cpu*/topology/thread_siblings_list | sort -u | tr '\r\n' '  ') "


for CPU_PATH in /sys/devices/system/cpu/cpu[0-9]*; do
  # .../cpu12 -> 12
  CPU="${CPU_PATH##*/cpu}"
  case "$CPUS_TO_SKIP" in
    *" $CPU "*)
      # listed first in a thread_siblings_list: leave it online
      ;;
    *)
      echo 0 > "$CPU_PATH/online"
      ;;
  esac
done

grep -E 'siblings|cpu cores' /proc/cpuinfo | head -2
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env bash
# sudo /usr/bin/cpupower frequency-set -g performance
#######
# taken from http://hbfs.wordpress.com/2013/06/18/fast-path-finding-part-ii/
# might require sudo apt-get install cpufrequtils
# invoke with performance or ondemand
# type cpufreq-info to check results, you can also verify with cat /proc/cpuinfo
#
# usage: powerpolicy.sh ondemand | performance | list
#   ondemand, performance - set that governor on every CPU with cpufreq-set
#   list                  - run cpufreq-info and exit

case "$1" in
  ondemand)
    echo "setting up ondemand"
    ;;
  performance)
    echo "setting up for performance"
    ;;
  list)
    cpufreq-info
    exit 0
    ;;
  *)
    echo "usage: powerpolicy.sh ondemand | performance | list" >&2
    exit 1
    ;;
esac
policy=$1

echo "chosen policy  $policy"

# enumerate found CPUs: one number per line of /proc/cpuinfo that starts with
# "processor" (anchored so that other fields mentioning the word do not match)
mapfile -t cpus < <(awk -F: '/^processor/ { gsub(/[[:space:]]/, "", $2); print $2 }' /proc/cpuinfo)

# set governor for each CPU
for cpu in "${cpus[@]}"; do
  cpufreq-set -c "$cpu" -g "$policy"
done
|
@@ -0,0 +1,51 @@
|
|
1
|
+
#!/bin/sh
|
2
|
+
# stolen from https://github.com/DropD/fnc-simplex/blob/master/linux_turboboost.sh
|
3
|
+
|
4
|
+
# you might need to run sudo apt-get install msr-tools
|
5
|
+
# Toggle Turbo Boost for Ivy Bridge CPUs (should work for all newer Core)
|
6
|
+
# Requires a fairly new Linux kernel (let's say 3.0+)
|
7
|
+
# Written by Donjan Rodic, released for free use
|
8
|
+
|
9
|
+
# check current real frequency with sudo turbostat -s -i1
|
10
|
+
|
11
|
+
# Load the msr kernel module before the rdmsr/wrmsr calls made by the
# functions below.
sudo modprobe msr
|
12
|
+
|
13
|
+
# all_cores FOO
# perform FOO(i) for each core i
# The number of cores is the number of "core id" lines in /proc/cpuinfo; i runs
# from 0 to that number minus one. Set CPUINFO_PATH to read another file.
# Helper variables carry an _ac_ prefix because plain sh has no local
# variables and FOO must not be able to clobber the loop counter.
all_cores() {
  _ac_total=$(grep -c "core id" "${CPUINFO_PATH:-/proc/cpuinfo}")
  _ac_index=0
  # An unreadable file leaves _ac_total empty; treat that as zero cores.
  while [ "$_ac_index" -lt "${_ac_total:-0}" ]; do
    "$1" "$_ac_index"
    _ac_index=$((_ac_index + 1))
  done
}
|
22
|
+
|
23
|
+
|
24
|
+
# report Turbo Boost state on core $1
# Reads bit 38 of MSR 0x1a0 with rdmsr and prints "<core>: on" when the bit is
# 0 and "<core>: off" otherwise. If the register cannot be read, prints a
# message on stderr and returns 1 instead of reporting a state.
read_tb() {
  if ! _tb_bit=$(sudo rdmsr -p"$1" 0x1a0 -f 38:38) || [ -z "$_tb_bit" ]; then
    echo "$1: unable to read MSR 0x1a0" >&2
    return 1
  fi
  if [ "$_tb_bit" -eq 0 ]; then
    echo "$1: on"
  else
    echo "$1: off"
  fi
}
|
29
|
+
|
30
|
+
# enable Turbo Boost on core $1
# Clears bit 38 of MSR 0x1a0 (the bit read_tb reports as "off" when set).
# The current register value is read first and written back with only that bit
# changed, so the other bits of the register are left as they were.
# Returns 1 without writing anything if the register cannot be read.
enable_tb() {
  # -c makes rdmsr print a 0x-prefixed constant usable in shell arithmetic
  _tb_cur=$(sudo rdmsr -p"$1" -c 0x1a0) || return 1
  [ -n "$_tb_cur" ] || return 1
  sudo wrmsr -p"$1" 0x1a0 "$(printf '0x%x' "$(( $_tb_cur & ~0x4000000000 ))")"
}
|
34
|
+
|
35
|
+
# disable Turbo Boost on core $1
# Sets bit 38 of MSR 0x1a0 (the bit read_tb reports as "off" when set).
# The current register value is read first and written back with only that bit
# changed, so the other bits of the register are left as they were.
# Returns 1 without writing anything if the register cannot be read.
disable_tb() {
  # -c makes rdmsr print a 0x-prefixed constant usable in shell arithmetic
  _tb_cur=$(sudo rdmsr -p"$1" -c 0x1a0) || return 1
  [ -n "$_tb_cur" ] || return 1
  sudo wrmsr -p"$1" 0x1a0 "$(printf '0x%x' "$(( $_tb_cur | 0x4000000000 ))")"
}
|
39
|
+
|
40
|
+
|
41
|
+
# Command dispatch: "on" and "off" change the setting on every core and then
# print the resulting state; "list" only prints it. Anything else is a usage
# error, reported on stderr with a non-zero exit status.
case "$1" in
  on)
    all_cores enable_tb
    all_cores read_tb
    ;;
  off)
    all_cores disable_tb
    all_cores read_tb
    ;;
  list)
    all_cores read_tb
    ;;
  *)
    echo "usage: turboboost.sh on | off | list" >&2
    exit 1
    ;;
esac
|