simdjson 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clang-format +5 -0
- data/.gitignore +14 -0
- data/.gitmodules +3 -0
- data/.rubocop.yml +9 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +32 -0
- data/benchmark/apache_builds.json +4421 -0
- data/benchmark/demo.json +15 -0
- data/benchmark/github_events.json +1390 -0
- data/benchmark/run_benchmark.rb +30 -0
- data/ext/simdjson/extconf.rb +22 -0
- data/ext/simdjson/simdjson.cpp +76 -0
- data/ext/simdjson/simdjson.hpp +6 -0
- data/lib/simdjson/version.rb +3 -0
- data/lib/simdjson.rb +2 -0
- data/simdjson.gemspec +35 -0
- data/vendor/.gitkeep +0 -0
- data/vendor/simdjson/AUTHORS +3 -0
- data/vendor/simdjson/CMakeLists.txt +63 -0
- data/vendor/simdjson/CONTRIBUTORS +27 -0
- data/vendor/simdjson/Dockerfile +10 -0
- data/vendor/simdjson/LICENSE +201 -0
- data/vendor/simdjson/Makefile +203 -0
- data/vendor/simdjson/Notes.md +85 -0
- data/vendor/simdjson/README.md +581 -0
- data/vendor/simdjson/amalgamation.sh +158 -0
- data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
- data/vendor/simdjson/benchmark/benchmark.h +223 -0
- data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
- data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
- data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
- data/vendor/simdjson/benchmark/parse.cpp +393 -0
- data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
- data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
- data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
- data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
- data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/gbps.png +0 -0
- data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
- data/vendor/simdjson/images/halvarflake.png +0 -0
- data/vendor/simdjson/images/logo.png +0 -0
- data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
- data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
- data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
- data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
- data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
- data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
- data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
- data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
- data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
- data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
- data/vendor/simdjson/include/simdjson/portability.h +172 -0
- data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
- data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
- data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
- data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
- data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
- data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
- data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
- data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
- data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
- data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
- data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
- data/vendor/simdjson/scripts/issue150.sh +14 -0
- data/vendor/simdjson/scripts/javascript/README.md +3 -0
- data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
- data/vendor/simdjson/scripts/minifier.sh +11 -0
- data/vendor/simdjson/scripts/parseandstat.sh +24 -0
- data/vendor/simdjson/scripts/parser.sh +11 -0
- data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
- data/vendor/simdjson/scripts/plotparse.sh +98 -0
- data/vendor/simdjson/scripts/selectparser.sh +11 -0
- data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
- data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
- data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
- data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
- data/vendor/simdjson/scripts/testjson2json.sh +99 -0
- data/vendor/simdjson/scripts/transitions/Makefile +10 -0
- data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
- data/vendor/simdjson/singleheader/README.md +1 -0
- data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
- data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
- data/vendor/simdjson/singleheader/simdjson.h +39692 -0
- data/vendor/simdjson/src/CMakeLists.txt +67 -0
- data/vendor/simdjson/src/jsonioutil.cpp +35 -0
- data/vendor/simdjson/src/jsonminifier.cpp +285 -0
- data/vendor/simdjson/src/jsonparser.cpp +91 -0
- data/vendor/simdjson/src/parsedjson.cpp +323 -0
- data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
- data/vendor/simdjson/src/simdjson.cpp +30 -0
- data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
- data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
- data/vendor/simdjson/style/clang-format-check.sh +25 -0
- data/vendor/simdjson/style/clang-format.sh +25 -0
- data/vendor/simdjson/style/run-clang-format.py +326 -0
- data/vendor/simdjson/tape.md +134 -0
- data/vendor/simdjson/tests/CMakeLists.txt +25 -0
- data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
- data/vendor/simdjson/tests/basictests.cpp +75 -0
- data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
- data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
- data/vendor/simdjson/tests/pointercheck.cpp +38 -0
- data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
- data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
- data/vendor/simdjson/tools/CMakeLists.txt +3 -0
- data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
- data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
- data/vendor/simdjson/tools/json2json.cpp +112 -0
- data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
- data/vendor/simdjson/tools/jsonstats.cpp +143 -0
- data/vendor/simdjson/tools/minify.cpp +21 -0
- data/vendor/simdjson/tools/release.py +125 -0
- data/vendor/simdjson/windows/dirent_portable.h +1043 -0
- metadata +273 -0
@@ -0,0 +1,158 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
########################################################################
|
3
|
+
# Generates an "amalgamation build" for simdjson. Inspired by a similar
|
4
|
+
# script used by whefs.
|
5
|
+
########################################################################
|
6
|
+
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
|
7
|
+
|
8
|
+
echo "We are about to amalgamate all simdjson files into one source file. "
|
9
|
+
echo "See https://www.sqlite.org/amalgamation.html and https://en.wikipedia.org/wiki/Single_Compilation_Unit for rationale. "
|
10
|
+
|
11
|
+
AMAL_H="simdjson.h"
|
12
|
+
AMAL_C="simdjson.cpp"
|
13
|
+
|
14
|
+
# order does not matter
|
15
|
+
ALLCFILES="
|
16
|
+
$SCRIPTPATH/src/simdjson.cpp
|
17
|
+
$SCRIPTPATH/src/jsonioutil.cpp
|
18
|
+
$SCRIPTPATH/src/jsonminifier.cpp
|
19
|
+
$SCRIPTPATH/src/jsonparser.cpp
|
20
|
+
$SCRIPTPATH/src/stage1_find_marks.cpp
|
21
|
+
$SCRIPTPATH/src/stage2_build_tape.cpp
|
22
|
+
$SCRIPTPATH/src/parsedjson.cpp
|
23
|
+
$SCRIPTPATH/src/parsedjsoniterator.cpp
|
24
|
+
"
|
25
|
+
|
26
|
+
# order matters
|
27
|
+
ALLCHEADERS="
|
28
|
+
$SCRIPTPATH/include/simdjson/simdjson_version.h
|
29
|
+
$SCRIPTPATH/include/simdjson/portability.h
|
30
|
+
$SCRIPTPATH/include/simdjson/isadetection.h
|
31
|
+
$SCRIPTPATH/include/simdjson/simdjson.h
|
32
|
+
$SCRIPTPATH/include/simdjson/common_defs.h
|
33
|
+
$SCRIPTPATH/include/simdjson/padded_string.h
|
34
|
+
$SCRIPTPATH/include/simdjson/jsoncharutils.h
|
35
|
+
$SCRIPTPATH/include/simdjson/jsonformatutils.h
|
36
|
+
$SCRIPTPATH/include/simdjson/jsonioutil.h
|
37
|
+
$SCRIPTPATH/include/simdjson/simdprune_tables.h
|
38
|
+
$SCRIPTPATH/include/simdjson/simdutf8check_haswell.h
|
39
|
+
$SCRIPTPATH/include/simdjson/simdutf8check_westmere.h
|
40
|
+
$SCRIPTPATH/include/simdjson/simdutf8check_arm64.h
|
41
|
+
$SCRIPTPATH/include/simdjson/jsonminifier.h
|
42
|
+
$SCRIPTPATH/include/simdjson/parsedjson.h
|
43
|
+
$SCRIPTPATH/include/simdjson/stage1_find_marks.h
|
44
|
+
$SCRIPTPATH/include/simdjson/stage1_find_marks_flatten.h
|
45
|
+
$SCRIPTPATH/include/simdjson/stage1_find_marks_flatten_haswell.h
|
46
|
+
$SCRIPTPATH/include/simdjson/stage1_find_marks_macros.h
|
47
|
+
$SCRIPTPATH/include/simdjson/stage1_find_marks_westmere.h
|
48
|
+
$SCRIPTPATH/include/simdjson/stage1_find_marks_haswell.h
|
49
|
+
$SCRIPTPATH/include/simdjson/stage1_find_marks_arm64.h
|
50
|
+
$SCRIPTPATH/include/simdjson/stringparsing.h
|
51
|
+
$SCRIPTPATH/include/simdjson/stringparsing_macros.h
|
52
|
+
$SCRIPTPATH/include/simdjson/stringparsing_westmere.h
|
53
|
+
$SCRIPTPATH/include/simdjson/stringparsing_haswell.h
|
54
|
+
$SCRIPTPATH/include/simdjson/stringparsing_arm64.h
|
55
|
+
$SCRIPTPATH/include/simdjson/numberparsing.h
|
56
|
+
$SCRIPTPATH/include/simdjson/stage2_build_tape.h
|
57
|
+
$SCRIPTPATH/include/simdjson/jsonparser.h
|
58
|
+
"
|
59
|
+
|
60
|
+
# Abort before writing anything if a listed header or source file is missing,
# so that a partial amalgamation is never produced.
for i in ${ALLCHEADERS} ${ALLCFILES}; do
    # Quote the path so that "test" always receives exactly one operand.
    if [ ! -e "$i" ]; then
        # Diagnostics belong on stderr; stdout may be captured by a caller.
        echo "FATAL: source file [$i] not found." >&2
        exit 127
    fi
done
|
65
|
+
|
66
|
+
|
67
|
+
# Copy stdin to stdout, dropping every line that holds a quoted #include
# directive or an #include of a <simdjson/...> header. Other includes
# (for example system headers in angle brackets) pass through untouched.
stripinc() {
    sed '/# *include *"/d;/# *include *<simdjson\//d'
}
|
71
|
+
# Write one source file to stdout, framed by "begin file"/"end file" banner
# comments, with its project-local #include lines removed by stripinc.
#   $1 - path of the file to emit; the banners show it relative to $SCRIPTPATH
function dofile()
{
    # Keep the helper variable local instead of leaking it into the script.
    local relfile="${1#"$SCRIPTPATH/"}"
    echo "/* begin file $relfile */"
    # echo "#line 8 \"$1\"" ## redefining the line/file is not nearly as useful as it sounds for debugging. It breaks IDEs.
    # Quote the redirect target: an unquoted path containing whitespace is an
    # "ambiguous redirect" error in bash.
    stripinc < "$1"
    echo "/* end file $relfile */"
}
|
79
|
+
|
80
|
+
timestamp=$(date)
|
81
|
+
echo "Creating ${AMAL_H}..."
|
82
|
+
echo "/* auto-generated on ${timestamp}. Do not edit! */" > "${AMAL_H}"
|
83
|
+
{
|
84
|
+
for h in ${ALLCHEADERS}; do
|
85
|
+
dofile $h
|
86
|
+
done
|
87
|
+
} >> "${AMAL_H}"
|
88
|
+
|
89
|
+
|
90
|
+
echo "Creating ${AMAL_C}..."
|
91
|
+
echo "/* auto-generated on ${timestamp}. Do not edit! */" > "${AMAL_C}"
|
92
|
+
{
|
93
|
+
echo "#include \"${AMAL_H}\""
|
94
|
+
|
95
|
+
echo ""
|
96
|
+
echo "/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */"
|
97
|
+
echo "#ifdef DMALLOC"
|
98
|
+
echo "#include \"dmalloc.h\""
|
99
|
+
echo "#endif"
|
100
|
+
echo ""
|
101
|
+
|
102
|
+
for h in ${ALLCFILES}; do
|
103
|
+
dofile $h
|
104
|
+
done
|
105
|
+
} >> "${AMAL_C}"
|
106
|
+
|
107
|
+
|
108
|
+
|
109
|
+
DEMOCPP="amalgamation_demo.cpp"
|
110
|
+
echo "Creating ${DEMOCPP}..."
|
111
|
+
echo "/* auto-generated on ${timestamp}. Do not edit! */" > "${DEMOCPP}"
|
112
|
+
cat <<< '
|
113
|
+
#include <iostream>
|
114
|
+
#include "simdjson.h"
|
115
|
+
#include "simdjson.cpp"
|
116
|
+
int main(int argc, char *argv[]) {
|
117
|
+
if(argc < 2) {
|
118
|
+
std::cerr << "Please specify a filename " << std::endl;
|
119
|
+
}
|
120
|
+
const char * filename = argv[1];
|
121
|
+
simdjson::padded_string p = simdjson::get_corpus(filename);
|
122
|
+
simdjson::ParsedJson pj = simdjson::build_parsed_json(p); // do the parsing
|
123
|
+
if( ! pj.is_valid() ) {
|
124
|
+
std::cout << "not valid" << std::endl;
|
125
|
+
} else {
|
126
|
+
std::cout << "valid" << std::endl;
|
127
|
+
}
|
128
|
+
return EXIT_SUCCESS;
|
129
|
+
}
|
130
|
+
' >> "${DEMOCPP}"
|
131
|
+
|
132
|
+
echo "Done with all files generation. "
|
133
|
+
|
134
|
+
echo "Files have been written to directory: $PWD "
|
135
|
+
ls -la ${AMAL_C} ${AMAL_H} ${DEMOCPP}
|
136
|
+
|
137
|
+
echo "Giving final instructions:"
|
138
|
+
|
139
|
+
|
140
|
+
CPPBIN=${DEMOCPP%%.*}
|
141
|
+
|
142
|
+
echo "Try :"
|
143
|
+
echo "c++ -O3 -std=c++17 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} ../jsonexamples/twitter.json "
|
144
|
+
|
145
|
+
SINGLEHDR=$SCRIPTPATH/singleheader
|
146
|
+
echo "Copying files to $SCRIPTPATH/singleheader "
|
147
|
+
mkdir -p $SINGLEHDR
|
148
|
+
echo "c++ -O3 -std=c++17 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} ../jsonexamples/twitter.json " > $SINGLEHDR/README.md
|
149
|
+
cp ${AMAL_C} ${AMAL_H} ${DEMOCPP} $SINGLEHDR
|
150
|
+
ls $SINGLEHDR
|
151
|
+
|
152
|
+
cd $SINGLEHDR && c++ -O3 -std=c++17 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} ../jsonexamples/twitter.json
|
153
|
+
|
154
|
+
# Print $1 converted to lower case, followed by a newline.
lowercase(){
    # printf prints the argument verbatim (echo would treat a value such as
    # "-n" as an option), and the POSIX character classes avoid the
    # locale-dependent meaning of the A-Z / a-z ranges.
    printf '%s\n' "$1" | tr '[:upper:]' '[:lower:]'
}
|
157
|
+
|
158
|
+
OS=`lowercase \`uname\``
|
@@ -0,0 +1,223 @@
|
|
1
|
+
#ifndef _BENCHMARK_H_
|
2
|
+
#define _BENCHMARK_H_
|
3
|
+
#include <float.h>
|
4
|
+
#include <stdint.h>
|
5
|
+
#include <time.h>
|
6
|
+
#ifdef __x86_64__
|
7
|
+
|
8
|
+
const char *unitname = "cycles";
|
9
|
+
|
10
|
+
#define RDTSC_START(cycles) \
|
11
|
+
do { \
|
12
|
+
uint32_t cyc_high, cyc_low; \
|
13
|
+
__asm volatile("cpuid\n" \
|
14
|
+
"rdtsc\n" \
|
15
|
+
"mov %%edx, %0\n" \
|
16
|
+
"mov %%eax, %1" \
|
17
|
+
: "=r"(cyc_high), "=r"(cyc_low) \
|
18
|
+
: \
|
19
|
+
: /* no read only */ \
|
20
|
+
"%rax", "%rbx", "%rcx", "%rdx" /* clobbers */ \
|
21
|
+
); \
|
22
|
+
(cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
|
23
|
+
} while (0)
|
24
|
+
|
25
|
+
#define RDTSC_STOP(cycles) \
|
26
|
+
do { \
|
27
|
+
uint32_t cyc_high, cyc_low; \
|
28
|
+
__asm volatile("rdtscp\n" \
|
29
|
+
"mov %%edx, %0\n" \
|
30
|
+
"mov %%eax, %1\n" \
|
31
|
+
"cpuid" \
|
32
|
+
: "=r"(cyc_high), "=r"(cyc_low) \
|
33
|
+
: /* no read only registers */ \
|
34
|
+
: "%rax", "%rbx", "%rcx", "%rdx" /* clobbers */ \
|
35
|
+
); \
|
36
|
+
(cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
|
37
|
+
} while (0)
|
38
|
+
|
39
|
+
#else
|
40
|
+
const char *unitname = " (clock units) ";
|
41
|
+
|
42
|
+
/*
 * Fallback timers for targets where __x86_64__ is not defined: both macros
 * store the current value of clock() into `cycles`, so measurements are in
 * clock() ticks rather than CPU cycles.
 * The argument is parenthesized, as in the x86-64 variants, so that any
 * lvalue expression can be passed.
 */
#define RDTSC_START(cycles) \
  do { \
    (cycles) = clock(); \
  } while (0)

#define RDTSC_STOP(cycles) \
  do { \
    (cycles) = clock(); \
  } while (0)
|
51
|
+
#endif
|
52
|
+
|
53
|
+
static __attribute__((noinline)) uint64_t rdtsc_overhead_func(uint64_t dummy) {
|
54
|
+
return dummy;
|
55
|
+
}
|
56
|
+
|
57
|
+
uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
|
58
|
+
|
59
|
+
/*
 * Runs `test` between RDTSC_START and RDTSC_STOP `repeat` times and stores
 * the smallest observed interval in global_rdtsc_overhead.
 *
 * test   - expression or statement to time (the BEST_TIME* macros pass a
 *          call to rdtsc_overhead_func)
 * repeat - number of runs; parenthesized below so that an argument such as
 *          `a ? b : c` is compared as a whole
 *
 * If `repeat` is not positive the loop never runs and the overhead is left
 * at UINT64_MAX.
 */
#define RDTSC_SET_OVERHEAD(test, repeat) \
  do { \
    uint64_t cycles_start, cycles_final, cycles_diff; \
    uint64_t min_diff = UINT64_MAX; \
    for (int i = 0; i < (repeat); i++) { \
      __asm volatile("" ::: /* pretend to clobber */ "memory"); \
      RDTSC_START(cycles_start); \
      test; \
      RDTSC_STOP(cycles_final); \
      cycles_diff = (cycles_final - cycles_start); \
      if (cycles_diff < min_diff) \
        min_diff = cycles_diff; \
    } \
    global_rdtsc_overhead = min_diff; \
  } while (0)
|
74
|
+
|
75
|
+
/*
 * Returns the time elapsed from `start` to `end`, in seconds (negative when
 * `end` precedes `start`).
 *
 * The seconds and nanoseconds fields are subtracted first and combined in
 * 64-bit arithmetic, so the intermediate nanosecond count cannot overflow
 * where time_t or long is 32 bits wide. Spelling the type as
 * `struct timespec` keeps the header valid in C as well as C++.
 */
double diff(struct timespec start, struct timespec end) {
  int64_t elapsed_ns = (int64_t)(end.tv_sec - start.tv_sec) * 1000000000 +
                       (int64_t)(end.tv_nsec - start.tv_nsec);
  return (double)elapsed_ns / 1000000000.0;
}
|
80
|
+
|
81
|
+
/*
 * Times `test` over `repeat` runs and prints the best and the average cost
 * per input byte, plus throughput figures derived from wall-clock time.
 *
 * name     - label printed before the results
 * test     - expression to time; evaluated exactly once per run
 * expected - value `test` must produce; on a mismatch both values are
 *            reported on stderr (cast to int) and the remaining runs are
 *            skipped
 * pre      - code executed before each run, outside the timed region
 * repeat   - number of runs
 * size     - number of operations (input bytes) represented by one run
 * verbose  - nonzero prints labelled values, zero prints a compact row
 *
 * The cycle-counter overhead is measured on first use and subtracted from
 * every run.
 */
#define BEST_TIME(name, test, expected, pre, repeat, size, verbose) \
  do { \
    if (global_rdtsc_overhead == UINT64_MAX) { \
      RDTSC_SET_OVERHEAD(rdtsc_overhead_func(1), (repeat)); \
    } \
    if (verbose) \
      printf("%-40s\t: ", name); \
    else \
      printf("\"%-40s\"", name); \
    fflush(NULL); \
    uint64_t cycles_start, cycles_final, cycles_diff; \
    uint64_t min_diff = (uint64_t)-1; \
    double min_sumclockdiff = DBL_MAX; \
    uint64_t sum_diff = 0; \
    double sumclockdiff = 0; \
    struct timespec time1, time2; \
    for (int i = 0; i < (repeat); i++) { \
      pre; \
      __asm volatile("" ::: /* pretend to clobber */ "memory"); \
      clock_gettime(CLOCK_REALTIME, &time1); \
      RDTSC_START(cycles_start); \
      /* Capture the result so the failure report does not run `test` again. */ \
      __typeof__(test) best_time_result = (test); \
      if (best_time_result != (expected)) { \
        fprintf(stderr, "not expected (%d , %d )", (int)best_time_result, \
                (int)(expected)); \
        break; \
      } \
      RDTSC_STOP(cycles_final); \
      clock_gettime(CLOCK_REALTIME, &time2); \
      double thistiming = diff(time1, time2); \
      sumclockdiff += thistiming; \
      if (thistiming < min_sumclockdiff) \
        min_sumclockdiff = thistiming; \
      cycles_diff = (cycles_final - cycles_start - global_rdtsc_overhead); \
      if (cycles_diff < min_diff) \
        min_diff = cycles_diff; \
      sum_diff += cycles_diff; \
    } \
    uint64_t S = size; \
    float cycle_per_op = (min_diff) / (double)S; \
    float avg_cycle_per_op = (sum_diff) / ((double)S * (repeat)); \
    double avg_gb_per_s = \
        ((double)S * (repeat)) / ((sumclockdiff)*1000.0 * 1000.0 * 1000.0); \
    double max_gb_per_s = \
        ((double)S) / ((min_sumclockdiff)*1000.0 * 1000.0 * 1000.0); \
    if (verbose) \
      printf(" %7.3f %s per input byte (best) ", cycle_per_op, unitname); \
    if (verbose) \
      printf(" %7.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
    if (verbose) \
      printf(" %7.3f GB/s (error margin: %.3f GB/s)", max_gb_per_s, \
             -avg_gb_per_s + max_gb_per_s); \
    if (!(verbose)) \
      printf(" %20.3f %20.3f %20.3f %20.3f ", cycle_per_op, \
             avg_cycle_per_op - cycle_per_op, max_gb_per_s, \
             -avg_gb_per_s + max_gb_per_s); \
    printf("\n"); \
    fflush(NULL); \
  } while (0)
|
144
|
+
|
145
|
+
/*
 * Like BEST_TIME, but the result of `test` is not checked and only
 * cycle-counter figures are printed (no wall-clock throughput).
 *
 * name    - label printed before the results when `verbose` is nonzero
 * test    - expression or statement to time
 * pre     - code executed before each run, outside the timed region
 * repeat  - number of runs; parenthesized so compound expressions work
 * size    - number of operations (input bytes) represented by one run
 * verbose - nonzero prints best and average with labels, zero prints only
 *           the best value
 */
#define BEST_TIME_NOCHECK(name, test, pre, repeat, size, verbose) \
  do { \
    if (global_rdtsc_overhead == UINT64_MAX) { \
      RDTSC_SET_OVERHEAD(rdtsc_overhead_func(1), (repeat)); \
    } \
    if (verbose) \
      printf("%-40s\t: ", name); \
    fflush(NULL); \
    uint64_t cycles_start, cycles_final, cycles_diff; \
    uint64_t min_diff = (uint64_t)-1; \
    uint64_t sum_diff = 0; \
    for (int i = 0; i < (repeat); i++) { \
      pre; \
      __asm volatile("" ::: /* pretend to clobber */ "memory"); \
      RDTSC_START(cycles_start); \
      test; \
      RDTSC_STOP(cycles_final); \
      cycles_diff = (cycles_final - cycles_start - global_rdtsc_overhead); \
      if (cycles_diff < min_diff) \
        min_diff = cycles_diff; \
      sum_diff += cycles_diff; \
    } \
    uint64_t S = size; \
    float cycle_per_op = (min_diff) / (double)S; \
    float avg_cycle_per_op = (sum_diff) / ((double)S * (repeat)); \
    if (verbose) \
      printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
    if (verbose) \
      printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
    if (verbose) \
      printf("\n"); \
    if (!(verbose)) \
      printf(" %.3f ", cycle_per_op); \
    fflush(NULL); \
  } while (0)
|
181
|
+
|
182
|
+
/*
 * Like BEST_TIME, except that each run is validated by evaluating `check`
 * after the timed region instead of comparing against an expected value.
 *
 * test    - expression or statement to time; its source text is printed as
 *           the label when `verbose` is nonzero
 * check   - expression that must be nonzero after each run; otherwise
 *           "error" is printed and the remaining runs are skipped. It is
 *           parenthesized before negation so that a compound expression
 *           such as `a == b` is negated as a whole.
 * pre     - code executed before each run, outside the timed region
 * repeat  - number of runs
 * size    - number of operations represented by one run
 * verbose - nonzero prints best and average with labels, zero prints only
 *           the best value
 */
#define BEST_TIME_CHECK(test, check, pre, repeat, size, verbose) \
  do { \
    if (global_rdtsc_overhead == UINT64_MAX) { \
      RDTSC_SET_OVERHEAD(rdtsc_overhead_func(1), (repeat)); \
    } \
    if (verbose) \
      printf("%-60s\t:\n", #test); \
    fflush(NULL); \
    uint64_t cycles_start, cycles_final, cycles_diff; \
    uint64_t min_diff = (uint64_t)-1; \
    uint64_t sum_diff = 0; \
    for (int i = 0; i < (repeat); i++) { \
      pre; \
      __asm volatile("" ::: /* pretend to clobber */ "memory"); \
      RDTSC_START(cycles_start); \
      test; \
      RDTSC_STOP(cycles_final); \
      if (!(check)) { \
        printf("error"); \
        break; \
      } \
      cycles_diff = (cycles_final - cycles_start - global_rdtsc_overhead); \
      if (cycles_diff < min_diff) \
        min_diff = cycles_diff; \
      sum_diff += cycles_diff; \
    } \
    uint64_t S = size; \
    float cycle_per_op = (min_diff) / (double)S; \
    float avg_cycle_per_op = (sum_diff) / ((double)S * (repeat)); \
    if (verbose) \
      printf(" %.3f cycles per operation (best) ", cycle_per_op); \
    if (verbose) \
      printf("\t%.3f cycles per operation (avg) ", avg_cycle_per_op); \
    if (verbose) \
      printf("\n"); \
    if (!(verbose)) \
      printf(" %.3f ", cycle_per_op); \
    fflush(NULL); \
  } while (0)
|
222
|
+
|
223
|
+
#endif
|