simdjson 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. checksums.yaml +7 -0
  2. data/.clang-format +5 -0
  3. data/.gitignore +14 -0
  4. data/.gitmodules +3 -0
  5. data/.rubocop.yml +9 -0
  6. data/.travis.yml +7 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +39 -0
  10. data/Rakefile +32 -0
  11. data/benchmark/apache_builds.json +4421 -0
  12. data/benchmark/demo.json +15 -0
  13. data/benchmark/github_events.json +1390 -0
  14. data/benchmark/run_benchmark.rb +30 -0
  15. data/ext/simdjson/extconf.rb +22 -0
  16. data/ext/simdjson/simdjson.cpp +76 -0
  17. data/ext/simdjson/simdjson.hpp +6 -0
  18. data/lib/simdjson/version.rb +3 -0
  19. data/lib/simdjson.rb +2 -0
  20. data/simdjson.gemspec +35 -0
  21. data/vendor/.gitkeep +0 -0
  22. data/vendor/simdjson/AUTHORS +3 -0
  23. data/vendor/simdjson/CMakeLists.txt +63 -0
  24. data/vendor/simdjson/CONTRIBUTORS +27 -0
  25. data/vendor/simdjson/Dockerfile +10 -0
  26. data/vendor/simdjson/LICENSE +201 -0
  27. data/vendor/simdjson/Makefile +203 -0
  28. data/vendor/simdjson/Notes.md +85 -0
  29. data/vendor/simdjson/README.md +581 -0
  30. data/vendor/simdjson/amalgamation.sh +158 -0
  31. data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
  32. data/vendor/simdjson/benchmark/benchmark.h +223 -0
  33. data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
  34. data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
  35. data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
  36. data/vendor/simdjson/benchmark/parse.cpp +393 -0
  37. data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
  38. data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
  39. data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
  40. data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
  41. data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
  42. data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
  43. data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
  44. data/vendor/simdjson/doc/gbps.png +0 -0
  45. data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
  46. data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
  47. data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
  48. data/vendor/simdjson/images/halvarflake.png +0 -0
  49. data/vendor/simdjson/images/logo.png +0 -0
  50. data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
  51. data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
  52. data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
  53. data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
  54. data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
  55. data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
  56. data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
  57. data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
  58. data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
  59. data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
  60. data/vendor/simdjson/include/simdjson/portability.h +172 -0
  61. data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
  62. data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
  63. data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
  64. data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
  65. data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
  66. data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
  67. data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
  68. data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
  69. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
  70. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
  71. data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
  72. data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
  73. data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
  74. data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
  75. data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
  76. data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
  77. data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
  78. data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
  79. data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
  80. data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
  81. data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
  82. data/vendor/simdjson/scripts/issue150.sh +14 -0
  83. data/vendor/simdjson/scripts/javascript/README.md +3 -0
  84. data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
  85. data/vendor/simdjson/scripts/minifier.sh +11 -0
  86. data/vendor/simdjson/scripts/parseandstat.sh +24 -0
  87. data/vendor/simdjson/scripts/parser.sh +11 -0
  88. data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
  89. data/vendor/simdjson/scripts/plotparse.sh +98 -0
  90. data/vendor/simdjson/scripts/selectparser.sh +11 -0
  91. data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
  92. data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
  93. data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
  94. data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
  95. data/vendor/simdjson/scripts/testjson2json.sh +99 -0
  96. data/vendor/simdjson/scripts/transitions/Makefile +10 -0
  97. data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
  98. data/vendor/simdjson/singleheader/README.md +1 -0
  99. data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
  100. data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
  101. data/vendor/simdjson/singleheader/simdjson.h +39692 -0
  102. data/vendor/simdjson/src/CMakeLists.txt +67 -0
  103. data/vendor/simdjson/src/jsonioutil.cpp +35 -0
  104. data/vendor/simdjson/src/jsonminifier.cpp +285 -0
  105. data/vendor/simdjson/src/jsonparser.cpp +91 -0
  106. data/vendor/simdjson/src/parsedjson.cpp +323 -0
  107. data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
  108. data/vendor/simdjson/src/simdjson.cpp +30 -0
  109. data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
  110. data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
  111. data/vendor/simdjson/style/clang-format-check.sh +25 -0
  112. data/vendor/simdjson/style/clang-format.sh +25 -0
  113. data/vendor/simdjson/style/run-clang-format.py +326 -0
  114. data/vendor/simdjson/tape.md +134 -0
  115. data/vendor/simdjson/tests/CMakeLists.txt +25 -0
  116. data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
  117. data/vendor/simdjson/tests/basictests.cpp +75 -0
  118. data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
  119. data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
  120. data/vendor/simdjson/tests/pointercheck.cpp +38 -0
  121. data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
  122. data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
  123. data/vendor/simdjson/tools/CMakeLists.txt +3 -0
  124. data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
  125. data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
  126. data/vendor/simdjson/tools/json2json.cpp +112 -0
  127. data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
  128. data/vendor/simdjson/tools/jsonstats.cpp +143 -0
  129. data/vendor/simdjson/tools/minify.cpp +21 -0
  130. data/vendor/simdjson/tools/release.py +125 -0
  131. data/vendor/simdjson/windows/dirent_portable.h +1043 -0
  132. metadata +273 -0
@@ -0,0 +1,158 @@
1
+ #!/bin/bash
2
+ ########################################################################
3
+ # Generates an "amalgamation build" for simdjson. Inspired by similar
4
+ # script used by whefs.
5
+ ########################################################################
6
+ SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
7
+
8
+ echo "We are about to amalgamate all simdjson files into one source file. "
9
+ echo "See https://www.sqlite.org/amalgamation.html and https://en.wikipedia.org/wiki/Single_Compilation_Unit for rationale. "
10
+
11
+ AMAL_H="simdjson.h"
12
+ AMAL_C="simdjson.cpp"
13
+
14
+ # order does not matter
15
+ ALLCFILES="
16
+ $SCRIPTPATH/src/simdjson.cpp
17
+ $SCRIPTPATH/src/jsonioutil.cpp
18
+ $SCRIPTPATH/src/jsonminifier.cpp
19
+ $SCRIPTPATH/src/jsonparser.cpp
20
+ $SCRIPTPATH/src/stage1_find_marks.cpp
21
+ $SCRIPTPATH/src/stage2_build_tape.cpp
22
+ $SCRIPTPATH/src/parsedjson.cpp
23
+ $SCRIPTPATH/src/parsedjsoniterator.cpp
24
+ "
25
+
26
+ # order matters
27
+ ALLCHEADERS="
28
+ $SCRIPTPATH/include/simdjson/simdjson_version.h
29
+ $SCRIPTPATH/include/simdjson/portability.h
30
+ $SCRIPTPATH/include/simdjson/isadetection.h
31
+ $SCRIPTPATH/include/simdjson/simdjson.h
32
+ $SCRIPTPATH/include/simdjson/common_defs.h
33
+ $SCRIPTPATH/include/simdjson/padded_string.h
34
+ $SCRIPTPATH/include/simdjson/jsoncharutils.h
35
+ $SCRIPTPATH/include/simdjson/jsonformatutils.h
36
+ $SCRIPTPATH/include/simdjson/jsonioutil.h
37
+ $SCRIPTPATH/include/simdjson/simdprune_tables.h
38
+ $SCRIPTPATH/include/simdjson/simdutf8check_haswell.h
39
+ $SCRIPTPATH/include/simdjson/simdutf8check_westmere.h
40
+ $SCRIPTPATH/include/simdjson/simdutf8check_arm64.h
41
+ $SCRIPTPATH/include/simdjson/jsonminifier.h
42
+ $SCRIPTPATH/include/simdjson/parsedjson.h
43
+ $SCRIPTPATH/include/simdjson/stage1_find_marks.h
44
+ $SCRIPTPATH/include/simdjson/stage1_find_marks_flatten.h
45
+ $SCRIPTPATH/include/simdjson/stage1_find_marks_flatten_haswell.h
46
+ $SCRIPTPATH/include/simdjson/stage1_find_marks_macros.h
47
+ $SCRIPTPATH/include/simdjson/stage1_find_marks_westmere.h
48
+ $SCRIPTPATH/include/simdjson/stage1_find_marks_haswell.h
49
+ $SCRIPTPATH/include/simdjson/stage1_find_marks_arm64.h
50
+ $SCRIPTPATH/include/simdjson/stringparsing.h
51
+ $SCRIPTPATH/include/simdjson/stringparsing_macros.h
52
+ $SCRIPTPATH/include/simdjson/stringparsing_westmere.h
53
+ $SCRIPTPATH/include/simdjson/stringparsing_haswell.h
54
+ $SCRIPTPATH/include/simdjson/stringparsing_arm64.h
55
+ $SCRIPTPATH/include/simdjson/numberparsing.h
56
+ $SCRIPTPATH/include/simdjson/stage2_build_tape.h
57
+ $SCRIPTPATH/include/simdjson/jsonparser.h
58
+ "
59
+
60
+ for i in ${ALLCHEADERS} ${ALLCFILES}; do
61
+ test -e $i && continue
62
+ echo "FATAL: source file [$i] not found."
63
+ exit 127
64
+ done
65
+
66
+
67
+ function stripinc()
68
+ {
69
+ sed -e '/# *include *"/d' -e '/# *include *<simdjson\//d'
70
+ }
71
+ function dofile()
72
+ {
73
+ RELFILE=${1#"$SCRIPTPATH/"}
74
+ echo "/* begin file $RELFILE */"
75
+ # echo "#line 8 \"$1\"" ## redefining the line/file is not nearly as useful as it sounds for debugging. It breaks IDEs.
76
+ stripinc < $1
77
+ echo "/* end file $RELFILE */"
78
+ }
79
+
80
+ timestamp=$(date)
81
+ echo "Creating ${AMAL_H}..."
82
+ echo "/* auto-generated on ${timestamp}. Do not edit! */" > "${AMAL_H}"
83
+ {
84
+ for h in ${ALLCHEADERS}; do
85
+ dofile $h
86
+ done
87
+ } >> "${AMAL_H}"
88
+
89
+
90
+ echo "Creating ${AMAL_C}..."
91
+ echo "/* auto-generated on ${timestamp}. Do not edit! */" > "${AMAL_C}"
92
+ {
93
+ echo "#include \"${AMAL_H}\""
94
+
95
+ echo ""
96
+ echo "/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */"
97
+ echo "#ifdef DMALLOC"
98
+ echo "#include \"dmalloc.h\""
99
+ echo "#endif"
100
+ echo ""
101
+
102
+ for h in ${ALLCFILES}; do
103
+ dofile $h
104
+ done
105
+ } >> "${AMAL_C}"
106
+
107
+
108
+
109
+ DEMOCPP="amalgamation_demo.cpp"
110
+ echo "Creating ${DEMOCPP}..."
111
+ echo "/* auto-generated on ${timestamp}. Do not edit! */" > "${DEMOCPP}"
112
+ cat <<< '
113
+ #include <iostream>
114
+ #include "simdjson.h"
115
+ #include "simdjson.cpp"
116
+ int main(int argc, char *argv[]) { // validates the JSON file named by argv[1] and prints the verdict
117
+ if(argc < 2) { // without a filename there is nothing to parse
118
+ std::cerr << "Please specify a filename " << std::endl; return EXIT_FAILURE;
119
+ }
120
+ const char * filename = argv[1];
121
+ simdjson::padded_string p = simdjson::get_corpus(filename);
122
+ simdjson::ParsedJson pj = simdjson::build_parsed_json(p); // do the parsing
123
+ if( ! pj.is_valid() ) {
124
+ std::cout << "not valid" << std::endl;
125
+ } else {
126
+ std::cout << "valid" << std::endl;
127
+ }
128
+ return EXIT_SUCCESS;
129
+ }
130
+ ' >> "${DEMOCPP}"
131
+
132
+ echo "Done with all files generation. "
133
+
134
+ echo "Files have been written to directory: $PWD "
135
+ ls -la ${AMAL_C} ${AMAL_H} ${DEMOCPP}
136
+
137
+ echo "Giving final instructions:"
138
+
139
+
140
+ CPPBIN=${DEMOCPP%%.*}
141
+
142
+ echo "Try :"
143
+ echo "c++ -O3 -std=c++17 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} ../jsonexamples/twitter.json "
144
+
145
+ SINGLEHDR=$SCRIPTPATH/singleheader
146
+ echo "Copying files to $SCRIPTPATH/singleheader "
147
+ mkdir -p $SINGLEHDR
148
+ echo "c++ -O3 -std=c++17 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} ../jsonexamples/twitter.json " > $SINGLEHDR/README.md
149
+ cp ${AMAL_C} ${AMAL_H} ${DEMOCPP} $SINGLEHDR
150
+ ls $SINGLEHDR
151
+
152
+ cd $SINGLEHDR && c++ -O3 -std=c++17 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} ../jsonexamples/twitter.json
153
+
154
+ lowercase(){
155
+ echo "$1" | tr 'A-Z' 'a-z'
156
+ }
157
+
158
+ OS=`lowercase \`uname\``
@@ -0,0 +1,8 @@
1
+ target_include_directories(${SIMDJSON_LIB_NAME}
2
+ INTERFACE
3
+ $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/benchmark>
4
+ $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/benchmark/linux>
5
+ )
6
+
7
+ add_cpp_benchmark(parse)
8
+ add_cpp_benchmark(statisticalmodel)
@@ -0,0 +1,223 @@
1
+ #ifndef _BENCHMARK_H_
2
+ #define _BENCHMARK_H_
3
+ #include <float.h>
4
+ #include <stdint.h>
5
+ #include <time.h>
6
+ #ifdef __x86_64__
7
+
8
+ const char *unitname = "cycles";
9
+
10
+ #define RDTSC_START(cycles) \
11
+ do { \
12
+ uint32_t cyc_high, cyc_low; \
13
+ __asm volatile("cpuid\n" \
14
+ "rdtsc\n" \
15
+ "mov %%edx, %0\n" \
16
+ "mov %%eax, %1" \
17
+ : "=r"(cyc_high), "=r"(cyc_low) \
18
+ : \
19
+ : /* no read only */ \
20
+ "%rax", "%rbx", "%rcx", "%rdx" /* clobbers */ \
21
+ ); \
22
+ (cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
23
+ } while (0)
24
+
25
+ #define RDTSC_STOP(cycles) \
26
+ do { \
27
+ uint32_t cyc_high, cyc_low; \
28
+ __asm volatile("rdtscp\n" \
29
+ "mov %%edx, %0\n" \
30
+ "mov %%eax, %1\n" \
31
+ "cpuid" \
32
+ : "=r"(cyc_high), "=r"(cyc_low) \
33
+ : /* no read only registers */ \
34
+ : "%rax", "%rbx", "%rcx", "%rdx" /* clobbers */ \
35
+ ); \
36
+ (cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
37
+ } while (0)
38
+
39
+ #else
40
+ const char *unitname = " (clock units) ";
41
+
42
+ #define RDTSC_START(cycles) \
43
+ do { \
44
+ cycles = clock(); \
45
+ } while (0)
46
+
47
+ #define RDTSC_STOP(cycles) \
48
+ do { \
49
+ cycles = clock(); \
50
+ } while (0)
51
+ #endif
52
+
53
+ static __attribute__((noinline)) uint64_t rdtsc_overhead_func(uint64_t dummy) {
54
+ return dummy;
55
+ }
56
+
57
+ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
58
+
59
+ #define RDTSC_SET_OVERHEAD(test, repeat) \
60
+ do { /* stores the smallest cycle count over the runs of test in global_rdtsc_overhead */ \
61
+ uint64_t cycles_start, cycles_final, cycles_diff; \
62
+ uint64_t min_diff = UINT64_MAX; \
63
+ for (int i = 0; i < (repeat); i++) { /* repeat may be any expression */ \
64
+ __asm volatile("" ::: /* pretend to clobber */ "memory"); \
65
+ RDTSC_START(cycles_start); \
66
+ test; \
67
+ RDTSC_STOP(cycles_final); \
68
+ cycles_diff = (cycles_final - cycles_start); \
69
+ if (cycles_diff < min_diff) \
70
+ min_diff = cycles_diff; \
71
+ } \
72
+ global_rdtsc_overhead = min_diff; \
73
+ } while (0)
74
+
75
+ double diff(timespec start, timespec end) { // elapsed time from start to end, in seconds
76
+ return ((end.tv_sec - start.tv_sec) * 1000000000LL + // scale the difference, not the absolute time, so the product cannot overflow
77
+ (end.tv_nsec - start.tv_nsec)) /
78
+ 1000000000.0;
79
+ }
80
+
81
+ /*
82
+ * Runs test up to repeat times and prints the best and average cost per input
83
+ * byte (cycle count divided by size) together with the throughput in GB/s.
84
+ * pre runs before each timed call; expected is the value test must produce (a
85
+ * mismatch is reported on stderr and ends the loop); verbose selects labelled output.
86
+ */
87
+ #define BEST_TIME(name, test, expected, pre, repeat, size, verbose) \
88
+ do { \
89
+ if (global_rdtsc_overhead == UINT64_MAX) { \
90
+ RDTSC_SET_OVERHEAD(rdtsc_overhead_func(1), repeat); \
91
+ } \
92
+ if (verbose) \
93
+ printf("%-40s\t: ", name); \
94
+ else \
95
+ printf("\"%-40s\"", name); \
96
+ fflush(NULL); \
97
+ uint64_t cycles_start, cycles_final, cycles_diff; \
98
+ uint64_t min_diff = (uint64_t)-1; \
99
+ double min_sumclockdiff = DBL_MAX; \
100
+ uint64_t sum_diff = 0; \
101
+ double sumclockdiff = 0; \
102
+ struct timespec time1, time2; \
103
+ for (int i = 0; i < (repeat); i++) { \
104
+ pre; \
105
+ __asm volatile("" ::: /* pretend to clobber */ "memory"); \
106
+ clock_gettime(CLOCK_REALTIME, &time1); \
107
+ RDTSC_START(cycles_start); \
108
+ if ((test) != (expected)) { /* arguments parenthesized: they may be compound expressions */ \
109
+ fprintf(stderr, "not expected (%d , %d )", (int)(test), (int)(expected)); \
110
+ break; \
111
+ } \
112
+ RDTSC_STOP(cycles_final); \
113
+ clock_gettime(CLOCK_REALTIME, &time2); \
114
+ double thistiming = diff(time1, time2); \
115
+ sumclockdiff += thistiming; \
116
+ if (thistiming < min_sumclockdiff) \
117
+ min_sumclockdiff = thistiming; \
118
+ cycles_diff = (cycles_final - cycles_start - global_rdtsc_overhead); \
119
+ if (cycles_diff < min_diff) \
120
+ min_diff = cycles_diff; \
121
+ sum_diff += cycles_diff; \
122
+ } \
123
+ uint64_t S = (size); \
124
+ float cycle_per_op = (min_diff) / (double)S; \
125
+ float avg_cycle_per_op = (sum_diff) / ((double)S * (repeat)); \
126
+ double avg_gb_per_s = \
127
+ ((double)S * (repeat)) / ((sumclockdiff)*1000.0 * 1000.0 * 1000.0); \
128
+ double max_gb_per_s = \
129
+ ((double)S) / ((min_sumclockdiff)*1000.0 * 1000.0 * 1000.0); \
130
+ if (verbose) \
131
+ printf(" %7.3f %s per input byte (best) ", cycle_per_op, unitname); \
132
+ if (verbose) \
133
+ printf(" %7.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
134
+ if (verbose) \
135
+ printf(" %7.3f GB/s (error margin: %.3f GB/s)", max_gb_per_s, \
136
+ -avg_gb_per_s + max_gb_per_s); \
137
+ if (!(verbose)) \
138
+ printf(" %20.3f %20.3f %20.3f %20.3f ", cycle_per_op, \
139
+ avg_cycle_per_op - cycle_per_op, max_gb_per_s, \
140
+ -avg_gb_per_s + max_gb_per_s); \
141
+ printf("\n"); \
142
+ fflush(NULL); \
143
+ } while (0)
144
+
145
+ // like BEST_TIME, but the value of test is not checked and only cycle counts are printed
146
+ #define BEST_TIME_NOCHECK(name, test, pre, repeat, size, verbose) \
147
+ do { \
148
+ if (global_rdtsc_overhead == UINT64_MAX) { \
149
+ RDTSC_SET_OVERHEAD(rdtsc_overhead_func(1), repeat); \
150
+ } \
151
+ if (verbose) \
152
+ printf("%-40s\t: ", name); \
153
+ fflush(NULL); \
154
+ uint64_t cycles_start, cycles_final, cycles_diff; \
155
+ uint64_t min_diff = (uint64_t)-1; \
156
+ uint64_t sum_diff = 0; \
157
+ for (int i = 0; i < (repeat); i++) { /* repeat may be any expression */ \
158
+ pre; \
159
+ __asm volatile("" ::: /* pretend to clobber */ "memory"); \
160
+ RDTSC_START(cycles_start); \
161
+ test; \
162
+ RDTSC_STOP(cycles_final); \
163
+ cycles_diff = (cycles_final - cycles_start - global_rdtsc_overhead); \
164
+ if (cycles_diff < min_diff) \
165
+ min_diff = cycles_diff; \
166
+ sum_diff += cycles_diff; \
167
+ } \
168
+ uint64_t S = (size); \
169
+ float cycle_per_op = (min_diff) / (double)S; \
170
+ float avg_cycle_per_op = (sum_diff) / ((double)S * (repeat)); \
171
+ if (verbose) \
172
+ printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
173
+ if (verbose) \
174
+ printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
175
+ if (verbose) \
176
+ printf("\n"); \
177
+ if (!(verbose)) \
178
+ printf(" %.3f ", cycle_per_op); \
179
+ fflush(NULL); \
180
+ } while (0)
181
+
182
+ // like BEST_TIME except that we run a function to check the result
183
+ #define BEST_TIME_CHECK(test, check, pre, repeat, size, verbose) \
184
+ do { \
185
+ if (global_rdtsc_overhead == UINT64_MAX) { \
186
+ RDTSC_SET_OVERHEAD(rdtsc_overhead_func(1), repeat); \
187
+ } \
188
+ if (verbose) \
189
+ printf("%-60s\t:\n", #test); \
190
+ fflush(NULL); \
191
+ uint64_t cycles_start, cycles_final, cycles_diff; \
192
+ uint64_t min_diff = (uint64_t)-1; \
193
+ uint64_t sum_diff = 0; \
194
+ for (int i = 0; i < repeat; i++) { \
195
+ pre; \
196
+ __asm volatile("" ::: /* pretend to clobber */ "memory"); \
197
+ RDTSC_START(cycles_start); \
198
+ test; \
199
+ RDTSC_STOP(cycles_final); \
200
+ if (!check) { \
201
+ printf("error"); \
202
+ break; \
203
+ } \
204
+ cycles_diff = (cycles_final - cycles_start - global_rdtsc_overhead); \
205
+ if (cycles_diff < min_diff) \
206
+ min_diff = cycles_diff; \
207
+ sum_diff += cycles_diff; \
208
+ } \
209
+ uint64_t S = size; \
210
+ float cycle_per_op = (min_diff) / (double)S; \
211
+ float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \
212
+ if (verbose) \
213
+ printf(" %.3f cycles per operation (best) ", cycle_per_op); \
214
+ if (verbose) \
215
+ printf("\t%.3f cycles per operation (avg) ", avg_cycle_per_op); \
216
+ if (verbose) \
217
+ printf("\n"); \
218
+ if (!verbose) \
219
+ printf(" %.3f ", cycle_per_op); \
220
+ fflush(NULL); \
221
+ } while (0)
222
+
223
+ #endif