simdjson 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. checksums.yaml +7 -0
  2. data/.clang-format +5 -0
  3. data/.gitignore +14 -0
  4. data/.gitmodules +3 -0
  5. data/.rubocop.yml +9 -0
  6. data/.travis.yml +7 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +39 -0
  10. data/Rakefile +32 -0
  11. data/benchmark/apache_builds.json +4421 -0
  12. data/benchmark/demo.json +15 -0
  13. data/benchmark/github_events.json +1390 -0
  14. data/benchmark/run_benchmark.rb +30 -0
  15. data/ext/simdjson/extconf.rb +22 -0
  16. data/ext/simdjson/simdjson.cpp +76 -0
  17. data/ext/simdjson/simdjson.hpp +6 -0
  18. data/lib/simdjson/version.rb +3 -0
  19. data/lib/simdjson.rb +2 -0
  20. data/simdjson.gemspec +35 -0
  21. data/vendor/.gitkeep +0 -0
  22. data/vendor/simdjson/AUTHORS +3 -0
  23. data/vendor/simdjson/CMakeLists.txt +63 -0
  24. data/vendor/simdjson/CONTRIBUTORS +27 -0
  25. data/vendor/simdjson/Dockerfile +10 -0
  26. data/vendor/simdjson/LICENSE +201 -0
  27. data/vendor/simdjson/Makefile +203 -0
  28. data/vendor/simdjson/Notes.md +85 -0
  29. data/vendor/simdjson/README.md +581 -0
  30. data/vendor/simdjson/amalgamation.sh +158 -0
  31. data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
  32. data/vendor/simdjson/benchmark/benchmark.h +223 -0
  33. data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
  34. data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
  35. data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
  36. data/vendor/simdjson/benchmark/parse.cpp +393 -0
  37. data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
  38. data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
  39. data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
  40. data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
  41. data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
  42. data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
  43. data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
  44. data/vendor/simdjson/doc/gbps.png +0 -0
  45. data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
  46. data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
  47. data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
  48. data/vendor/simdjson/images/halvarflake.png +0 -0
  49. data/vendor/simdjson/images/logo.png +0 -0
  50. data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
  51. data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
  52. data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
  53. data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
  54. data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
  55. data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
  56. data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
  57. data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
  58. data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
  59. data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
  60. data/vendor/simdjson/include/simdjson/portability.h +172 -0
  61. data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
  62. data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
  63. data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
  64. data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
  65. data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
  66. data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
  67. data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
  68. data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
  69. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
  70. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
  71. data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
  72. data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
  73. data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
  74. data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
  75. data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
  76. data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
  77. data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
  78. data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
  79. data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
  80. data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
  81. data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
  82. data/vendor/simdjson/scripts/issue150.sh +14 -0
  83. data/vendor/simdjson/scripts/javascript/README.md +3 -0
  84. data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
  85. data/vendor/simdjson/scripts/minifier.sh +11 -0
  86. data/vendor/simdjson/scripts/parseandstat.sh +24 -0
  87. data/vendor/simdjson/scripts/parser.sh +11 -0
  88. data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
  89. data/vendor/simdjson/scripts/plotparse.sh +98 -0
  90. data/vendor/simdjson/scripts/selectparser.sh +11 -0
  91. data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
  92. data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
  93. data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
  94. data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
  95. data/vendor/simdjson/scripts/testjson2json.sh +99 -0
  96. data/vendor/simdjson/scripts/transitions/Makefile +10 -0
  97. data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
  98. data/vendor/simdjson/singleheader/README.md +1 -0
  99. data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
  100. data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
  101. data/vendor/simdjson/singleheader/simdjson.h +39692 -0
  102. data/vendor/simdjson/src/CMakeLists.txt +67 -0
  103. data/vendor/simdjson/src/jsonioutil.cpp +35 -0
  104. data/vendor/simdjson/src/jsonminifier.cpp +285 -0
  105. data/vendor/simdjson/src/jsonparser.cpp +91 -0
  106. data/vendor/simdjson/src/parsedjson.cpp +323 -0
  107. data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
  108. data/vendor/simdjson/src/simdjson.cpp +30 -0
  109. data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
  110. data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
  111. data/vendor/simdjson/style/clang-format-check.sh +25 -0
  112. data/vendor/simdjson/style/clang-format.sh +25 -0
  113. data/vendor/simdjson/style/run-clang-format.py +326 -0
  114. data/vendor/simdjson/tape.md +134 -0
  115. data/vendor/simdjson/tests/CMakeLists.txt +25 -0
  116. data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
  117. data/vendor/simdjson/tests/basictests.cpp +75 -0
  118. data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
  119. data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
  120. data/vendor/simdjson/tests/pointercheck.cpp +38 -0
  121. data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
  122. data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
  123. data/vendor/simdjson/tools/CMakeLists.txt +3 -0
  124. data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
  125. data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
  126. data/vendor/simdjson/tools/json2json.cpp +112 -0
  127. data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
  128. data/vendor/simdjson/tools/jsonstats.cpp +143 -0
  129. data/vendor/simdjson/tools/minify.cpp +21 -0
  130. data/vendor/simdjson/tools/release.py +125 -0
  131. data/vendor/simdjson/windows/dirent_portable.h +1043 -0
  132. metadata +273 -0
@@ -0,0 +1,172 @@
1
+ #ifndef SIMDJSON_PORTABILITY_H
2
+ #define SIMDJSON_PORTABILITY_H
3
+
4
+ #if defined(__x86_64__) || defined(_M_AMD64)
5
+ #define IS_X86_64 1
6
+ #endif
7
+ #if defined(__aarch64__) || defined(_M_ARM64)
8
+ #define IS_ARM64 1
9
+ #endif
10
+
11
+ // Stringification helper: expands to its argument spelled as a string literal.
12
+ #define STRINGIFY(a) #a
13
+
14
+ // we are going to use runtime dispatch
15
+ #ifdef IS_X86_64
16
+ #ifdef __clang__
17
+ // clang does not have GCC push pop
18
+ // warning: clang attribute push can't be used within a namespace in clang up
19
+ // to 8.0, so TARGET_REGION and UNTARGET_REGION must be *outside* of a
20
+ // namespace.
21
+ #define TARGET_REGION(T) \
22
+ _Pragma(STRINGIFY( \
23
+ clang attribute push(__attribute__((target(T))), apply_to = function)))
24
+ #define UNTARGET_REGION _Pragma("clang attribute pop")
25
+ #elif defined(__GNUC__)
26
+ // GCC is easier
27
+ #define TARGET_REGION(T) \
28
+ _Pragma("GCC push_options") _Pragma(STRINGIFY(GCC target(T)))
29
+ #define UNTARGET_REGION _Pragma("GCC pop_options")
30
+ #else
31
+ #define TARGET_REGION(T)
32
+ #define UNTARGET_REGION
33
+ #endif // clang then gcc
34
+
35
+ // under GCC and CLANG, we use these two macros
36
+ #define TARGET_HASWELL TARGET_REGION("avx2,bmi,pclmul")
37
+ #define TARGET_WESTMERE TARGET_REGION("sse4.2,pclmul")
38
+
39
+ #endif // x86
40
+
41
+ #ifdef _MSC_VER
42
+ #include <intrin.h>
43
+ #else
44
+ #if IS_X86_64
45
+ #include <x86intrin.h>
46
+ #elif IS_ARM64
47
+ #include <arm_neon.h>
48
+ #endif
49
+ #endif
50
+
51
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <cstdint>
#include <iso646.h>

namespace simdjson {
// Stores value1 + value2 (modulo 2^64) in *result and returns true when the
// addition produced a carry out of 64 bits.
static inline bool add_overflow(uint64_t value1, uint64_t value2,
                                uint64_t *result) {
  return _addcarry_u64(0, value1, value2,
                       reinterpret_cast<unsigned __int64 *>(result)) != 0;
}

#pragma intrinsic(_umul128)
// Stores the low 64 bits of value1 * value2 in *result and returns true when
// the high 64 bits of the 128-bit product are non-zero.
static inline bool mul_overflow(uint64_t value1, uint64_t value2,
                                uint64_t *result) {
  uint64_t high;
  *result = _umul128(value1, value2, &high);
  return high != 0;
}

// Number of zero bits below the lowest set bit of input_num.
static inline int trailing_zeroes(uint64_t input_num) {
  return static_cast<int>(_tzcnt_u64(input_num));
}

// Number of zero bits above the highest set bit of input_num; 64 for zero.
// Implemented with _BitScanReverse64 rather than the lzcnt intrinsic: on a
// processor without LZCNT the lzcnt encoding is executed as BSR, which yields
// the bit index instead of the count of leading zeroes.
static inline int leading_zeroes(uint64_t input_num) {
  unsigned long highest_set_bit = 0;
  if (_BitScanReverse64(&highest_set_bit, input_num) == 0) {
    return 64; // no bit is set
  }
  return 63 - static_cast<int>(highest_set_bit);
}

// Number of bits set in input_num.
static inline int hamming(uint64_t input_num) {
#ifdef _WIN64 // highly recommended!!!
  return static_cast<int>(__popcnt64(input_num));
#else // if we must support 32-bit Windows
  return static_cast<int>(__popcnt(static_cast<uint32_t>(input_num)) +
                          __popcnt(static_cast<uint32_t>(input_num >> 32)));
#endif
}
} // namespace simdjson
#else
#include <cstdint>
#include <cstdlib>

namespace simdjson {
// Stores value1 + value2 (modulo 2^64) in *result and returns true when the
// addition overflowed. A correctly typed temporary is handed to the builtin so
// that no uint64_t object is written through an unsigned long long pointer.
static inline bool add_overflow(uint64_t value1, uint64_t value2,
                                uint64_t *result) {
  unsigned long long sum = 0;
  const bool overflowed = __builtin_uaddll_overflow(value1, value2, &sum);
  *result = sum;
  return overflowed;
}

// Stores the low 64 bits of value1 * value2 in *result and returns true when
// the multiplication overflowed.
static inline bool mul_overflow(uint64_t value1, uint64_t value2,
                                uint64_t *result) {
  unsigned long long product = 0;
  const bool overflowed = __builtin_umulll_overflow(value1, value2, &product);
  *result = product;
  return overflowed;
}

/* result might be undefined when input_num is zero */
static inline int trailing_zeroes(uint64_t input_num) {
#ifdef __BMI__ // tzcnt is BMI1
  return static_cast<int>(_tzcnt_u64(input_num));
#else
  return __builtin_ctzll(input_num);
#endif
}

/* result might be undefined when input_num is zero */
static inline int leading_zeroes(uint64_t input_num) {
#ifdef __LZCNT__ // lzcnt is its own ISA extension; it is not part of BMI2
  return static_cast<int>(_lzcnt_u64(input_num));
#else
  return __builtin_clzll(input_num);
#endif
}

/* number of bits set in input_num; well defined (0) when input_num is zero */
static inline int hamming(uint64_t input_num) {
  // The builtin is lowered to a popcnt instruction when the target has one.
  return __builtin_popcountll(input_num);
}
} // namespace simdjson
#endif // _MSC_VER
132
+
133
namespace simdjson {
// Portable counterpart of posix_memalign: returns a block of `size` bytes
// whose address is a multiple of `alignment`, or nullptr when the POSIX
// allocation call reports failure. Release the block with aligned_free().
static inline void *aligned_malloc(size_t alignment, size_t size) {
#ifdef _MSC_VER
  return _aligned_malloc(size, alignment);
#elif defined(__MINGW32__) || defined(__MINGW64__)
  return __mingw_aligned_malloc(size, alignment);
#else
  // Note: calling posix_memalign before "x86intrin.h" has been included has
  // been seen to trigger an implicit-declaration warning.
  void *block;
  const int status = posix_memalign(&block, alignment, size);
  return (status == 0) ? block : nullptr;
#endif
}

// Same as aligned_malloc(), with the result typed as a char buffer.
static inline char *aligned_malloc_char(size_t alignment, size_t size) {
  return static_cast<char *>(aligned_malloc(alignment, size));
}

// Releases a block obtained from aligned_malloc(); a null pointer is ignored.
static inline void aligned_free(void *mem_block) {
  if (mem_block != nullptr) {
#ifdef _MSC_VER
    _aligned_free(mem_block);
#elif defined(__MINGW32__) || defined(__MINGW64__)
    __mingw_aligned_free(mem_block);
#else
    free(mem_block);
#endif
  }
}

// Releases a buffer obtained from aligned_malloc_char().
static inline void aligned_free_char(char *mem_block) {
  aligned_free(static_cast<void *>(mem_block));
}
} // namespace simdjson
172
+ #endif // SIMDJSON_PORTABILITY_H
@@ -0,0 +1,44 @@
1
+ #ifndef SIMDJSON_ERR_H
2
+ #define SIMDJSON_ERR_H
3
+
4
+ #include <string>
5
+
6
namespace simdjson {
// Represents the minimal architecture that would support an implementation.
enum class Architecture {
  WESTMERE,
  HASWELL,
  ARM64,
  NONE,
// TODO remove 'native' in favor of runtime dispatch?
// The 'native' enum class value should point at a good default on the current
// machine. The compiler's own architecture macros are tested next to
// IS_X86_64 / IS_ARM64 so that NATIVE exists no matter whether the header
// defining those helper macros was included before this one.
#if defined(IS_X86_64) || defined(__x86_64__) || defined(_M_AMD64)
  NATIVE = WESTMERE
#elif defined(IS_ARM64) || defined(__aarch64__) || defined(_M_ARM64)
  NATIVE = ARM64
#endif
};

// Status codes; SUCCESS is zero and every other enumerator names a failure.
enum ErrorValues {
  SUCCESS = 0,
  CAPACITY,     // This ParsedJson can't support a document that big
  MEMALLOC,     // Error allocating memory, most likely out of memory
  TAPE_ERROR,   // Something went wrong while writing to the tape (stage 2),
                // this is a generic error
  DEPTH_ERROR,  // Your document exceeds the user-specified depth limitation
  STRING_ERROR, // Problem while parsing a string
  T_ATOM_ERROR, // Problem while parsing an atom starting with the letter 't'
  F_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'f'
  N_ATOM_ERROR, // Problem while parsing an atom starting with the letter 'n'
  NUMBER_ERROR, // Problem while parsing a number
  UTF8_ERROR,   // The input is not valid UTF-8
  UNITIALIZED,  // Unknown error, or uninitialized document (the spelling is
                // part of the public name and is kept as is)
  EMPTY,        // No structural document found
  UNESCAPED_CHARS, // Found unescaped characters in a string
  UNCLOSED_STRING, // Missing quote at the end
  UNEXPECTED_ERROR // Indicative of a bug in simdjson
};

// Declared here, defined in another translation unit.
// NOTE(review): the argument is presumably an ErrorValues code - confirm
// against the definition.
const std::string &error_message(const int);
} // namespace simdjson
44
+ #endif
@@ -0,0 +1,13 @@
1
+ // /include/simdjson/simdjson_version.h automatically generated by release.py,
2
+ // do not change by hand
3
+ #ifndef SIMDJSON_INCLUDE_SIMDJSON_VERSION
4
+ #define SIMDJSON_INCLUDE_SIMDJSON_VERSION
5
+ #define SIMDJSON_VERSION 0.2.1
6
namespace simdjson {
// Numeric components (major.minor.revision) of the bundled simdjson release.
// This header is generated by release.py; change the numbers there.
enum {
  SIMDJSON_VERSION_MAJOR = 0,
  SIMDJSON_VERSION_MINOR = 2,
  SIMDJSON_VERSION_REVISION = 1
};
} // namespace simdjson
13
+ #endif // SIMDJSON_INCLUDE_SIMDJSON_VERSION