simdjson 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (132) hide show
  1. checksums.yaml +7 -0
  2. data/.clang-format +5 -0
  3. data/.gitignore +14 -0
  4. data/.gitmodules +3 -0
  5. data/.rubocop.yml +9 -0
  6. data/.travis.yml +7 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +39 -0
  10. data/Rakefile +32 -0
  11. data/benchmark/apache_builds.json +4421 -0
  12. data/benchmark/demo.json +15 -0
  13. data/benchmark/github_events.json +1390 -0
  14. data/benchmark/run_benchmark.rb +30 -0
  15. data/ext/simdjson/extconf.rb +22 -0
  16. data/ext/simdjson/simdjson.cpp +76 -0
  17. data/ext/simdjson/simdjson.hpp +6 -0
  18. data/lib/simdjson/version.rb +3 -0
  19. data/lib/simdjson.rb +2 -0
  20. data/simdjson.gemspec +35 -0
  21. data/vendor/.gitkeep +0 -0
  22. data/vendor/simdjson/AUTHORS +3 -0
  23. data/vendor/simdjson/CMakeLists.txt +63 -0
  24. data/vendor/simdjson/CONTRIBUTORS +27 -0
  25. data/vendor/simdjson/Dockerfile +10 -0
  26. data/vendor/simdjson/LICENSE +201 -0
  27. data/vendor/simdjson/Makefile +203 -0
  28. data/vendor/simdjson/Notes.md +85 -0
  29. data/vendor/simdjson/README.md +581 -0
  30. data/vendor/simdjson/amalgamation.sh +158 -0
  31. data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
  32. data/vendor/simdjson/benchmark/benchmark.h +223 -0
  33. data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
  34. data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
  35. data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
  36. data/vendor/simdjson/benchmark/parse.cpp +393 -0
  37. data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
  38. data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
  39. data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
  40. data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
  41. data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
  42. data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
  43. data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
  44. data/vendor/simdjson/doc/gbps.png +0 -0
  45. data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
  46. data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
  47. data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
  48. data/vendor/simdjson/images/halvarflake.png +0 -0
  49. data/vendor/simdjson/images/logo.png +0 -0
  50. data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
  51. data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
  52. data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
  53. data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
  54. data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
  55. data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
  56. data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
  57. data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
  58. data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
  59. data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
  60. data/vendor/simdjson/include/simdjson/portability.h +172 -0
  61. data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
  62. data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
  63. data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
  64. data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
  65. data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
  66. data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
  67. data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
  68. data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
  69. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
  70. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
  71. data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
  72. data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
  73. data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
  74. data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
  75. data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
  76. data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
  77. data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
  78. data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
  79. data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
  80. data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
  81. data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
  82. data/vendor/simdjson/scripts/issue150.sh +14 -0
  83. data/vendor/simdjson/scripts/javascript/README.md +3 -0
  84. data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
  85. data/vendor/simdjson/scripts/minifier.sh +11 -0
  86. data/vendor/simdjson/scripts/parseandstat.sh +24 -0
  87. data/vendor/simdjson/scripts/parser.sh +11 -0
  88. data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
  89. data/vendor/simdjson/scripts/plotparse.sh +98 -0
  90. data/vendor/simdjson/scripts/selectparser.sh +11 -0
  91. data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
  92. data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
  93. data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
  94. data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
  95. data/vendor/simdjson/scripts/testjson2json.sh +99 -0
  96. data/vendor/simdjson/scripts/transitions/Makefile +10 -0
  97. data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
  98. data/vendor/simdjson/singleheader/README.md +1 -0
  99. data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
  100. data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
  101. data/vendor/simdjson/singleheader/simdjson.h +39692 -0
  102. data/vendor/simdjson/src/CMakeLists.txt +67 -0
  103. data/vendor/simdjson/src/jsonioutil.cpp +35 -0
  104. data/vendor/simdjson/src/jsonminifier.cpp +285 -0
  105. data/vendor/simdjson/src/jsonparser.cpp +91 -0
  106. data/vendor/simdjson/src/parsedjson.cpp +323 -0
  107. data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
  108. data/vendor/simdjson/src/simdjson.cpp +30 -0
  109. data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
  110. data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
  111. data/vendor/simdjson/style/clang-format-check.sh +25 -0
  112. data/vendor/simdjson/style/clang-format.sh +25 -0
  113. data/vendor/simdjson/style/run-clang-format.py +326 -0
  114. data/vendor/simdjson/tape.md +134 -0
  115. data/vendor/simdjson/tests/CMakeLists.txt +25 -0
  116. data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
  117. data/vendor/simdjson/tests/basictests.cpp +75 -0
  118. data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
  119. data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
  120. data/vendor/simdjson/tests/pointercheck.cpp +38 -0
  121. data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
  122. data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
  123. data/vendor/simdjson/tools/CMakeLists.txt +3 -0
  124. data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
  125. data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
  126. data/vendor/simdjson/tools/json2json.cpp +112 -0
  127. data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
  128. data/vendor/simdjson/tools/jsonstats.cpp +143 -0
  129. data/vendor/simdjson/tools/minify.cpp +21 -0
  130. data/vendor/simdjson/tools/release.py +125 -0
  131. data/vendor/simdjson/windows/dirent_portable.h +1043 -0
  132. metadata +273 -0
@@ -0,0 +1,180 @@
1
+ // From https://github.com/cyb70289/utf8/blob/master/lemire-neon.c
2
+ // Adapted from https://github.com/lemire/fastvalidate-utf-8
3
+
4
+ #ifndef SIMDJSON_SIMDUTF8CHECK_ARM64_H
5
+ #define SIMDJSON_SIMDUTF8CHECK_ARM64_H
6
+
7
+ #if defined(_ARM_NEON) || defined(__aarch64__) || \
8
+ (defined(_MSC_VER) && defined(_M_ARM64))
9
+
10
+ #include <arm_neon.h>
11
+ #include <cinttypes>
12
+ #include <cstddef>
13
+ #include <cstdint>
14
+ #include <cstdio>
15
+ #include <cstring>
16
+
17
+ /*
18
+ * legal utf-8 byte sequence
19
+ * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
20
+ *
21
+ * Code Points 1st 2s 3s 4s
22
+ * U+0000..U+007F 00..7F
23
+ * U+0080..U+07FF C2..DF 80..BF
24
+ * U+0800..U+0FFF E0 A0..BF 80..BF
25
+ * U+1000..U+CFFF E1..EC 80..BF 80..BF
26
+ * U+D000..U+D7FF ED 80..9F 80..BF
27
+ * U+E000..U+FFFF EE..EF 80..BF 80..BF
28
+ * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
29
+ * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
30
+ * U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
31
+ *
32
+ */
33
+ namespace simdjson {
34
+
35
// Flags every byte that is strictly greater than 0xF4; no byte of a
// well-formed UTF-8 sequence may exceed that value.
// For each offending lane a nonzero value is OR-ed into *has_error.
static inline void check_smaller_than_0xF4(int8x16_t current_bytes,
                                           int8x16_t *has_error) {
  const uint8x16_t unsigned_bytes = vreinterpretq_u8_s8(current_bytes);
  // unsigned saturating subtraction: lanes <= 0xF4 collapse to 0
  const uint8x16_t excess = vqsubq_u8(unsigned_bytes, vdupq_n_u8(0xF4));
  *has_error = vorrq_s8(*has_error, vreinterpretq_s8_u8(excess));
}
43
+
44
// Lookup table indexed by the high nibble of a byte. Each entry is the length
// of the sequence introduced by a byte with that high nibble; continuation
// bytes map to 0.
static const int8_t _nibbles[] = {
    // 0x0..0x7: 0xxx (ASCII)
    1, 1, 1, 1, 1, 1, 1, 1,
    // 0x8..0xB: 10xx (continuation)
    0, 0, 0, 0,
    // 0xC..0xD: 110x
    2, 2,
    // 0xE: 1110
    3,
    // 0xF: 1111, next should be 0 (not checked here)
    4,
};
51
+
52
// Translates each high nibble (one per lane, value 0..15) into the sequence
// length stored in the _nibbles table.
static inline int8x16_t continuation_lengths(int8x16_t high_nibbles) {
  const int8x16_t length_table = vld1q_s8(_nibbles);
  const uint8x16_t indices = vreinterpretq_u8_s8(high_nibbles);
  return vqtbl1q_s8(length_table, indices);
}
55
+
56
// Propagates sequence lengths onto the bytes that follow a lead byte.
// previous_carries supplies the trailing lanes of the preceding block so that
// sequences straddling a block boundary are carried over.
static inline int8x16_t carry_continuations(int8x16_t initial_lengths,
                                            int8x16_t previous_carries) {
  // lengths moved one lane to the right, minus 1 (saturating at 0)
  const int8x16_t shifted_by_one =
      vextq_s8(previous_carries, initial_lengths, 16 - 1);
  const int8x16_t right1 = vreinterpretq_s8_u8(
      vqsubq_u8(vreinterpretq_u8_s8(shifted_by_one), vdupq_n_u8(1)));
  const int8x16_t sum = vaddq_s8(initial_lengths, right1);

  // partial sums moved two lanes to the right, minus 2 (saturating at 0)
  const int8x16_t shifted_by_two = vextq_s8(previous_carries, sum, 16 - 2);
  const int8x16_t right2 = vreinterpretq_s8_u8(
      vqsubq_u8(vreinterpretq_u8_s8(shifted_by_two), vdupq_n_u8(2)));
  return vaddq_s8(sum, right2);
}
69
+
70
// Detects overlapping and missing continuation bytes.
//   overlap:  carry > length while length > 0
//   underlap: !(carry > length) while !(length > 0)
// Both cases reduce to (carries > length) == (length > 0); every lane where
// the two comparison masks agree is OR-ed into *has_error.
static inline void check_continuations(int8x16_t initial_lengths,
                                       int8x16_t carries,
                                       int8x16_t *has_error) {
  const uint8x16_t carry_exceeds_length = vcgtq_s8(carries, initial_lengths);
  const uint8x16_t length_is_positive =
      vcgtq_s8(initial_lengths, vdupq_n_s8(0));
  const uint8x16_t overunder =
      vceqq_u8(carry_exceeds_length, length_is_positive);

  *has_error = vorrq_s8(*has_error, vreinterpretq_s8_u8(overunder));
}
82
+
83
// Enforces the upper bound on the byte that follows two special lead bytes:
//   after 0xED the next byte must be no larger than 0x9F
//   after 0xF4 the next byte must be no larger than 0x8F
// The next byte must be a continuation, i.e. its sign bit is set, so the
// signed comparison used here is sufficient.
// off1_current_bytes is compared lane by lane against the lead values, so it
// is expected to hold, for each lane, the byte preceding current_bytes.
static inline void check_first_continuation_max(int8x16_t current_bytes,
                                                int8x16_t off1_current_bytes,
                                                int8x16_t *has_error) {
  const uint8x16_t follows_ED =
      vceqq_s8(off1_current_bytes, vdupq_n_s8(0xED));
  const uint8x16_t follows_F4 =
      vceqq_s8(off1_current_bytes, vdupq_n_s8(0xF4));

  const uint8x16_t above_9F = vcgtq_s8(current_bytes, vdupq_n_s8(0x9F));
  const uint8x16_t above_8F = vcgtq_s8(current_bytes, vdupq_n_s8(0x8F));

  const uint8x16_t bad_after_ED = vandq_u8(above_9F, follows_ED);
  const uint8x16_t bad_after_F4 = vandq_u8(above_8F, follows_F4);

  *has_error = vorrq_s8(
      *has_error, vreinterpretq_s8_u8(vorrq_u8(bad_after_ED, bad_after_F4)));
}
100
+
101
// Lookup table indexed by the high nibble of a lead byte: the smallest lead
// byte value (as a signed 8-bit quantity) that is not flagged by
// check_overlong. -128 is the signed minimum, so nothing compares below it.
static const int8_t _initial_mins[] = {
    // 0x0..0x7: 0xxx => false
    -128, -128, -128, -128, -128, -128, -128, -128,
    // 0x8..0xB: 10xx => false
    -128, -128, -128, -128,
    // 0xC..0xD: 110x
    static_cast<int8_t>(0xC2), -128,
    // 0xE: 1110
    static_cast<int8_t>(0xE1),
    // 0xF: 1111
    static_cast<int8_t>(0xF1),
};
108
+
109
// Lookup table indexed by the high nibble of a lead byte: the smallest value
// (as a signed 8-bit quantity) the following byte may take without being
// flagged by check_overlong.
static const int8_t _second_mins[] = {
    // 0x0..0x7: 0xxx => false
    -128, -128, -128, -128, -128, -128, -128, -128,
    // 0x8..0xB: 10xx => false
    -128, -128, -128, -128,
    // 0xC..0xD: 110x => true
    127, 127,
    // 0xE: 1110
    static_cast<int8_t>(0xA0),
    // 0xF: 1111
    static_cast<int8_t>(0x90),
};
116
+
117
// Detects overlong encodings by mapping the high nibble of the previous byte
// (off1_hibits) to a pair of minimum values:
//   hibits     off1    cur
//   C       => < C2 && true
//   E       => < E1 && < A0
//   F       => < F1 && < 90
//   else      false && false
// A lane is flagged when the previous byte is below its minimum AND the
// current byte is below its minimum (both compared as signed bytes).
static inline void check_overlong(int8x16_t current_bytes,
                                  int8x16_t off1_current_bytes,
                                  int8x16_t hibits, int8x16_t previous_hibits,
                                  int8x16_t *has_error) {
  // high nibbles moved one lane to the right, pulling in the last lane of
  // the previous block
  const int8x16_t off1_hibits = vextq_s8(previous_hibits, hibits, 16 - 1);
  const uint8x16_t table_index = vreinterpretq_u8_s8(off1_hibits);

  const int8x16_t initial_mins =
      vqtbl1q_s8(vld1q_s8(_initial_mins), table_index);
  const uint8x16_t initial_under =
      vcgtq_s8(initial_mins, off1_current_bytes);

  const int8x16_t second_mins =
      vqtbl1q_s8(vld1q_s8(_second_mins), table_index);
  const uint8x16_t second_under = vcgtq_s8(second_mins, current_bytes);

  const uint8x16_t overlong = vandq_u8(initial_under, second_under);
  *has_error = vorrq_s8(*has_error, vreinterpretq_s8_u8(overlong));
}
139
+
140
// State produced for one 16-byte block and consumed when checking the next.
struct processed_utf_bytes {
  // the input bytes exactly as they were loaded
  int8x16_t raw_bytes;
  // each input byte shifted right by four bits
  int8x16_t high_nibbles;
  // result of carry_continuations() for this block
  int8x16_t carried_continuations;
};
145
+
146
// Stores the raw bytes and their high nibbles into *answer.
// answer->carried_continuations is left untouched.
static inline void count_nibbles(int8x16_t bytes,
                                 struct processed_utf_bytes *answer) {
  answer->raw_bytes = bytes;
  // logical (unsigned) shift so the high nibble lands in 0..15
  const uint8x16_t nibbles = vshrq_n_u8(vreinterpretq_u8_s8(bytes), 4);
  answer->high_nibbles = vreinterpretq_s8_u8(nibbles);
}
152
+
153
// Checks whether the 16 bytes in current_bytes continue a valid UTF-8 stream.
// previous holds the state of the preceding block and is only read here; the
// state for the current block is returned so the caller can pass it as
// `previous` for the next block. Any violation is accumulated into
// *has_error as nonzero lanes.
static inline struct processed_utf_bytes
check_utf8_bytes(int8x16_t current_bytes, struct processed_utf_bytes *previous,
                 int8x16_t *has_error) {
  struct processed_utf_bytes current_state;
  count_nibbles(current_bytes, &current_state);

  check_smaller_than_0xF4(current_bytes, has_error);

  const int8x16_t initial_lengths =
      continuation_lengths(current_state.high_nibbles);
  current_state.carried_continuations =
      carry_continuations(initial_lengths, previous->carried_continuations);
  check_continuations(initial_lengths, current_state.carried_continuations,
                      has_error);

  // lane i holds the byte that precedes current_bytes lane i
  const int8x16_t off1_current_bytes =
      vextq_s8(previous->raw_bytes, current_state.raw_bytes, 16 - 1);
  check_first_continuation_max(current_bytes, off1_current_bytes, has_error);
  check_overlong(current_bytes, off1_current_bytes,
                 current_state.high_nibbles, previous->high_nibbles,
                 has_error);
  return current_state;
}
178
+ } // namespace simdjson
179
+ #endif
180
+ #endif
@@ -0,0 +1,198 @@
1
+ #ifndef SIMDJSON_SIMDUTF8CHECK_HASWELL_H
2
+ #define SIMDJSON_SIMDUTF8CHECK_HASWELL_H
3
+
4
+ #include "simdjson/portability.h"
5
+ #include <stddef.h>
6
+ #include <stdint.h>
7
+ #include <string.h>
8
+
9
+ #ifdef IS_X86_64
10
+ /*
11
+ * legal utf-8 byte sequence
12
+ * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
13
+ *
14
+ * Code Points 1st 2s 3s 4s
15
+ * U+0000..U+007F 00..7F
16
+ * U+0080..U+07FF C2..DF 80..BF
17
+ * U+0800..U+0FFF E0 A0..BF 80..BF
18
+ * U+1000..U+CFFF E1..EC 80..BF 80..BF
19
+ * U+D000..U+D7FF ED 80..9F 80..BF
20
+ * U+E000..U+FFFF EE..EF 80..BF 80..BF
21
+ * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
22
+ * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
23
+ * U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
24
+ *
25
+ */
26
+
27
+ // all byte values must be no larger than 0xF4
28
+
29
+ TARGET_HASWELL
30
+ namespace simdjson {
31
// Returns b moved one byte to the right, with the last byte of a inserted at
// the front.
static inline __m256i push_last_byte_of_a_to_b(__m256i a, __m256i b) {
  // 0x21 builds [high 128 bits of a | low 128 bits of b], which gives each
  // 128-bit lane of b its predecessor for the per-lane alignr below
  const __m256i predecessor_lanes = _mm256_permute2x128_si256(a, b, 0x21);
  return _mm256_alignr_epi8(b, predecessor_lanes, 15);
}
34
+
35
// Returns b moved two bytes to the right, with the last two bytes of a
// inserted at the front.
static inline __m256i push_last_2bytes_of_a_to_b(__m256i a, __m256i b) {
  // 0x21 builds [high 128 bits of a | low 128 bits of b], which gives each
  // 128-bit lane of b its predecessor for the per-lane alignr below
  const __m256i predecessor_lanes = _mm256_permute2x128_si256(a, b, 0x21);
  return _mm256_alignr_epi8(b, predecessor_lanes, 14);
}
38
+
39
// Flags every byte that is strictly greater than 0xF4; no byte of a
// well-formed UTF-8 sequence may exceed that value.
// For each offending byte a nonzero value is OR-ed into *has_error.
static inline void avx_check_smaller_than_0xF4(__m256i current_bytes,
                                               __m256i *has_error) {
  // unsigned saturating subtraction: bytes <= 0xF4 collapse to 0
  const __m256i excess =
      _mm256_subs_epu8(current_bytes, _mm256_set1_epi8(0xF4u));
  *has_error = _mm256_or_si256(*has_error, excess);
}
46
+
47
// Translates each high nibble (one per byte, value 0..15) into the length of
// the sequence introduced by a byte with that high nibble; continuation bytes
// map to 0. The 16-entry table is listed twice, once per 128-bit lane.
static inline __m256i avx_continuation_lengths(__m256i high_nibbles) {
  const __m256i length_by_nibble = _mm256_setr_epi8(
      1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII)
      0, 0, 0, 0,             // 10xx (continuation)
      2, 2,                   // 110x
      3,                      // 1110
      4,                      // 1111, next should be 0 (not checked here)
      1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII)
      0, 0, 0, 0,             // 10xx (continuation)
      2, 2,                   // 110x
      3,                      // 1110
      4                       // 1111, next should be 0 (not checked here)
  );
  return _mm256_shuffle_epi8(length_by_nibble, high_nibbles);
}
62
+
63
+ static inline __m256i avx_carry_continuations(__m256i initial_lengths,
64
+ __m256i previous_carries) {
65
+
66
+ __m256i right1 = _mm256_subs_epu8(
67
+ push_last_byte_of_a_to_b(previous_carries, initial_lengths),
68
+ _mm256_set1_epi8(1));
69
+ __m256i sum = _mm256_add_epi8(initial_lengths, right1);
70
+
71
+ __m256i right2 = _mm256_subs_epu8(
72
+ push_last_2bytes_of_a_to_b(previous_carries, sum), _mm256_set1_epi8(2));
73
+ return _mm256_add_epi8(sum, right2);
74
+ }
75
+
76
// Detects overlapping and missing continuation bytes.
//   overlap:  carry > length while length > 0
//   underlap: !(carry > length) while !(length > 0)
// Both cases reduce to (carries > length) == (length > 0); every byte where
// the two comparison masks agree is OR-ed into *has_error.
static inline void avx_check_continuations(__m256i initial_lengths,
                                           __m256i carries,
                                           __m256i *has_error) {
  const __m256i carry_exceeds_length =
      _mm256_cmpgt_epi8(carries, initial_lengths);
  const __m256i length_is_positive =
      _mm256_cmpgt_epi8(initial_lengths, _mm256_setzero_si256());
  const __m256i overunder =
      _mm256_cmpeq_epi8(carry_exceeds_length, length_is_positive);

  *has_error = _mm256_or_si256(*has_error, overunder);
}
89
+
90
// Enforces the upper bound on the byte that follows two special lead bytes:
//   after 0xED the next byte must be no larger than 0x9F
//   after 0xF4 the next byte must be no larger than 0x8F
// The next byte must be a continuation, i.e. its sign bit is set, so the
// signed comparison used here is sufficient.
// off1_current_bytes is compared byte by byte against the lead values, so it
// is expected to hold, for each position, the byte preceding current_bytes.
static inline void avx_check_first_continuation_max(__m256i current_bytes,
                                                    __m256i off1_current_bytes,
                                                    __m256i *has_error) {
  const __m256i follows_ED =
      _mm256_cmpeq_epi8(off1_current_bytes, _mm256_set1_epi8(0xEDu));
  const __m256i follows_F4 =
      _mm256_cmpeq_epi8(off1_current_bytes, _mm256_set1_epi8(0xF4u));

  const __m256i above_9F =
      _mm256_cmpgt_epi8(current_bytes, _mm256_set1_epi8(0x9Fu));
  const __m256i above_8F =
      _mm256_cmpgt_epi8(current_bytes, _mm256_set1_epi8(0x8Fu));

  const __m256i bad_after_ED = _mm256_and_si256(above_9F, follows_ED);
  const __m256i bad_after_F4 = _mm256_and_si256(above_8F, follows_F4);

  *has_error =
      _mm256_or_si256(*has_error, _mm256_or_si256(bad_after_ED, bad_after_F4));
}
109
+
110
+ // map off1_hibits => error condition
111
+ // hibits off1 cur
112
+ // C => < C2 && true
113
+ // E => < E1 && < A0
114
+ // F => < F1 && < 90
115
+ // else false && false
116
+ static inline void avx_check_overlong(__m256i current_bytes,
117
+ __m256i off1_current_bytes,
118
+ __m256i hibits, __m256i previous_hibits,
119
+ __m256i *has_error) {
120
+ __m256i off1_hibits = push_last_byte_of_a_to_b(previous_hibits, hibits);
121
+ __m256i initial_mins = _mm256_shuffle_epi8(
122
+ _mm256_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128,
123
+ -128, -128, -128, // 10xx => false
124
+ 0xC2u, -128, // 110x
125
+ 0xE1u, // 1110
126
+ 0xF1u, // 1111
127
+ -128, -128, -128, -128, -128, -128, -128, -128, -128,
128
+ -128, -128, -128, // 10xx => false
129
+ 0xC2u, -128, // 110x
130
+ 0xE1u, // 1110
131
+ 0xF1u), // 1111
132
+ off1_hibits);
133
+
134
+ __m256i initial_under = _mm256_cmpgt_epi8(initial_mins, off1_current_bytes);
135
+
136
+ __m256i second_mins = _mm256_shuffle_epi8(
137
+ _mm256_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128,
138
+ -128, -128, -128, // 10xx => false
139
+ 127, 127, // 110x => true
140
+ 0xA0u, // 1110
141
+ 0x90u, // 1111
142
+ -128, -128, -128, -128, -128, -128, -128, -128, -128,
143
+ -128, -128, -128, // 10xx => false
144
+ 127, 127, // 110x => true
145
+ 0xA0u, // 1110
146
+ 0x90u), // 1111
147
+ off1_hibits);
148
+ __m256i second_under = _mm256_cmpgt_epi8(second_mins, current_bytes);
149
+ *has_error = _mm256_or_si256(*has_error,
150
+ _mm256_and_si256(initial_under, second_under));
151
+ }
152
+
153
// State produced for one 32-byte block and consumed when checking the next.
struct avx_processed_utf_bytes {
  // the input bytes exactly as they were loaded
  __m256i raw_bytes;
  // each input byte shifted right by four bits
  __m256i high_nibbles;
  // result of avx_carry_continuations() for this block
  __m256i carried_continuations;
};
158
+
159
+ static inline void avx_count_nibbles(__m256i bytes,
160
+ struct avx_processed_utf_bytes *answer) {
161
+ answer->raw_bytes = bytes;
162
+ answer->high_nibbles =
163
+ _mm256_and_si256(_mm256_srli_epi16(bytes, 4), _mm256_set1_epi8(0x0F));
164
+ }
165
+
166
+ // check whether the current bytes are valid UTF-8
167
+ // at the end of the function, previous gets updated
168
+ static inline struct avx_processed_utf_bytes
169
+ avx_check_utf8_bytes(__m256i current_bytes,
170
+ struct avx_processed_utf_bytes *previous,
171
+ __m256i *has_error) {
172
+ struct avx_processed_utf_bytes pb {};
173
+ avx_count_nibbles(current_bytes, &pb);
174
+
175
+ avx_check_smaller_than_0xF4(current_bytes, has_error);
176
+
177
+ __m256i initial_lengths = avx_continuation_lengths(pb.high_nibbles);
178
+
179
+ pb.carried_continuations =
180
+ avx_carry_continuations(initial_lengths, previous->carried_continuations);
181
+
182
+ avx_check_continuations(initial_lengths, pb.carried_continuations, has_error);
183
+
184
+ __m256i off1_current_bytes =
185
+ push_last_byte_of_a_to_b(previous->raw_bytes, pb.raw_bytes);
186
+ avx_check_first_continuation_max(current_bytes, off1_current_bytes,
187
+ has_error);
188
+
189
+ avx_check_overlong(current_bytes, off1_current_bytes, pb.high_nibbles,
190
+ previous->high_nibbles, has_error);
191
+ return pb;
192
+ }
193
+ } // namespace simdjson
194
+ UNTARGET_REGION // haswell
195
+
196
+ #endif // IS_X86_64
197
+
198
+ #endif
@@ -0,0 +1,169 @@
1
+ #ifndef SIMDJSON_SIMDUTF8CHECK_WESTMERE_H
2
+ #define SIMDJSON_SIMDUTF8CHECK_WESTMERE_H
3
+
4
+ #include "simdjson/portability.h"
5
+ #include <stddef.h>
6
+ #include <stdint.h>
7
+ #include <string.h>
8
+ #ifdef IS_X86_64
9
+
10
+ /*
11
+ * legal utf-8 byte sequence
12
+ * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
13
+ *
14
+ * Code Points 1st 2s 3s 4s
15
+ * U+0000..U+007F 00..7F
16
+ * U+0080..U+07FF C2..DF 80..BF
17
+ * U+0800..U+0FFF E0 A0..BF 80..BF
18
+ * U+1000..U+CFFF E1..EC 80..BF 80..BF
19
+ * U+D000..U+D7FF ED 80..9F 80..BF
20
+ * U+E000..U+FFFF EE..EF 80..BF 80..BF
21
+ * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
22
+ * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
23
+ * U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
24
+ *
25
+ */
26
+
27
+ // all byte values must be no larger than 0xF4
28
+
29
+ /********** sse code **********/
30
+ TARGET_WESTMERE
31
+
32
+ namespace simdjson {
33
// Flags every byte that is strictly greater than 0xF4; no byte of a
// well-formed UTF-8 sequence may exceed that value.
// For each offending byte a nonzero value is OR-ed into *has_error.
static inline void check_smaller_than_0xF4(__m128i current_bytes,
                                           __m128i *has_error) {
  // unsigned saturating subtraction: bytes <= 0xF4 collapse to 0
  const __m128i excess = _mm_subs_epu8(current_bytes, _mm_set1_epi8(0xF4u));
  *has_error = _mm_or_si128(*has_error, excess);
}
40
+
41
// Translates each high nibble (one per byte, value 0..15) into the length of
// the sequence introduced by a byte with that high nibble; continuation bytes
// map to 0.
static inline __m128i continuation_lengths(__m128i high_nibbles) {
  const __m128i length_by_nibble = _mm_setr_epi8(
      1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII)
      0, 0, 0, 0,             // 10xx (continuation)
      2, 2,                   // 110x
      3,                      // 1110
      4);                     // 1111, next should be 0 (not checked here)
  return _mm_shuffle_epi8(length_by_nibble, high_nibbles);
}
50
+
51
// Propagates sequence lengths onto the bytes that follow a lead byte.
// previous_carries supplies the trailing bytes of the preceding block so that
// sequences straddling a block boundary are carried over.
static inline __m128i carry_continuations(__m128i initial_lengths,
                                          __m128i previous_carries) {
  // lengths moved one byte to the right, minus 1 (saturating at 0)
  const __m128i shifted_by_one =
      _mm_alignr_epi8(initial_lengths, previous_carries, 16 - 1);
  const __m128i right1 = _mm_subs_epu8(shifted_by_one, _mm_set1_epi8(1));
  const __m128i sum = _mm_add_epi8(initial_lengths, right1);

  // partial sums moved two bytes to the right, minus 2 (saturating at 0)
  const __m128i shifted_by_two =
      _mm_alignr_epi8(sum, previous_carries, 16 - 2);
  const __m128i right2 = _mm_subs_epu8(shifted_by_two, _mm_set1_epi8(2));
  return _mm_add_epi8(sum, right2);
}
63
+
64
// Detects overlapping and missing continuation bytes.
//   overlap:  carry > length while length > 0
//   underlap: !(carry > length) while !(length > 0)
// Both cases reduce to (carries > length) == (length > 0); every byte where
// the two comparison masks agree is OR-ed into *has_error.
static inline void check_continuations(__m128i initial_lengths, __m128i carries,
                                       __m128i *has_error) {
  const __m128i carry_exceeds_length = _mm_cmpgt_epi8(carries, initial_lengths);
  const __m128i length_is_positive =
      _mm_cmpgt_epi8(initial_lengths, _mm_setzero_si128());
  const __m128i overunder =
      _mm_cmpeq_epi8(carry_exceeds_length, length_is_positive);

  *has_error = _mm_or_si128(*has_error, overunder);
}
76
+
77
// Enforces the upper bound on the byte that follows two special lead bytes:
//   after 0xED the next byte must be no larger than 0x9F
//   after 0xF4 the next byte must be no larger than 0x8F
// The next byte must be a continuation, i.e. its sign bit is set, so the
// signed comparison used here is sufficient.
// off1_current_bytes is compared byte by byte against the lead values, so it
// is expected to hold, for each position, the byte preceding current_bytes.
static inline void check_first_continuation_max(__m128i current_bytes,
                                                __m128i off1_current_bytes,
                                                __m128i *has_error) {
  const __m128i follows_ED =
      _mm_cmpeq_epi8(off1_current_bytes, _mm_set1_epi8(0xEDu));
  const __m128i follows_F4 =
      _mm_cmpeq_epi8(off1_current_bytes, _mm_set1_epi8(0xF4u));

  const __m128i above_9F = _mm_cmpgt_epi8(current_bytes, _mm_set1_epi8(0x9Fu));
  const __m128i above_8F = _mm_cmpgt_epi8(current_bytes, _mm_set1_epi8(0x8Fu));

  const __m128i bad_after_ED = _mm_and_si128(above_9F, follows_ED);
  const __m128i bad_after_F4 = _mm_and_si128(above_8F, follows_F4);

  *has_error = _mm_or_si128(*has_error, _mm_or_si128(bad_after_ED, bad_after_F4));
}
93
+
94
// Detects overlong encodings by mapping the high nibble of the previous byte
// (off1_hibits) to a pair of minimum values:
//   hibits     off1    cur
//   C       => < C2 && true
//   E       => < E1 && < A0
//   F       => < F1 && < 90
//   else      false && false
// A byte is flagged when the previous byte is below its minimum AND the
// current byte is below its minimum (both compared as signed bytes).
static inline void check_overlong(__m128i current_bytes,
                                  __m128i off1_current_bytes, __m128i hibits,
                                  __m128i previous_hibits, __m128i *has_error) {
  const __m128i initial_mins_table = _mm_setr_epi8(
      -128, -128, -128, -128, -128, -128, -128, -128, // 0xxx => false
      -128, -128, -128, -128,                         // 10xx => false
      0xC2u, -128,                                    // 110x
      0xE1u,                                          // 1110
      0xF1u);                                         // 1111
  const __m128i second_mins_table = _mm_setr_epi8(
      -128, -128, -128, -128, -128, -128, -128, -128, // 0xxx => false
      -128, -128, -128, -128,                         // 10xx => false
      127, 127,                                       // 110x => true
      0xA0u,                                          // 1110
      0x90u);                                         // 1111

  // high nibbles moved one byte to the right, pulling in the last byte of
  // the previous block
  const __m128i off1_hibits = _mm_alignr_epi8(hibits, previous_hibits, 16 - 1);

  const __m128i initial_mins = _mm_shuffle_epi8(initial_mins_table, off1_hibits);
  const __m128i initial_under = _mm_cmpgt_epi8(initial_mins, off1_current_bytes);

  const __m128i second_mins = _mm_shuffle_epi8(second_mins_table, off1_hibits);
  const __m128i second_under = _mm_cmpgt_epi8(second_mins, current_bytes);

  *has_error =
      _mm_or_si128(*has_error, _mm_and_si128(initial_under, second_under));
}
125
+
126
// State produced for one 16-byte block and consumed when checking the next.
struct processed_utf_bytes {
  // the input bytes exactly as they were loaded
  __m128i raw_bytes;
  // each input byte shifted right by four bits
  __m128i high_nibbles;
  // result of carry_continuations() for this block
  __m128i carried_continuations;
};
131
+
132
+ static inline void count_nibbles(__m128i bytes,
133
+ struct processed_utf_bytes *answer) {
134
+ answer->raw_bytes = bytes;
135
+ answer->high_nibbles =
136
+ _mm_and_si128(_mm_srli_epi16(bytes, 4), _mm_set1_epi8(0x0F));
137
+ }
138
+
139
+ // check whether the current bytes are valid UTF-8
140
+ // at the end of the function, previous gets updated
141
+ static struct processed_utf_bytes
142
+ check_utf8_bytes(__m128i current_bytes, struct processed_utf_bytes *previous,
143
+ __m128i *has_error) {
144
+ struct processed_utf_bytes pb;
145
+ count_nibbles(current_bytes, &pb);
146
+
147
+ check_smaller_than_0xF4(current_bytes, has_error);
148
+
149
+ __m128i initial_lengths = continuation_lengths(pb.high_nibbles);
150
+
151
+ pb.carried_continuations =
152
+ carry_continuations(initial_lengths, previous->carried_continuations);
153
+
154
+ check_continuations(initial_lengths, pb.carried_continuations, has_error);
155
+
156
+ __m128i off1_current_bytes =
157
+ _mm_alignr_epi8(pb.raw_bytes, previous->raw_bytes, 16 - 1);
158
+ check_first_continuation_max(current_bytes, off1_current_bytes, has_error);
159
+
160
+ check_overlong(current_bytes, off1_current_bytes, pb.high_nibbles,
161
+ previous->high_nibbles, has_error);
162
+ return pb;
163
+ }
164
+ } // namespace simdjson
165
+ UNTARGET_REGION // westmere
166
+
167
+ #endif // IS_X86_64
168
+
169
+ #endif
@@ -0,0 +1,121 @@
1
+ #ifndef SIMDJSON_STAGE1_FIND_MARKS_H
2
+ #define SIMDJSON_STAGE1_FIND_MARKS_H
3
+
4
+ #include "simdjson/common_defs.h"
5
+ #include "simdjson/parsedjson.h"
6
+ #include "simdjson/portability.h"
7
+ #include "simdjson/simdjson.h"
8
+ #include <cassert>
9
+
10
+ namespace simdjson {
11
+
12
+ template <Architecture> struct simd_input;
13
+
14
+ template <Architecture> uint64_t compute_quote_mask(uint64_t quote_bits);
15
+
16
namespace {
// Fallback for when clmul is unavailable.
// Computes the prefix XOR of quote_bits: bit i of the result is the XOR of
// bits 0..i of the input.
[[maybe_unused]] uint64_t portable_compute_quote_mask(uint64_t quote_bits) {
  uint64_t prefix_xor = quote_bits;
  // doubling shifts 1, 2, 4, 8, 16, 32 cover all 64 bit positions
  for (unsigned shift = 1; shift < 64; shift <<= 1) {
    prefix_xor ^= prefix_xor << shift;
  }
  return prefix_xor;
}
} // namespace
28
+
29
+ // Holds the state required to perform check_utf8().
30
+ template <Architecture> struct utf8_checking_state;
31
+
32
+ template <Architecture T>
33
+ void check_utf8(simd_input<T> in, utf8_checking_state<T> &state);
34
+
35
+ // Checks if the utf8 validation has found any error.
36
+ template <Architecture T>
37
+ ErrorValues check_utf8_errors(utf8_checking_state<T> &state);
38
+
39
+ // a straightforward comparison of a mask against input.
40
+ template <Architecture T>
41
+ uint64_t cmp_mask_against_input(simd_input<T> in, uint8_t m);
42
+
43
+ template <Architecture T> simd_input<T> fill_input(const uint8_t *ptr);
44
+
45
+ // find all input values less than or equal to m (using unsigned
46
+ // arithmetic)
47
+ template <Architecture T>
48
+ uint64_t unsigned_lteq_against_input(simd_input<T> in, uint8_t m);
49
+
50
+ template <Architecture T>
51
+ really_inline uint64_t find_odd_backslash_sequences(
52
+ simd_input<T> in, uint64_t &prev_iter_ends_odd_backslash);
53
+
54
+ template <Architecture T>
55
+ really_inline uint64_t find_quote_mask_and_bits(
56
+ simd_input<T> in, uint64_t odd_ends, uint64_t &prev_iter_inside_quote,
57
+ uint64_t &quote_bits, uint64_t &error_mask);
58
+
59
+ // do a 'shufti' to detect structural JSON characters
60
+ // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
61
+ // these go into the first 3 buckets of the comparison (1/2/4)
62
+
63
+ // we are also interested in the four whitespace characters
64
+ // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
65
+ // these go into the next 2 buckets of the comparison (8/16)
66
+ template <Architecture T>
67
+ void find_whitespace_and_structurals(simd_input<T> in, uint64_t &whitespace,
68
+ uint64_t &structurals);
69
+
70
+ // return a updated structural bit vector with quoted contents cleared out and
71
+ // pseudo-structural characters added to the mask
72
+ // updates prev_iter_ends_pseudo_pred which tells us whether the previous
73
+ // iteration ended on a whitespace or a structural character (which means that
74
+ // the next iteration
75
+ // will have a pseudo-structural character at its start)
76
+ really_inline uint64_t finalize_structurals(
77
+ uint64_t structurals, uint64_t whitespace, uint64_t quote_mask,
78
+ uint64_t quote_bits, uint64_t &prev_iter_ends_pseudo_pred) {
79
+ // mask off anything inside quotes
80
+ structurals &= ~quote_mask;
81
+ // add the real quote bits back into our bit_mask as well, so we can
82
+ // quickly traverse the strings we've spent all this trouble gathering
83
+ structurals |= quote_bits;
84
+ // Now, establish "pseudo-structural characters". These are non-whitespace
85
+ // characters that are (a) outside quotes and (b) have a predecessor that's
86
+ // either whitespace or a structural character. This means that subsequent
87
+ // passes will get a chance to encounter the first character of every string
88
+ // of non-whitespace and, if we're parsing an atom like true/false/null or a
89
+ // number we can stop at the first whitespace or structural character
90
+ // following it.
91
+
92
+ // a qualified predecessor is something that can happen 1 position before an
93
+ // pseudo-structural character
94
+ uint64_t pseudo_pred = structurals | whitespace;
95
+
96
+ uint64_t shifted_pseudo_pred =
97
+ (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
98
+ prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
99
+ uint64_t pseudo_structurals =
100
+ shifted_pseudo_pred & (~whitespace) & (~quote_mask);
101
+ structurals |= pseudo_structurals;
102
+
103
+ // now, we've used our close quotes all we need to. So let's switch them off
104
+ // they will be off in the quote mask and on in quote bits.
105
+ structurals &= ~(quote_bits & ~quote_mask);
106
+ return structurals;
107
+ }
108
+
109
+ template <Architecture T = Architecture::NATIVE>
110
+ int find_structural_bits(const uint8_t *buf, size_t len,
111
+ simdjson::ParsedJson &pj);
112
+
113
+ template <Architecture T = Architecture::NATIVE>
114
+ int find_structural_bits(const char *buf, size_t len,
115
+ simdjson::ParsedJson &pj) {
116
+ return find_structural_bits((const uint8_t *)buf, len, pj);
117
+ }
118
+
119
+ } // namespace simdjson
120
+
121
+ #endif