google-protobuf 3.16.0 → 3.21.12

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of google-protobuf might be problematic. Click here for more details.

Files changed (34) hide show
  1. checksums.yaml +4 -4
  2. data/ext/google/protobuf_c/convert.c +128 -116
  3. data/ext/google/protobuf_c/convert.h +12 -9
  4. data/ext/google/protobuf_c/defs.c +217 -1521
  5. data/ext/google/protobuf_c/defs.h +19 -19
  6. data/ext/google/protobuf_c/extconf.rb +11 -3
  7. data/ext/google/protobuf_c/map.c +115 -102
  8. data/ext/google/protobuf_c/map.h +7 -7
  9. data/ext/google/protobuf_c/message.c +399 -308
  10. data/ext/google/protobuf_c/message.h +22 -19
  11. data/ext/google/protobuf_c/protobuf.c +89 -63
  12. data/ext/google/protobuf_c/protobuf.h +16 -9
  13. data/ext/google/protobuf_c/repeated_field.c +85 -85
  14. data/ext/google/protobuf_c/repeated_field.h +6 -5
  15. data/ext/google/protobuf_c/ruby-upb.c +10664 -7049
  16. data/ext/google/protobuf_c/ruby-upb.h +4378 -2489
  17. data/ext/google/protobuf_c/third_party/utf8_range/LICENSE +21 -0
  18. data/ext/google/protobuf_c/third_party/utf8_range/naive.c +92 -0
  19. data/ext/google/protobuf_c/third_party/utf8_range/range2-neon.c +157 -0
  20. data/ext/google/protobuf_c/third_party/utf8_range/range2-sse.c +170 -0
  21. data/ext/google/protobuf_c/third_party/utf8_range/utf8_range.h +9 -0
  22. data/ext/google/protobuf_c/wrap_memcpy.c +4 -3
  23. data/lib/google/protobuf/api_pb.rb +1 -0
  24. data/lib/google/protobuf/descriptor_dsl.rb +465 -0
  25. data/lib/google/protobuf/descriptor_pb.rb +269 -0
  26. data/lib/google/protobuf/message_exts.rb +2 -2
  27. data/lib/google/protobuf/repeated_field.rb +15 -2
  28. data/lib/google/protobuf/type_pb.rb +1 -0
  29. data/lib/google/protobuf/well_known_types.rb +12 -2
  30. data/lib/google/protobuf.rb +5 -73
  31. data/tests/basic.rb +144 -9
  32. data/tests/stress.rb +1 -1
  33. metadata +15 -29
  34. data/ext/google/protobuf_c/third_party/wyhash/wyhash.h +0 -145
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019 Yibo Cai
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,92 @@
1
+ #include <stdio.h>
2
+
3
+ /*
4
+ * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
5
+ *
6
+ * Table 3-7. Well-Formed UTF-8 Byte Sequences
7
+ *
8
+ * +--------------------+------------+-------------+------------+-------------+
9
+ * | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte |
10
+ * +--------------------+------------+-------------+------------+-------------+
11
+ * | U+0000..U+007F | 00..7F | | | |
12
+ * +--------------------+------------+-------------+------------+-------------+
13
+ * | U+0080..U+07FF | C2..DF | 80..BF | | |
14
+ * +--------------------+------------+-------------+------------+-------------+
15
+ * | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
16
+ * +--------------------+------------+-------------+------------+-------------+
17
+ * | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
18
+ * +--------------------+------------+-------------+------------+-------------+
19
+ * | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
20
+ * +--------------------+------------+-------------+------------+-------------+
21
+ * | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
22
+ * +--------------------+------------+-------------+------------+-------------+
23
+ * | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
24
+ * +--------------------+------------+-------------+------------+-------------+
25
+ * | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
26
+ * +--------------------+------------+-------------+------------+-------------+
27
+ * | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
28
+ * +--------------------+------------+-------------+------------+-------------+
29
+ */
30
+
31
+ /* Return 0 - success, >0 - index(1 based) of first error char */
32
+ int utf8_naive(const unsigned char *data, int len)
33
+ {
34
+ int err_pos = 1;
35
+
36
+ while (len) {
37
+ int bytes;
38
+ const unsigned char byte1 = data[0];
39
+
40
+ /* 00..7F */
41
+ if (byte1 <= 0x7F) {
42
+ bytes = 1;
43
+ /* C2..DF, 80..BF */
44
+ } else if (len >= 2 && byte1 >= 0xC2 && byte1 <= 0xDF &&
45
+ (signed char)data[1] <= (signed char)0xBF) {
46
+ bytes = 2;
47
+ } else if (len >= 3) {
48
+ const unsigned char byte2 = data[1];
49
+
50
+ /* Is byte2, byte3 between 0x80 ~ 0xBF */
51
+ const int byte2_ok = (signed char)byte2 <= (signed char)0xBF;
52
+ const int byte3_ok = (signed char)data[2] <= (signed char)0xBF;
53
+
54
+ if (byte2_ok && byte3_ok &&
55
+ /* E0, A0..BF, 80..BF */
56
+ ((byte1 == 0xE0 && byte2 >= 0xA0) ||
57
+ /* E1..EC, 80..BF, 80..BF */
58
+ (byte1 >= 0xE1 && byte1 <= 0xEC) ||
59
+ /* ED, 80..9F, 80..BF */
60
+ (byte1 == 0xED && byte2 <= 0x9F) ||
61
+ /* EE..EF, 80..BF, 80..BF */
62
+ (byte1 >= 0xEE && byte1 <= 0xEF))) {
63
+ bytes = 3;
64
+ } else if (len >= 4) {
65
+ /* Is byte4 between 0x80 ~ 0xBF */
66
+ const int byte4_ok = (signed char)data[3] <= (signed char)0xBF;
67
+
68
+ if (byte2_ok && byte3_ok && byte4_ok &&
69
+ /* F0, 90..BF, 80..BF, 80..BF */
70
+ ((byte1 == 0xF0 && byte2 >= 0x90) ||
71
+ /* F1..F3, 80..BF, 80..BF, 80..BF */
72
+ (byte1 >= 0xF1 && byte1 <= 0xF3) ||
73
+ /* F4, 80..8F, 80..BF, 80..BF */
74
+ (byte1 == 0xF4 && byte2 <= 0x8F))) {
75
+ bytes = 4;
76
+ } else {
77
+ return err_pos;
78
+ }
79
+ } else {
80
+ return err_pos;
81
+ }
82
+ } else {
83
+ return err_pos;
84
+ }
85
+
86
+ len -= bytes;
87
+ err_pos += bytes;
88
+ data += bytes;
89
+ }
90
+
91
+ return 0;
92
+ }
@@ -0,0 +1,157 @@
1
+ /*
2
+ * Process 2x16 bytes in each iteration.
3
+ * Comments removed for brevity. See range-neon.c for details.
4
+ */
5
+ #if defined(__aarch64__) && defined(__ARM_NEON)
6
+
7
+ #include <stdio.h>
8
+ #include <stdint.h>
9
+ #include <arm_neon.h>
10
+
11
+ int utf8_naive(const unsigned char *data, int len);
12
+
13
+ static const uint8_t _first_len_tbl[] = {
14
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
15
+ };
16
+
17
+ static const uint8_t _first_range_tbl[] = {
18
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
19
+ };
20
+
21
+ static const uint8_t _range_min_tbl[] = {
22
+ 0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
23
+ 0xC2, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
24
+ };
25
+ static const uint8_t _range_max_tbl[] = {
26
+ 0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
27
+ 0xF4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
28
+ };
29
+
30
+ static const uint8_t _range_adjust_tbl[] = {
31
+ 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0,
32
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0,
33
+ };
34
+
35
+ /* Return 0 on success, -1 on error */
36
+ int utf8_range2(const unsigned char *data, int len)
37
+ {
38
+ if (len >= 32) {
39
+ uint8x16_t prev_input = vdupq_n_u8(0);
40
+ uint8x16_t prev_first_len = vdupq_n_u8(0);
41
+
42
+ const uint8x16_t first_len_tbl = vld1q_u8(_first_len_tbl);
43
+ const uint8x16_t first_range_tbl = vld1q_u8(_first_range_tbl);
44
+ const uint8x16_t range_min_tbl = vld1q_u8(_range_min_tbl);
45
+ const uint8x16_t range_max_tbl = vld1q_u8(_range_max_tbl);
46
+ const uint8x16x2_t range_adjust_tbl = vld2q_u8(_range_adjust_tbl);
47
+
48
+ const uint8x16_t const_1 = vdupq_n_u8(1);
49
+ const uint8x16_t const_2 = vdupq_n_u8(2);
50
+ const uint8x16_t const_e0 = vdupq_n_u8(0xE0);
51
+
52
+ uint8x16_t error1 = vdupq_n_u8(0);
53
+ uint8x16_t error2 = vdupq_n_u8(0);
54
+ uint8x16_t error3 = vdupq_n_u8(0);
55
+ uint8x16_t error4 = vdupq_n_u8(0);
56
+
57
+ while (len >= 32) {
58
+ /******************* two blocks interleaved **********************/
59
+
60
+ #if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ < 8)
61
+ /* gcc doesn't support vldq1_u8_x2 until version 8 */
62
+ const uint8x16_t input_a = vld1q_u8(data);
63
+ const uint8x16_t input_b = vld1q_u8(data + 16);
64
+ #else
65
+ /* Forces a double load on Clang */
66
+ const uint8x16x2_t input_pair = vld1q_u8_x2(data);
67
+ const uint8x16_t input_a = input_pair.val[0];
68
+ const uint8x16_t input_b = input_pair.val[1];
69
+ #endif
70
+
71
+ const uint8x16_t high_nibbles_a = vshrq_n_u8(input_a, 4);
72
+ const uint8x16_t high_nibbles_b = vshrq_n_u8(input_b, 4);
73
+
74
+ const uint8x16_t first_len_a =
75
+ vqtbl1q_u8(first_len_tbl, high_nibbles_a);
76
+ const uint8x16_t first_len_b =
77
+ vqtbl1q_u8(first_len_tbl, high_nibbles_b);
78
+
79
+ uint8x16_t range_a = vqtbl1q_u8(first_range_tbl, high_nibbles_a);
80
+ uint8x16_t range_b = vqtbl1q_u8(first_range_tbl, high_nibbles_b);
81
+
82
+ range_a =
83
+ vorrq_u8(range_a, vextq_u8(prev_first_len, first_len_a, 15));
84
+ range_b =
85
+ vorrq_u8(range_b, vextq_u8(first_len_a, first_len_b, 15));
86
+
87
+ uint8x16_t tmp1_a, tmp2_a, tmp1_b, tmp2_b;
88
+ tmp1_a = vextq_u8(prev_first_len, first_len_a, 14);
89
+ tmp1_a = vqsubq_u8(tmp1_a, const_1);
90
+ range_a = vorrq_u8(range_a, tmp1_a);
91
+
92
+ tmp1_b = vextq_u8(first_len_a, first_len_b, 14);
93
+ tmp1_b = vqsubq_u8(tmp1_b, const_1);
94
+ range_b = vorrq_u8(range_b, tmp1_b);
95
+
96
+ tmp2_a = vextq_u8(prev_first_len, first_len_a, 13);
97
+ tmp2_a = vqsubq_u8(tmp2_a, const_2);
98
+ range_a = vorrq_u8(range_a, tmp2_a);
99
+
100
+ tmp2_b = vextq_u8(first_len_a, first_len_b, 13);
101
+ tmp2_b = vqsubq_u8(tmp2_b, const_2);
102
+ range_b = vorrq_u8(range_b, tmp2_b);
103
+
104
+ uint8x16_t shift1_a = vextq_u8(prev_input, input_a, 15);
105
+ uint8x16_t pos_a = vsubq_u8(shift1_a, const_e0);
106
+ range_a = vaddq_u8(range_a, vqtbl2q_u8(range_adjust_tbl, pos_a));
107
+
108
+ uint8x16_t shift1_b = vextq_u8(input_a, input_b, 15);
109
+ uint8x16_t pos_b = vsubq_u8(shift1_b, const_e0);
110
+ range_b = vaddq_u8(range_b, vqtbl2q_u8(range_adjust_tbl, pos_b));
111
+
112
+ uint8x16_t minv_a = vqtbl1q_u8(range_min_tbl, range_a);
113
+ uint8x16_t maxv_a = vqtbl1q_u8(range_max_tbl, range_a);
114
+
115
+ uint8x16_t minv_b = vqtbl1q_u8(range_min_tbl, range_b);
116
+ uint8x16_t maxv_b = vqtbl1q_u8(range_max_tbl, range_b);
117
+
118
+ error1 = vorrq_u8(error1, vcltq_u8(input_a, minv_a));
119
+ error2 = vorrq_u8(error2, vcgtq_u8(input_a, maxv_a));
120
+
121
+ error3 = vorrq_u8(error3, vcltq_u8(input_b, minv_b));
122
+ error4 = vorrq_u8(error4, vcgtq_u8(input_b, maxv_b));
123
+
124
+ /************************ next iteration *************************/
125
+ prev_input = input_b;
126
+ prev_first_len = first_len_b;
127
+
128
+ data += 32;
129
+ len -= 32;
130
+ }
131
+ error1 = vorrq_u8(error1, error2);
132
+ error1 = vorrq_u8(error1, error3);
133
+ error1 = vorrq_u8(error1, error4);
134
+
135
+ if (vmaxvq_u8(error1))
136
+ return -1;
137
+
138
+ uint32_t token4;
139
+ vst1q_lane_u32(&token4, vreinterpretq_u32_u8(prev_input), 3);
140
+
141
+ const int8_t *token = (const int8_t *)&token4;
142
+ int lookahead = 0;
143
+ if (token[3] > (int8_t)0xBF)
144
+ lookahead = 1;
145
+ else if (token[2] > (int8_t)0xBF)
146
+ lookahead = 2;
147
+ else if (token[1] > (int8_t)0xBF)
148
+ lookahead = 3;
149
+
150
+ data -= lookahead;
151
+ len += lookahead;
152
+ }
153
+
154
+ return utf8_naive(data, len);
155
+ }
156
+
157
+ #endif
@@ -0,0 +1,170 @@
1
+ /*
2
+ * Process 2x16 bytes in each iteration.
3
+ * Comments removed for brevity. See range-sse.c for details.
4
+ */
5
+ #ifdef __SSE4_1__
6
+
7
+ #include <stdio.h>
8
+ #include <stdint.h>
9
+ #include <x86intrin.h>
10
+
11
+ int utf8_naive(const unsigned char *data, int len);
12
+
13
+ static const int8_t _first_len_tbl[] = {
14
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
15
+ };
16
+
17
+ static const int8_t _first_range_tbl[] = {
18
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
19
+ };
20
+
21
+ static const int8_t _range_min_tbl[] = {
22
+ 0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
23
+ 0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F,
24
+ };
25
+ static const int8_t _range_max_tbl[] = {
26
+ 0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
27
+ 0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
28
+ };
29
+
30
+ static const int8_t _df_ee_tbl[] = {
31
+ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
32
+ };
33
+ static const int8_t _ef_fe_tbl[] = {
34
+ 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35
+ };
36
+
37
+ /* Return 0 on success, -1 on error */
38
+ int utf8_range2(const unsigned char *data, int len)
39
+ {
40
+ if (len >= 32) {
41
+ __m128i prev_input = _mm_set1_epi8(0);
42
+ __m128i prev_first_len = _mm_set1_epi8(0);
43
+
44
+ const __m128i first_len_tbl =
45
+ _mm_loadu_si128((const __m128i *)_first_len_tbl);
46
+ const __m128i first_range_tbl =
47
+ _mm_loadu_si128((const __m128i *)_first_range_tbl);
48
+ const __m128i range_min_tbl =
49
+ _mm_loadu_si128((const __m128i *)_range_min_tbl);
50
+ const __m128i range_max_tbl =
51
+ _mm_loadu_si128((const __m128i *)_range_max_tbl);
52
+ const __m128i df_ee_tbl =
53
+ _mm_loadu_si128((const __m128i *)_df_ee_tbl);
54
+ const __m128i ef_fe_tbl =
55
+ _mm_loadu_si128((const __m128i *)_ef_fe_tbl);
56
+
57
+ __m128i error = _mm_set1_epi8(0);
58
+
59
+ while (len >= 32) {
60
+ /***************************** block 1 ****************************/
61
+ const __m128i input_a = _mm_loadu_si128((const __m128i *)data);
62
+
63
+ __m128i high_nibbles =
64
+ _mm_and_si128(_mm_srli_epi16(input_a, 4), _mm_set1_epi8(0x0F));
65
+
66
+ __m128i first_len_a = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
67
+
68
+ __m128i range_a = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
69
+
70
+ range_a = _mm_or_si128(
71
+ range_a, _mm_alignr_epi8(first_len_a, prev_first_len, 15));
72
+
73
+ __m128i tmp;
74
+ tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 14);
75
+ tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
76
+ range_a = _mm_or_si128(range_a, tmp);
77
+
78
+ tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 13);
79
+ tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
80
+ range_a = _mm_or_si128(range_a, tmp);
81
+
82
+ __m128i shift1, pos, range2;
83
+ shift1 = _mm_alignr_epi8(input_a, prev_input, 15);
84
+ pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
85
+ tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
86
+ range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
87
+ tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
88
+ range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp));
89
+
90
+ range_a = _mm_add_epi8(range_a, range2);
91
+
92
+ __m128i minv = _mm_shuffle_epi8(range_min_tbl, range_a);
93
+ __m128i maxv = _mm_shuffle_epi8(range_max_tbl, range_a);
94
+
95
+ tmp = _mm_or_si128(
96
+ _mm_cmplt_epi8(input_a, minv),
97
+ _mm_cmpgt_epi8(input_a, maxv)
98
+ );
99
+ error = _mm_or_si128(error, tmp);
100
+
101
+ /***************************** block 2 ****************************/
102
+ const __m128i input_b = _mm_loadu_si128((const __m128i *)(data+16));
103
+
104
+ high_nibbles =
105
+ _mm_and_si128(_mm_srli_epi16(input_b, 4), _mm_set1_epi8(0x0F));
106
+
107
+ __m128i first_len_b = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
108
+
109
+ __m128i range_b = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
110
+
111
+ range_b = _mm_or_si128(
112
+ range_b, _mm_alignr_epi8(first_len_b, first_len_a, 15));
113
+
114
+
115
+ tmp = _mm_alignr_epi8(first_len_b, first_len_a, 14);
116
+ tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
117
+ range_b = _mm_or_si128(range_b, tmp);
118
+
119
+ tmp = _mm_alignr_epi8(first_len_b, first_len_a, 13);
120
+ tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
121
+ range_b = _mm_or_si128(range_b, tmp);
122
+
123
+ shift1 = _mm_alignr_epi8(input_b, input_a, 15);
124
+ pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
125
+ tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
126
+ range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
127
+ tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
128
+ range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp));
129
+
130
+ range_b = _mm_add_epi8(range_b, range2);
131
+
132
+ minv = _mm_shuffle_epi8(range_min_tbl, range_b);
133
+ maxv = _mm_shuffle_epi8(range_max_tbl, range_b);
134
+
135
+
136
+ tmp = _mm_or_si128(
137
+ _mm_cmplt_epi8(input_b, minv),
138
+ _mm_cmpgt_epi8(input_b, maxv)
139
+ );
140
+ error = _mm_or_si128(error, tmp);
141
+
142
+ /************************ next iteration **************************/
143
+ prev_input = input_b;
144
+ prev_first_len = first_len_b;
145
+
146
+ data += 32;
147
+ len -= 32;
148
+ }
149
+
150
+ if (!_mm_testz_si128(error, error))
151
+ return -1;
152
+
153
+ int32_t token4 = _mm_extract_epi32(prev_input, 3);
154
+ const int8_t *token = (const int8_t *)&token4;
155
+ int lookahead = 0;
156
+ if (token[3] > (int8_t)0xBF)
157
+ lookahead = 1;
158
+ else if (token[2] > (int8_t)0xBF)
159
+ lookahead = 2;
160
+ else if (token[1] > (int8_t)0xBF)
161
+ lookahead = 3;
162
+
163
+ data -= lookahead;
164
+ len += lookahead;
165
+ }
166
+
167
+ return utf8_naive(data, len);
168
+ }
169
+
170
+ #endif
@@ -0,0 +1,9 @@
1
+
2
+ #if ((defined(__ARM_NEON) && defined(__aarch64__)) || defined(__SSE4_1__)) && !defined(TRUFFLERUBY)
3
+ int utf8_range2(const unsigned char* data, int len);
4
+ #else
5
+ int utf8_naive(const unsigned char* data, int len);
6
+ static inline int utf8_range2(const unsigned char* data, int len) {
7
+ return utf8_naive(data, len);
8
+ }
9
+ #endif
@@ -33,7 +33,8 @@
33
33
  // On x86-64 Linux with glibc, we link against the 2.2.5 version of memcpy so
34
34
  // that we avoid depending on the 2.14 version of the symbol. This way,
35
35
  // distributions that are using pre-2.14 versions of glibc can successfully use
36
- // the gem we distribute (https://github.com/protocolbuffers/protobuf/issues/2783).
36
+ // the gem we distribute
37
+ // (https://github.com/protocolbuffers/protobuf/issues/2783).
37
38
  //
38
39
  // This wrapper is enabled by passing the linker flags -Wl,-wrap,memcpy in
39
40
  // extconf.rb.
@@ -41,11 +42,11 @@
41
42
  #if defined(__x86_64__) && defined(__GNU_LIBRARY__)
42
43
  __asm__(".symver memcpy,memcpy@GLIBC_2.2.5");
43
44
  void *__wrap_memcpy(void *dest, const void *src, size_t n) {
44
- return memcpy(dest, src, n);
45
+ return memcpy(dest, src, n);
45
46
  }
46
47
  #else
47
48
  void *__wrap_memcpy(void *dest, const void *src, size_t n) {
48
- return memmove(dest, src, n);
49
+ return memmove(dest, src, n);
49
50
  }
50
51
  #endif
51
52
  #endif
@@ -5,6 +5,7 @@ require 'google/protobuf'
5
5
 
6
6
  require 'google/protobuf/source_context_pb'
7
7
  require 'google/protobuf/type_pb'
8
+
8
9
  Google::Protobuf::DescriptorPool.generated_pool.build do
9
10
  add_file("google/protobuf/api.proto", :syntax => :proto3) do
10
11
  add_message "google.protobuf.Api" do