google-protobuf 3.24.3-java → 3.25.0-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of google-protobuf might be problematic. Click here for more details.

Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/ext/google/protobuf_c/Rakefile +3 -0
  3. data/ext/google/protobuf_c/glue.c +21 -0
  4. data/ext/google/protobuf_c/message.c +1388 -0
  5. data/ext/google/protobuf_c/message.h +79 -0
  6. data/ext/google/protobuf_c/protobuf.c +343 -0
  7. data/ext/google/protobuf_c/protobuf.h +112 -0
  8. data/ext/google/protobuf_c/ruby-upb.c +14414 -0
  9. data/ext/google/protobuf_c/ruby-upb.h +13044 -0
  10. data/ext/google/protobuf_c/shared_convert.c +64 -0
  11. data/ext/google/protobuf_c/shared_convert.h +26 -0
  12. data/ext/google/protobuf_c/shared_message.c +65 -0
  13. data/ext/google/protobuf_c/shared_message.h +25 -0
  14. data/ext/google/protobuf_c/third_party/utf8_range/LICENSE +22 -0
  15. data/ext/google/protobuf_c/third_party/utf8_range/naive.c +92 -0
  16. data/ext/google/protobuf_c/third_party/utf8_range/range2-neon.c +157 -0
  17. data/ext/google/protobuf_c/third_party/utf8_range/range2-sse.c +170 -0
  18. data/ext/google/protobuf_c/third_party/utf8_range/utf8_range.h +21 -0
  19. data/lib/google/protobuf/any_pb.rb +1 -1
  20. data/lib/google/protobuf/api_pb.rb +1 -1
  21. data/lib/google/protobuf/descriptor_pb.rb +6 -3
  22. data/lib/google/protobuf/duration_pb.rb +1 -1
  23. data/lib/google/protobuf/empty_pb.rb +1 -1
  24. data/lib/google/protobuf/ffi/descriptor.rb +154 -0
  25. data/lib/google/protobuf/ffi/descriptor_pool.rb +70 -0
  26. data/lib/google/protobuf/ffi/enum_descriptor.rb +161 -0
  27. data/lib/google/protobuf/ffi/ffi.rb +213 -0
  28. data/lib/google/protobuf/ffi/field_descriptor.rb +309 -0
  29. data/lib/google/protobuf/ffi/file_descriptor.rb +48 -0
  30. data/lib/google/protobuf/ffi/internal/arena.rb +66 -0
  31. data/lib/google/protobuf/ffi/internal/convert.rb +305 -0
  32. data/lib/google/protobuf/ffi/internal/pointer_helper.rb +35 -0
  33. data/lib/google/protobuf/ffi/internal/type_safety.rb +25 -0
  34. data/lib/google/protobuf/ffi/map.rb +396 -0
  35. data/lib/google/protobuf/ffi/message.rb +641 -0
  36. data/lib/google/protobuf/ffi/object_cache.rb +30 -0
  37. data/lib/google/protobuf/ffi/oneof_descriptor.rb +88 -0
  38. data/lib/google/protobuf/ffi/repeated_field.rb +503 -0
  39. data/lib/google/protobuf/field_mask_pb.rb +1 -1
  40. data/lib/google/protobuf/message_exts.rb +3 -26
  41. data/lib/google/protobuf/object_cache.rb +3 -26
  42. data/lib/google/protobuf/plugin_pb.rb +1 -1
  43. data/lib/google/protobuf/repeated_field.rb +3 -26
  44. data/lib/google/protobuf/source_context_pb.rb +1 -1
  45. data/lib/google/protobuf/struct_pb.rb +1 -1
  46. data/lib/google/protobuf/timestamp_pb.rb +1 -1
  47. data/lib/google/protobuf/type_pb.rb +1 -1
  48. data/lib/google/protobuf/well_known_types.rb +3 -26
  49. data/lib/google/protobuf/wrappers_pb.rb +1 -1
  50. data/lib/google/protobuf.rb +26 -45
  51. data/lib/google/protobuf_ffi.rb +50 -0
  52. data/lib/google/protobuf_java.jar +0 -0
  53. data/lib/google/protobuf_native.rb +20 -0
  54. data/lib/google/tasks/ffi.rake +102 -0
  55. metadata +110 -4
@@ -0,0 +1,64 @@
1
+ // Protocol Buffers - Google's data interchange format
2
+ // Copyright 2023 Google Inc. All rights reserved.
3
+ //
4
+ // Use of this source code is governed by a BSD-style
5
+ // license that can be found in the LICENSE file or at
6
+ // https://developers.google.com/open-source/licenses/bsd
7
+
8
+ // -----------------------------------------------------------------------------
9
+ // Ruby <-> upb data conversion functions. Strictly free of dependencies on
10
+ // Ruby interpreter internals.
11
+
12
+ #include "shared_convert.h"
13
+
14
+ bool shared_Msgval_IsEqual(upb_MessageValue val1, upb_MessageValue val2,
15
+ upb_CType type, upb_MessageDef* msgdef,
16
+ upb_Status* status) {
17
+ switch (type) {
18
+ case kUpb_CType_Bool:
19
+ return memcmp(&val1, &val2, 1) == 0;
20
+ case kUpb_CType_Float:
21
+ case kUpb_CType_Int32:
22
+ case kUpb_CType_UInt32:
23
+ case kUpb_CType_Enum:
24
+ return memcmp(&val1, &val2, 4) == 0;
25
+ case kUpb_CType_Double:
26
+ case kUpb_CType_Int64:
27
+ case kUpb_CType_UInt64:
28
+ return memcmp(&val1, &val2, 8) == 0;
29
+ case kUpb_CType_String:
30
+ case kUpb_CType_Bytes:
31
+ return val1.str_val.size == val2.str_val.size &&
32
+ memcmp(val1.str_val.data, val2.str_val.data, val1.str_val.size) ==
33
+ 0;
34
+ case kUpb_CType_Message:
35
+ return shared_Message_Equal(val1.msg_val, val2.msg_val, msgdef, status);
36
+ default:
37
+ upb_Status_SetErrorMessage(status, "Internal error, unexpected type");
38
+ }
39
+ }
40
+
41
+ uint64_t shared_Msgval_GetHash(upb_MessageValue val, upb_CType type,
42
+ upb_MessageDef* msgdef, uint64_t seed,
43
+ upb_Status* status) {
44
+ switch (type) {
45
+ case kUpb_CType_Bool:
46
+ return _upb_Hash(&val, 1, seed);
47
+ case kUpb_CType_Float:
48
+ case kUpb_CType_Int32:
49
+ case kUpb_CType_UInt32:
50
+ case kUpb_CType_Enum:
51
+ return _upb_Hash(&val, 4, seed);
52
+ case kUpb_CType_Double:
53
+ case kUpb_CType_Int64:
54
+ case kUpb_CType_UInt64:
55
+ return _upb_Hash(&val, 8, seed);
56
+ case kUpb_CType_String:
57
+ case kUpb_CType_Bytes:
58
+ return _upb_Hash(val.str_val.data, val.str_val.size, seed);
59
+ case kUpb_CType_Message:
60
+ return shared_Message_Hash(val.msg_val, msgdef, seed, status);
61
+ default:
62
+ upb_Status_SetErrorMessage(status, "Internal error, unexpected type");
63
+ }
64
+ }
@@ -0,0 +1,26 @@
1
+ // Protocol Buffers - Google's data interchange format
2
+ // Copyright 2023 Google Inc. All rights reserved.
3
+ //
4
+ // Use of this source code is governed by a BSD-style
5
+ // license that can be found in the LICENSE file or at
6
+ // https://developers.google.com/open-source/licenses/bsd
7
+
8
+ // -----------------------------------------------------------------------------
9
+ // Ruby <-> upb data conversion functions. Strictly free of dependencies on
10
+ // Ruby interpreter internals.
11
+
12
+ #ifndef RUBY_PROTOBUF_SHARED_CONVERT_H_
13
+ #define RUBY_PROTOBUF_SHARED_CONVERT_H_
14
+
15
+ #include "ruby-upb.h"
16
+ #include "shared_message.h"
17
+
18
+ bool shared_Msgval_IsEqual(upb_MessageValue val1, upb_MessageValue val2,
19
+ upb_CType type, upb_MessageDef* msgdef,
20
+ upb_Status* status);
21
+
22
+ uint64_t shared_Msgval_GetHash(upb_MessageValue val, upb_CType type,
23
+ upb_MessageDef* msgdef, uint64_t seed,
24
+ upb_Status* status);
25
+
26
+ #endif // RUBY_PROTOBUF_SHARED_CONVERT_H_
@@ -0,0 +1,65 @@
1
+ // Protocol Buffers - Google's data interchange format
2
+ // Copyright 2023 Google Inc. All rights reserved.
3
+ //
4
+ // Use of this source code is governed by a BSD-style
5
+ // license that can be found in the LICENSE file or at
6
+ // https://developers.google.com/open-source/licenses/bsd
7
+
8
+ // -----------------------------------------------------------------------------
9
+ // Ruby Message functions. Strictly free of dependencies on
10
+ // Ruby interpreter internals.
11
+
12
+ #include "shared_message.h"
13
+
14
+ // Support function for Message_Hash. Returns a hash value for the given
15
+ // message.
16
+ uint64_t shared_Message_Hash(const upb_Message* msg, const upb_MessageDef* m,
17
+ uint64_t seed, upb_Status* status) {
18
+ upb_Arena* arena = upb_Arena_New();
19
+ char* data;
20
+ size_t size;
21
+
22
+ // Hash a deterministically serialized payloads with no unknown fields.
23
+ upb_EncodeStatus encode_status = upb_Encode(
24
+ msg, upb_MessageDef_MiniTable(m),
25
+ kUpb_EncodeOption_SkipUnknown | kUpb_EncodeOption_Deterministic, arena,
26
+ &data, &size);
27
+
28
+ if (encode_status == kUpb_EncodeStatus_Ok) {
29
+ uint64_t ret = _upb_Hash(data, size, seed);
30
+ upb_Arena_Free(arena);
31
+ return ret;
32
+ } else {
33
+ upb_Arena_Free(arena);
34
+ upb_Status_SetErrorMessage(status, "Error calculating hash");
35
+ }
36
+ }
37
+
38
+ // Support function for Message_Equal
39
+ bool shared_Message_Equal(const upb_Message* m1, const upb_Message* m2,
40
+ const upb_MessageDef* m, upb_Status* status) {
41
+ if (m1 == m2) return true;
42
+
43
+ size_t size1, size2;
44
+ int encode_opts =
45
+ kUpb_EncodeOption_SkipUnknown | kUpb_EncodeOption_Deterministic;
46
+ upb_Arena* arena_tmp = upb_Arena_New();
47
+ const upb_MiniTable* layout = upb_MessageDef_MiniTable(m);
48
+
49
+ // Compare deterministically serialized payloads with no unknown fields.
50
+ char* data1;
51
+ char* data2;
52
+ upb_EncodeStatus status1 =
53
+ upb_Encode(m1, layout, encode_opts, arena_tmp, &data1, &size1);
54
+ upb_EncodeStatus status2 =
55
+ upb_Encode(m2, layout, encode_opts, arena_tmp, &data2, &size2);
56
+
57
+ if (status1 == kUpb_EncodeStatus_Ok && status2 == kUpb_EncodeStatus_Ok) {
58
+ bool ret = (size1 == size2) && (memcmp(data1, data2, size1) == 0);
59
+ upb_Arena_Free(arena_tmp);
60
+ return ret;
61
+ } else {
62
+ upb_Arena_Free(arena_tmp);
63
+ upb_Status_SetErrorMessage(status, "Error comparing messages");
64
+ }
65
+ }
@@ -0,0 +1,25 @@
1
+ // Protocol Buffers - Google's data interchange format
2
+ // Copyright 2023 Google Inc. All rights reserved.
3
+ //
4
+ // Use of this source code is governed by a BSD-style
5
+ // license that can be found in the LICENSE file or at
6
+ // https://developers.google.com/open-source/licenses/bsd
7
+
8
+ // -----------------------------------------------------------------------------
9
+ // Ruby Message functions. Strictly free of dependencies on
10
+ // Ruby interpreter internals.
11
+
12
+ #ifndef RUBY_PROTOBUF_SHARED_MESSAGE_H_
13
+ #define RUBY_PROTOBUF_SHARED_MESSAGE_H_
14
+
15
+ #include "ruby-upb.h"
16
+
17
+ // Returns a hash value for the given message.
18
+ uint64_t shared_Message_Hash(const upb_Message* msg, const upb_MessageDef* m,
19
+ uint64_t seed, upb_Status* status);
20
+
21
+ // Returns true if these two messages are equal.
22
+ bool shared_Message_Equal(const upb_Message* m1, const upb_Message* m2,
23
+ const upb_MessageDef* m, upb_Status* status);
24
+
25
+ #endif // RUBY_PROTOBUF_SHARED_MESSAGE_H_
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019 Yibo Cai
4
+ Copyright 2022 Google LLC
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
@@ -0,0 +1,92 @@
1
+ #include <stdio.h>
2
+
3
+ /*
4
+ * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
5
+ *
6
+ * Table 3-7. Well-Formed UTF-8 Byte Sequences
7
+ *
8
+ * +--------------------+------------+-------------+------------+-------------+
9
+ * | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte |
10
+ * +--------------------+------------+-------------+------------+-------------+
11
+ * | U+0000..U+007F | 00..7F | | | |
12
+ * +--------------------+------------+-------------+------------+-------------+
13
+ * | U+0080..U+07FF | C2..DF | 80..BF | | |
14
+ * +--------------------+------------+-------------+------------+-------------+
15
+ * | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
16
+ * +--------------------+------------+-------------+------------+-------------+
17
+ * | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
18
+ * +--------------------+------------+-------------+------------+-------------+
19
+ * | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
20
+ * +--------------------+------------+-------------+------------+-------------+
21
+ * | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
22
+ * +--------------------+------------+-------------+------------+-------------+
23
+ * | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
24
+ * +--------------------+------------+-------------+------------+-------------+
25
+ * | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
26
+ * +--------------------+------------+-------------+------------+-------------+
27
+ * | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
28
+ * +--------------------+------------+-------------+------------+-------------+
29
+ */
30
+
31
+ /* Return 0 - success, >0 - index(1 based) of first error char */
32
+ int utf8_naive(const unsigned char *data, int len)
33
+ {
34
+ int err_pos = 1;
35
+
36
+ while (len) {
37
+ int bytes;
38
+ const unsigned char byte1 = data[0];
39
+
40
+ /* 00..7F */
41
+ if (byte1 <= 0x7F) {
42
+ bytes = 1;
43
+ /* C2..DF, 80..BF */
44
+ } else if (len >= 2 && byte1 >= 0xC2 && byte1 <= 0xDF &&
45
+ (signed char)data[1] <= (signed char)0xBF) {
46
+ bytes = 2;
47
+ } else if (len >= 3) {
48
+ const unsigned char byte2 = data[1];
49
+
50
+ /* Is byte2, byte3 between 0x80 ~ 0xBF */
51
+ const int byte2_ok = (signed char)byte2 <= (signed char)0xBF;
52
+ const int byte3_ok = (signed char)data[2] <= (signed char)0xBF;
53
+
54
+ if (byte2_ok && byte3_ok &&
55
+ /* E0, A0..BF, 80..BF */
56
+ ((byte1 == 0xE0 && byte2 >= 0xA0) ||
57
+ /* E1..EC, 80..BF, 80..BF */
58
+ (byte1 >= 0xE1 && byte1 <= 0xEC) ||
59
+ /* ED, 80..9F, 80..BF */
60
+ (byte1 == 0xED && byte2 <= 0x9F) ||
61
+ /* EE..EF, 80..BF, 80..BF */
62
+ (byte1 >= 0xEE && byte1 <= 0xEF))) {
63
+ bytes = 3;
64
+ } else if (len >= 4) {
65
+ /* Is byte4 between 0x80 ~ 0xBF */
66
+ const int byte4_ok = (signed char)data[3] <= (signed char)0xBF;
67
+
68
+ if (byte2_ok && byte3_ok && byte4_ok &&
69
+ /* F0, 90..BF, 80..BF, 80..BF */
70
+ ((byte1 == 0xF0 && byte2 >= 0x90) ||
71
+ /* F1..F3, 80..BF, 80..BF, 80..BF */
72
+ (byte1 >= 0xF1 && byte1 <= 0xF3) ||
73
+ /* F4, 80..8F, 80..BF, 80..BF */
74
+ (byte1 == 0xF4 && byte2 <= 0x8F))) {
75
+ bytes = 4;
76
+ } else {
77
+ return err_pos;
78
+ }
79
+ } else {
80
+ return err_pos;
81
+ }
82
+ } else {
83
+ return err_pos;
84
+ }
85
+
86
+ len -= bytes;
87
+ err_pos += bytes;
88
+ data += bytes;
89
+ }
90
+
91
+ return 0;
92
+ }
@@ -0,0 +1,157 @@
1
+ /*
2
+ * Process 2x16 bytes in each iteration.
3
+ * Comments removed for brevity. See range-neon.c for details.
4
+ */
5
+ #ifdef __aarch64__
6
+
7
+ #include <stdio.h>
8
+ #include <stdint.h>
9
+ #include <arm_neon.h>
10
+
11
+ int utf8_naive(const unsigned char *data, int len);
12
+
13
+ static const uint8_t _first_len_tbl[] = {
14
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
15
+ };
16
+
17
+ static const uint8_t _first_range_tbl[] = {
18
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
19
+ };
20
+
21
+ static const uint8_t _range_min_tbl[] = {
22
+ 0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
23
+ 0xC2, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
24
+ };
25
+ static const uint8_t _range_max_tbl[] = {
26
+ 0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
27
+ 0xF4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
28
+ };
29
+
30
+ static const uint8_t _range_adjust_tbl[] = {
31
+ 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0,
32
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0,
33
+ };
34
+
35
+ /* Return 0 on success, -1 on error */
36
+ int utf8_range2(const unsigned char *data, int len)
37
+ {
38
+ if (len >= 32) {
39
+ uint8x16_t prev_input = vdupq_n_u8(0);
40
+ uint8x16_t prev_first_len = vdupq_n_u8(0);
41
+
42
+ const uint8x16_t first_len_tbl = vld1q_u8(_first_len_tbl);
43
+ const uint8x16_t first_range_tbl = vld1q_u8(_first_range_tbl);
44
+ const uint8x16_t range_min_tbl = vld1q_u8(_range_min_tbl);
45
+ const uint8x16_t range_max_tbl = vld1q_u8(_range_max_tbl);
46
+ const uint8x16x2_t range_adjust_tbl = vld2q_u8(_range_adjust_tbl);
47
+
48
+ const uint8x16_t const_1 = vdupq_n_u8(1);
49
+ const uint8x16_t const_2 = vdupq_n_u8(2);
50
+ const uint8x16_t const_e0 = vdupq_n_u8(0xE0);
51
+
52
+ uint8x16_t error1 = vdupq_n_u8(0);
53
+ uint8x16_t error2 = vdupq_n_u8(0);
54
+ uint8x16_t error3 = vdupq_n_u8(0);
55
+ uint8x16_t error4 = vdupq_n_u8(0);
56
+
57
+ while (len >= 32) {
58
+ /******************* two blocks interleaved **********************/
59
+
60
+ #if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ < 8)
61
+ /* gcc doesn't support vldq1_u8_x2 until version 8 */
62
+ const uint8x16_t input_a = vld1q_u8(data);
63
+ const uint8x16_t input_b = vld1q_u8(data + 16);
64
+ #else
65
+ /* Forces a double load on Clang */
66
+ const uint8x16x2_t input_pair = vld1q_u8_x2(data);
67
+ const uint8x16_t input_a = input_pair.val[0];
68
+ const uint8x16_t input_b = input_pair.val[1];
69
+ #endif
70
+
71
+ const uint8x16_t high_nibbles_a = vshrq_n_u8(input_a, 4);
72
+ const uint8x16_t high_nibbles_b = vshrq_n_u8(input_b, 4);
73
+
74
+ const uint8x16_t first_len_a =
75
+ vqtbl1q_u8(first_len_tbl, high_nibbles_a);
76
+ const uint8x16_t first_len_b =
77
+ vqtbl1q_u8(first_len_tbl, high_nibbles_b);
78
+
79
+ uint8x16_t range_a = vqtbl1q_u8(first_range_tbl, high_nibbles_a);
80
+ uint8x16_t range_b = vqtbl1q_u8(first_range_tbl, high_nibbles_b);
81
+
82
+ range_a =
83
+ vorrq_u8(range_a, vextq_u8(prev_first_len, first_len_a, 15));
84
+ range_b =
85
+ vorrq_u8(range_b, vextq_u8(first_len_a, first_len_b, 15));
86
+
87
+ uint8x16_t tmp1_a, tmp2_a, tmp1_b, tmp2_b;
88
+ tmp1_a = vextq_u8(prev_first_len, first_len_a, 14);
89
+ tmp1_a = vqsubq_u8(tmp1_a, const_1);
90
+ range_a = vorrq_u8(range_a, tmp1_a);
91
+
92
+ tmp1_b = vextq_u8(first_len_a, first_len_b, 14);
93
+ tmp1_b = vqsubq_u8(tmp1_b, const_1);
94
+ range_b = vorrq_u8(range_b, tmp1_b);
95
+
96
+ tmp2_a = vextq_u8(prev_first_len, first_len_a, 13);
97
+ tmp2_a = vqsubq_u8(tmp2_a, const_2);
98
+ range_a = vorrq_u8(range_a, tmp2_a);
99
+
100
+ tmp2_b = vextq_u8(first_len_a, first_len_b, 13);
101
+ tmp2_b = vqsubq_u8(tmp2_b, const_2);
102
+ range_b = vorrq_u8(range_b, tmp2_b);
103
+
104
+ uint8x16_t shift1_a = vextq_u8(prev_input, input_a, 15);
105
+ uint8x16_t pos_a = vsubq_u8(shift1_a, const_e0);
106
+ range_a = vaddq_u8(range_a, vqtbl2q_u8(range_adjust_tbl, pos_a));
107
+
108
+ uint8x16_t shift1_b = vextq_u8(input_a, input_b, 15);
109
+ uint8x16_t pos_b = vsubq_u8(shift1_b, const_e0);
110
+ range_b = vaddq_u8(range_b, vqtbl2q_u8(range_adjust_tbl, pos_b));
111
+
112
+ uint8x16_t minv_a = vqtbl1q_u8(range_min_tbl, range_a);
113
+ uint8x16_t maxv_a = vqtbl1q_u8(range_max_tbl, range_a);
114
+
115
+ uint8x16_t minv_b = vqtbl1q_u8(range_min_tbl, range_b);
116
+ uint8x16_t maxv_b = vqtbl1q_u8(range_max_tbl, range_b);
117
+
118
+ error1 = vorrq_u8(error1, vcltq_u8(input_a, minv_a));
119
+ error2 = vorrq_u8(error2, vcgtq_u8(input_a, maxv_a));
120
+
121
+ error3 = vorrq_u8(error3, vcltq_u8(input_b, minv_b));
122
+ error4 = vorrq_u8(error4, vcgtq_u8(input_b, maxv_b));
123
+
124
+ /************************ next iteration *************************/
125
+ prev_input = input_b;
126
+ prev_first_len = first_len_b;
127
+
128
+ data += 32;
129
+ len -= 32;
130
+ }
131
+ error1 = vorrq_u8(error1, error2);
132
+ error1 = vorrq_u8(error1, error3);
133
+ error1 = vorrq_u8(error1, error4);
134
+
135
+ if (vmaxvq_u8(error1))
136
+ return -1;
137
+
138
+ uint32_t token4;
139
+ vst1q_lane_u32(&token4, vreinterpretq_u32_u8(prev_input), 3);
140
+
141
+ const int8_t *token = (const int8_t *)&token4;
142
+ int lookahead = 0;
143
+ if (token[3] > (int8_t)0xBF)
144
+ lookahead = 1;
145
+ else if (token[2] > (int8_t)0xBF)
146
+ lookahead = 2;
147
+ else if (token[1] > (int8_t)0xBF)
148
+ lookahead = 3;
149
+
150
+ data -= lookahead;
151
+ len += lookahead;
152
+ }
153
+
154
+ return utf8_naive(data, len);
155
+ }
156
+
157
+ #endif
@@ -0,0 +1,170 @@
1
+ /*
2
+ * Process 2x16 bytes in each iteration.
3
+ * Comments removed for brevity. See range-sse.c for details.
4
+ */
5
+ #ifdef __SSE4_1__
6
+
7
+ #include <stdio.h>
8
+ #include <stdint.h>
9
+ #include <x86intrin.h>
10
+
11
+ int utf8_naive(const unsigned char *data, int len);
12
+
13
+ static const int8_t _first_len_tbl[] = {
14
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
15
+ };
16
+
17
+ static const int8_t _first_range_tbl[] = {
18
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
19
+ };
20
+
21
+ static const int8_t _range_min_tbl[] = {
22
+ 0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
23
+ 0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F,
24
+ };
25
+ static const int8_t _range_max_tbl[] = {
26
+ 0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
27
+ 0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
28
+ };
29
+
30
+ static const int8_t _df_ee_tbl[] = {
31
+ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
32
+ };
33
+ static const int8_t _ef_fe_tbl[] = {
34
+ 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35
+ };
36
+
37
+ /* Return 0 on success, -1 on error */
38
+ int utf8_range2(const unsigned char *data, int len)
39
+ {
40
+ if (len >= 32) {
41
+ __m128i prev_input = _mm_set1_epi8(0);
42
+ __m128i prev_first_len = _mm_set1_epi8(0);
43
+
44
+ const __m128i first_len_tbl =
45
+ _mm_loadu_si128((const __m128i *)_first_len_tbl);
46
+ const __m128i first_range_tbl =
47
+ _mm_loadu_si128((const __m128i *)_first_range_tbl);
48
+ const __m128i range_min_tbl =
49
+ _mm_loadu_si128((const __m128i *)_range_min_tbl);
50
+ const __m128i range_max_tbl =
51
+ _mm_loadu_si128((const __m128i *)_range_max_tbl);
52
+ const __m128i df_ee_tbl =
53
+ _mm_loadu_si128((const __m128i *)_df_ee_tbl);
54
+ const __m128i ef_fe_tbl =
55
+ _mm_loadu_si128((const __m128i *)_ef_fe_tbl);
56
+
57
+ __m128i error = _mm_set1_epi8(0);
58
+
59
+ while (len >= 32) {
60
+ /***************************** block 1 ****************************/
61
+ const __m128i input_a = _mm_loadu_si128((const __m128i *)data);
62
+
63
+ __m128i high_nibbles =
64
+ _mm_and_si128(_mm_srli_epi16(input_a, 4), _mm_set1_epi8(0x0F));
65
+
66
+ __m128i first_len_a = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
67
+
68
+ __m128i range_a = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
69
+
70
+ range_a = _mm_or_si128(
71
+ range_a, _mm_alignr_epi8(first_len_a, prev_first_len, 15));
72
+
73
+ __m128i tmp;
74
+ tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 14);
75
+ tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
76
+ range_a = _mm_or_si128(range_a, tmp);
77
+
78
+ tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 13);
79
+ tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
80
+ range_a = _mm_or_si128(range_a, tmp);
81
+
82
+ __m128i shift1, pos, range2;
83
+ shift1 = _mm_alignr_epi8(input_a, prev_input, 15);
84
+ pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
85
+ tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
86
+ range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
87
+ tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
88
+ range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp));
89
+
90
+ range_a = _mm_add_epi8(range_a, range2);
91
+
92
+ __m128i minv = _mm_shuffle_epi8(range_min_tbl, range_a);
93
+ __m128i maxv = _mm_shuffle_epi8(range_max_tbl, range_a);
94
+
95
+ tmp = _mm_or_si128(
96
+ _mm_cmplt_epi8(input_a, minv),
97
+ _mm_cmpgt_epi8(input_a, maxv)
98
+ );
99
+ error = _mm_or_si128(error, tmp);
100
+
101
+ /***************************** block 2 ****************************/
102
+ const __m128i input_b = _mm_loadu_si128((const __m128i *)(data+16));
103
+
104
+ high_nibbles =
105
+ _mm_and_si128(_mm_srli_epi16(input_b, 4), _mm_set1_epi8(0x0F));
106
+
107
+ __m128i first_len_b = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
108
+
109
+ __m128i range_b = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
110
+
111
+ range_b = _mm_or_si128(
112
+ range_b, _mm_alignr_epi8(first_len_b, first_len_a, 15));
113
+
114
+
115
+ tmp = _mm_alignr_epi8(first_len_b, first_len_a, 14);
116
+ tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
117
+ range_b = _mm_or_si128(range_b, tmp);
118
+
119
+ tmp = _mm_alignr_epi8(first_len_b, first_len_a, 13);
120
+ tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
121
+ range_b = _mm_or_si128(range_b, tmp);
122
+
123
+ shift1 = _mm_alignr_epi8(input_b, input_a, 15);
124
+ pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
125
+ tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
126
+ range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
127
+ tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
128
+ range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp));
129
+
130
+ range_b = _mm_add_epi8(range_b, range2);
131
+
132
+ minv = _mm_shuffle_epi8(range_min_tbl, range_b);
133
+ maxv = _mm_shuffle_epi8(range_max_tbl, range_b);
134
+
135
+
136
+ tmp = _mm_or_si128(
137
+ _mm_cmplt_epi8(input_b, minv),
138
+ _mm_cmpgt_epi8(input_b, maxv)
139
+ );
140
+ error = _mm_or_si128(error, tmp);
141
+
142
+ /************************ next iteration **************************/
143
+ prev_input = input_b;
144
+ prev_first_len = first_len_b;
145
+
146
+ data += 32;
147
+ len -= 32;
148
+ }
149
+
150
+ if (!_mm_testz_si128(error, error))
151
+ return -1;
152
+
153
+ int32_t token4 = _mm_extract_epi32(prev_input, 3);
154
+ const int8_t *token = (const int8_t *)&token4;
155
+ int lookahead = 0;
156
+ if (token[3] > (int8_t)0xBF)
157
+ lookahead = 1;
158
+ else if (token[2] > (int8_t)0xBF)
159
+ lookahead = 2;
160
+ else if (token[1] > (int8_t)0xBF)
161
+ lookahead = 3;
162
+
163
+ data -= lookahead;
164
+ len += lookahead;
165
+ }
166
+
167
+ return utf8_naive(data, len);
168
+ }
169
+
170
+ #endif
@@ -0,0 +1,21 @@
1
+ #ifndef THIRD_PARTY_UTF8_RANGE_UTF8_RANGE_H_
2
+ #define THIRD_PARTY_UTF8_RANGE_UTF8_RANGE_H_
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ #if (defined(__ARM_NEON) && defined(__aarch64__)) || defined(__SSE4_1__)
9
+ int utf8_range2(const unsigned char* data, int len);
10
+ #else
11
+ int utf8_naive(const unsigned char* data, int len);
12
+ static inline int utf8_range2(const unsigned char* data, int len) {
13
+ return utf8_naive(data, len);
14
+ }
15
+ #endif
16
+
17
+ #ifdef __cplusplus
18
+ } // extern "C"
19
+ #endif
20
+
21
+ #endif // THIRD_PARTY_UTF8_RANGE_UTF8_RANGE_H_
@@ -11,7 +11,7 @@ pool = Google::Protobuf::DescriptorPool.generated_pool
11
11
 
12
12
  begin
13
13
  pool.add_serialized_file(descriptor_data)
14
- rescue TypeError => e
14
+ rescue TypeError
15
15
  # Compatibility code: will be removed in the next major version.
16
16
  require 'google/protobuf/descriptor_pb'
17
17
  parsed = Google::Protobuf::FileDescriptorProto.decode(descriptor_data)
@@ -14,7 +14,7 @@ pool = Google::Protobuf::DescriptorPool.generated_pool
14
14
 
15
15
  begin
16
16
  pool.add_serialized_file(descriptor_data)
17
- rescue TypeError => e
17
+ rescue TypeError
18
18
  # Compatibility code: will be removed in the next major version.
19
19
  require 'google/protobuf/descriptor_pb'
20
20
  parsed = Google::Protobuf::FileDescriptorProto.decode(descriptor_data)