google-protobuf 3.25.7 → 4.34.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. checksums.yaml +4 -4
  2. data/ext/google/protobuf_c/convert.c +33 -15
  3. data/ext/google/protobuf_c/defs.c +806 -125
  4. data/ext/google/protobuf_c/extconf.rb +20 -10
  5. data/ext/google/protobuf_c/glue.c +79 -0
  6. data/ext/google/protobuf_c/map.c +145 -63
  7. data/ext/google/protobuf_c/map.h +7 -3
  8. data/ext/google/protobuf_c/message.c +204 -171
  9. data/ext/google/protobuf_c/message.h +2 -6
  10. data/ext/google/protobuf_c/protobuf.c +33 -19
  11. data/ext/google/protobuf_c/protobuf.h +3 -15
  12. data/ext/google/protobuf_c/repeated_field.c +130 -58
  13. data/ext/google/protobuf_c/repeated_field.h +6 -2
  14. data/ext/google/protobuf_c/ruby-upb.c +11504 -7961
  15. data/ext/google/protobuf_c/ruby-upb.h +11760 -6934
  16. data/ext/google/protobuf_c/shared_convert.c +7 -2
  17. data/ext/google/protobuf_c/shared_message.c +3 -32
  18. data/ext/google/protobuf_c/shared_message.h +0 -4
  19. data/ext/google/protobuf_c/third_party/utf8_range/utf8_range.c +207 -0
  20. data/ext/google/protobuf_c/third_party/utf8_range/utf8_range.h +9 -8
  21. data/ext/google/protobuf_c/third_party/utf8_range/utf8_range_neon.inc +117 -0
  22. data/ext/google/protobuf_c/third_party/utf8_range/utf8_range_sse.inc +272 -0
  23. data/lib/google/protobuf/any_pb.rb +2 -23
  24. data/lib/google/protobuf/api_pb.rb +3 -26
  25. data/lib/google/protobuf/descriptor_pb.rb +8 -24
  26. data/lib/google/protobuf/duration_pb.rb +2 -23
  27. data/lib/google/protobuf/empty_pb.rb +2 -23
  28. data/lib/google/protobuf/ffi/descriptor.rb +14 -4
  29. data/lib/google/protobuf/ffi/descriptor_pool.rb +9 -1
  30. data/lib/google/protobuf/ffi/enum_descriptor.rb +13 -1
  31. data/lib/google/protobuf/ffi/ffi.rb +8 -8
  32. data/lib/google/protobuf/ffi/field_descriptor.rb +29 -2
  33. data/lib/google/protobuf/ffi/file_descriptor.rb +39 -13
  34. data/lib/google/protobuf/ffi/internal/arena.rb +0 -6
  35. data/lib/google/protobuf/ffi/internal/convert.rb +17 -30
  36. data/lib/google/protobuf/ffi/internal/pointer_helper.rb +2 -1
  37. data/lib/google/protobuf/ffi/map.rb +52 -26
  38. data/lib/google/protobuf/ffi/message.rb +189 -68
  39. data/lib/google/protobuf/ffi/method_descriptor.rb +124 -0
  40. data/lib/google/protobuf/ffi/object_cache.rb +3 -3
  41. data/lib/google/protobuf/ffi/oneof_descriptor.rb +13 -1
  42. data/lib/google/protobuf/ffi/repeated_field.rb +47 -19
  43. data/lib/google/protobuf/ffi/service_descriptor.rb +117 -0
  44. data/lib/google/protobuf/field_mask_pb.rb +2 -23
  45. data/lib/google/protobuf/internal/object_cache.rb +99 -0
  46. data/lib/google/protobuf/message_exts.rb +4 -0
  47. data/lib/google/protobuf/plugin_pb.rb +3 -25
  48. data/lib/google/protobuf/repeated_field.rb +4 -5
  49. data/lib/google/protobuf/source_context_pb.rb +2 -23
  50. data/lib/google/protobuf/struct_pb.rb +2 -23
  51. data/lib/google/protobuf/timestamp_pb.rb +2 -23
  52. data/lib/google/protobuf/type_pb.rb +2 -25
  53. data/lib/google/protobuf/wrappers_pb.rb +2 -23
  54. data/lib/google/protobuf.rb +1 -1
  55. data/lib/google/protobuf_ffi.rb +6 -4
  56. data/lib/google/protobuf_native.rb +0 -1
  57. data/lib/google/tasks/ffi.rake +2 -4
  58. metadata +38 -30
  59. data/ext/google/protobuf_c/third_party/utf8_range/naive.c +0 -92
  60. data/ext/google/protobuf_c/third_party/utf8_range/range2-neon.c +0 -157
  61. data/ext/google/protobuf_c/third_party/utf8_range/range2-sse.c +0 -170
  62. data/ext/google/protobuf_c/wrap_memcpy.c +0 -29
  63. data/lib/google/protobuf/descriptor_dsl.rb +0 -465
  64. data/lib/google/protobuf/object_cache.rb +0 -97
@@ -12,7 +12,7 @@ def configure_common_compile_task(task)
12
12
  task.add_define 'NDEBUG'
13
13
  task.cflags << "-std=gnu99 -O3"
14
14
  [
15
- :convert, :defs, :map, :message, :protobuf, :repeated_field, :wrap_memcpy
15
+ :convert, :defs, :map, :message, :protobuf, :repeated_field
16
16
  ].each { |file| task.exclude << "/#{file}.c" }
17
17
  task.ext_dir = src_dir
18
18
  task.source_dirs = [src_dir]
@@ -74,9 +74,7 @@ begin
74
74
  FFI::Compiler::CompileTask.new 'protobuf_c_ffi' do |c|
75
75
  configure_common_compile_task c
76
76
  # Ruby UPB was already compiled with different flags.
77
- c.exclude << "/range2-neon.c"
78
- c.exclude << "/range2-sse.c"
79
- c.exclude << "/naive.c"
77
+ c.exclude << "/utf8_range.c"
80
78
  c.exclude << "/ruby-upb.c"
81
79
  end
82
80
 
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google-protobuf
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.25.7
4
+ version: 4.34.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Protobuf Authors
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-04-22 00:00:00.000000000 Z
11
+ date: 2026-02-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: rake-compiler-dock
14
+ name: bigdecimal
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 1.2.1
20
- type: :development
19
+ version: '0'
20
+ type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 1.2.1
26
+ version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '13'
34
- type: :development
33
+ version: '13.3'
34
+ type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '13'
40
+ version: '13.3'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: ffi
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -72,34 +72,42 @@ dependencies:
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: 1.1.0
75
+ version: '1.3'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: 1.1.0
82
+ version: '1.3'
83
83
  - !ruby/object:Gem::Dependency
84
- name: test-unit
84
+ name: rake-compiler-dock
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '3.0'
90
- - - ">="
91
- - !ruby/object:Gem::Version
92
- version: 3.0.9
89
+ version: '1.11'
93
90
  type: :development
94
91
  prerelease: false
95
92
  version_requirements: !ruby/object:Gem::Requirement
96
93
  requirements:
97
94
  - - "~>"
98
95
  - !ruby/object:Gem::Version
99
- version: '3.0'
100
- - - ">="
96
+ version: '1.11'
97
+ - !ruby/object:Gem::Dependency
98
+ name: test-unit
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '3.7'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
101
109
  - !ruby/object:Gem::Version
102
- version: 3.0.9
110
+ version: '3.7'
103
111
  description: Protocol Buffers are Google's data interchange format.
104
112
  email: protobuf@googlegroups.com
105
113
  executables: []
@@ -130,15 +138,13 @@ files:
130
138
  - ext/google/protobuf_c/shared_message.c
131
139
  - ext/google/protobuf_c/shared_message.h
132
140
  - ext/google/protobuf_c/third_party/utf8_range/LICENSE
133
- - ext/google/protobuf_c/third_party/utf8_range/naive.c
134
- - ext/google/protobuf_c/third_party/utf8_range/range2-neon.c
135
- - ext/google/protobuf_c/third_party/utf8_range/range2-sse.c
141
+ - ext/google/protobuf_c/third_party/utf8_range/utf8_range.c
136
142
  - ext/google/protobuf_c/third_party/utf8_range/utf8_range.h
137
- - ext/google/protobuf_c/wrap_memcpy.c
143
+ - ext/google/protobuf_c/third_party/utf8_range/utf8_range_neon.inc
144
+ - ext/google/protobuf_c/third_party/utf8_range/utf8_range_sse.inc
138
145
  - lib/google/protobuf.rb
139
146
  - lib/google/protobuf/any_pb.rb
140
147
  - lib/google/protobuf/api_pb.rb
141
- - lib/google/protobuf/descriptor_dsl.rb
142
148
  - lib/google/protobuf/descriptor_pb.rb
143
149
  - lib/google/protobuf/duration_pb.rb
144
150
  - lib/google/protobuf/empty_pb.rb
@@ -154,12 +160,14 @@ files:
154
160
  - lib/google/protobuf/ffi/internal/type_safety.rb
155
161
  - lib/google/protobuf/ffi/map.rb
156
162
  - lib/google/protobuf/ffi/message.rb
163
+ - lib/google/protobuf/ffi/method_descriptor.rb
157
164
  - lib/google/protobuf/ffi/object_cache.rb
158
165
  - lib/google/protobuf/ffi/oneof_descriptor.rb
159
166
  - lib/google/protobuf/ffi/repeated_field.rb
167
+ - lib/google/protobuf/ffi/service_descriptor.rb
160
168
  - lib/google/protobuf/field_mask_pb.rb
169
+ - lib/google/protobuf/internal/object_cache.rb
161
170
  - lib/google/protobuf/message_exts.rb
162
- - lib/google/protobuf/object_cache.rb
163
171
  - lib/google/protobuf/plugin_pb.rb
164
172
  - lib/google/protobuf/repeated_field.rb
165
173
  - lib/google/protobuf/source_context_pb.rb
@@ -175,7 +183,7 @@ homepage: https://developers.google.com/protocol-buffers
175
183
  licenses:
176
184
  - BSD-3-Clause
177
185
  metadata:
178
- source_code_uri: https://github.com/protocolbuffers/protobuf/tree/v3.25.7/ruby
186
+ source_code_uri: https://github.com/protocolbuffers/protobuf/tree/v4.34.0/ruby
179
187
  post_install_message:
180
188
  rdoc_options: []
181
189
  require_paths:
@@ -184,14 +192,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
184
192
  requirements:
185
193
  - - ">="
186
194
  - !ruby/object:Gem::Version
187
- version: '2.7'
195
+ version: '3.1'
188
196
  required_rubygems_version: !ruby/object:Gem::Requirement
189
197
  requirements:
190
198
  - - ">="
191
199
  - !ruby/object:Gem::Version
192
200
  version: '0'
193
201
  requirements: []
194
- rubygems_version: 3.0.8
202
+ rubygems_version: 3.5.16
195
203
  signing_key:
196
204
  specification_version: 4
197
205
  summary: Protocol Buffers
@@ -1,92 +0,0 @@
1
- #include <stdio.h>
2
-
3
- /*
4
- * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
5
- *
6
- * Table 3-7. Well-Formed UTF-8 Byte Sequences
7
- *
8
- * +--------------------+------------+-------------+------------+-------------+
9
- * | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte |
10
- * +--------------------+------------+-------------+------------+-------------+
11
- * | U+0000..U+007F | 00..7F | | | |
12
- * +--------------------+------------+-------------+------------+-------------+
13
- * | U+0080..U+07FF | C2..DF | 80..BF | | |
14
- * +--------------------+------------+-------------+------------+-------------+
15
- * | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
16
- * +--------------------+------------+-------------+------------+-------------+
17
- * | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
18
- * +--------------------+------------+-------------+------------+-------------+
19
- * | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
20
- * +--------------------+------------+-------------+------------+-------------+
21
- * | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
22
- * +--------------------+------------+-------------+------------+-------------+
23
- * | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
24
- * +--------------------+------------+-------------+------------+-------------+
25
- * | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
26
- * +--------------------+------------+-------------+------------+-------------+
27
- * | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
28
- * +--------------------+------------+-------------+------------+-------------+
29
- */
30
-
31
- /* Return 0 - success, >0 - index(1 based) of first error char */
32
- int utf8_naive(const unsigned char *data, int len)
33
- {
34
- int err_pos = 1;
35
-
36
- while (len) {
37
- int bytes;
38
- const unsigned char byte1 = data[0];
39
-
40
- /* 00..7F */
41
- if (byte1 <= 0x7F) {
42
- bytes = 1;
43
- /* C2..DF, 80..BF */
44
- } else if (len >= 2 && byte1 >= 0xC2 && byte1 <= 0xDF &&
45
- (signed char)data[1] <= (signed char)0xBF) {
46
- bytes = 2;
47
- } else if (len >= 3) {
48
- const unsigned char byte2 = data[1];
49
-
50
- /* Is byte2, byte3 between 0x80 ~ 0xBF */
51
- const int byte2_ok = (signed char)byte2 <= (signed char)0xBF;
52
- const int byte3_ok = (signed char)data[2] <= (signed char)0xBF;
53
-
54
- if (byte2_ok && byte3_ok &&
55
- /* E0, A0..BF, 80..BF */
56
- ((byte1 == 0xE0 && byte2 >= 0xA0) ||
57
- /* E1..EC, 80..BF, 80..BF */
58
- (byte1 >= 0xE1 && byte1 <= 0xEC) ||
59
- /* ED, 80..9F, 80..BF */
60
- (byte1 == 0xED && byte2 <= 0x9F) ||
61
- /* EE..EF, 80..BF, 80..BF */
62
- (byte1 >= 0xEE && byte1 <= 0xEF))) {
63
- bytes = 3;
64
- } else if (len >= 4) {
65
- /* Is byte4 between 0x80 ~ 0xBF */
66
- const int byte4_ok = (signed char)data[3] <= (signed char)0xBF;
67
-
68
- if (byte2_ok && byte3_ok && byte4_ok &&
69
- /* F0, 90..BF, 80..BF, 80..BF */
70
- ((byte1 == 0xF0 && byte2 >= 0x90) ||
71
- /* F1..F3, 80..BF, 80..BF, 80..BF */
72
- (byte1 >= 0xF1 && byte1 <= 0xF3) ||
73
- /* F4, 80..8F, 80..BF, 80..BF */
74
- (byte1 == 0xF4 && byte2 <= 0x8F))) {
75
- bytes = 4;
76
- } else {
77
- return err_pos;
78
- }
79
- } else {
80
- return err_pos;
81
- }
82
- } else {
83
- return err_pos;
84
- }
85
-
86
- len -= bytes;
87
- err_pos += bytes;
88
- data += bytes;
89
- }
90
-
91
- return 0;
92
- }
@@ -1,157 +0,0 @@
1
- /*
2
- * Process 2x16 bytes in each iteration.
3
- * Comments removed for brevity. See range-neon.c for details.
4
- */
5
- #ifdef __aarch64__
6
-
7
- #include <stdio.h>
8
- #include <stdint.h>
9
- #include <arm_neon.h>
10
-
11
- int utf8_naive(const unsigned char *data, int len);
12
-
13
- static const uint8_t _first_len_tbl[] = {
14
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
15
- };
16
-
17
- static const uint8_t _first_range_tbl[] = {
18
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
19
- };
20
-
21
- static const uint8_t _range_min_tbl[] = {
22
- 0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
23
- 0xC2, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
24
- };
25
- static const uint8_t _range_max_tbl[] = {
26
- 0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
27
- 0xF4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
28
- };
29
-
30
- static const uint8_t _range_adjust_tbl[] = {
31
- 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0,
32
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0,
33
- };
34
-
35
- /* Return 0 on success, -1 on error */
36
- int utf8_range2(const unsigned char *data, int len)
37
- {
38
- if (len >= 32) {
39
- uint8x16_t prev_input = vdupq_n_u8(0);
40
- uint8x16_t prev_first_len = vdupq_n_u8(0);
41
-
42
- const uint8x16_t first_len_tbl = vld1q_u8(_first_len_tbl);
43
- const uint8x16_t first_range_tbl = vld1q_u8(_first_range_tbl);
44
- const uint8x16_t range_min_tbl = vld1q_u8(_range_min_tbl);
45
- const uint8x16_t range_max_tbl = vld1q_u8(_range_max_tbl);
46
- const uint8x16x2_t range_adjust_tbl = vld2q_u8(_range_adjust_tbl);
47
-
48
- const uint8x16_t const_1 = vdupq_n_u8(1);
49
- const uint8x16_t const_2 = vdupq_n_u8(2);
50
- const uint8x16_t const_e0 = vdupq_n_u8(0xE0);
51
-
52
- uint8x16_t error1 = vdupq_n_u8(0);
53
- uint8x16_t error2 = vdupq_n_u8(0);
54
- uint8x16_t error3 = vdupq_n_u8(0);
55
- uint8x16_t error4 = vdupq_n_u8(0);
56
-
57
- while (len >= 32) {
58
- /******************* two blocks interleaved **********************/
59
-
60
- #if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ < 8)
61
- /* gcc doesn't support vldq1_u8_x2 until version 8 */
62
- const uint8x16_t input_a = vld1q_u8(data);
63
- const uint8x16_t input_b = vld1q_u8(data + 16);
64
- #else
65
- /* Forces a double load on Clang */
66
- const uint8x16x2_t input_pair = vld1q_u8_x2(data);
67
- const uint8x16_t input_a = input_pair.val[0];
68
- const uint8x16_t input_b = input_pair.val[1];
69
- #endif
70
-
71
- const uint8x16_t high_nibbles_a = vshrq_n_u8(input_a, 4);
72
- const uint8x16_t high_nibbles_b = vshrq_n_u8(input_b, 4);
73
-
74
- const uint8x16_t first_len_a =
75
- vqtbl1q_u8(first_len_tbl, high_nibbles_a);
76
- const uint8x16_t first_len_b =
77
- vqtbl1q_u8(first_len_tbl, high_nibbles_b);
78
-
79
- uint8x16_t range_a = vqtbl1q_u8(first_range_tbl, high_nibbles_a);
80
- uint8x16_t range_b = vqtbl1q_u8(first_range_tbl, high_nibbles_b);
81
-
82
- range_a =
83
- vorrq_u8(range_a, vextq_u8(prev_first_len, first_len_a, 15));
84
- range_b =
85
- vorrq_u8(range_b, vextq_u8(first_len_a, first_len_b, 15));
86
-
87
- uint8x16_t tmp1_a, tmp2_a, tmp1_b, tmp2_b;
88
- tmp1_a = vextq_u8(prev_first_len, first_len_a, 14);
89
- tmp1_a = vqsubq_u8(tmp1_a, const_1);
90
- range_a = vorrq_u8(range_a, tmp1_a);
91
-
92
- tmp1_b = vextq_u8(first_len_a, first_len_b, 14);
93
- tmp1_b = vqsubq_u8(tmp1_b, const_1);
94
- range_b = vorrq_u8(range_b, tmp1_b);
95
-
96
- tmp2_a = vextq_u8(prev_first_len, first_len_a, 13);
97
- tmp2_a = vqsubq_u8(tmp2_a, const_2);
98
- range_a = vorrq_u8(range_a, tmp2_a);
99
-
100
- tmp2_b = vextq_u8(first_len_a, first_len_b, 13);
101
- tmp2_b = vqsubq_u8(tmp2_b, const_2);
102
- range_b = vorrq_u8(range_b, tmp2_b);
103
-
104
- uint8x16_t shift1_a = vextq_u8(prev_input, input_a, 15);
105
- uint8x16_t pos_a = vsubq_u8(shift1_a, const_e0);
106
- range_a = vaddq_u8(range_a, vqtbl2q_u8(range_adjust_tbl, pos_a));
107
-
108
- uint8x16_t shift1_b = vextq_u8(input_a, input_b, 15);
109
- uint8x16_t pos_b = vsubq_u8(shift1_b, const_e0);
110
- range_b = vaddq_u8(range_b, vqtbl2q_u8(range_adjust_tbl, pos_b));
111
-
112
- uint8x16_t minv_a = vqtbl1q_u8(range_min_tbl, range_a);
113
- uint8x16_t maxv_a = vqtbl1q_u8(range_max_tbl, range_a);
114
-
115
- uint8x16_t minv_b = vqtbl1q_u8(range_min_tbl, range_b);
116
- uint8x16_t maxv_b = vqtbl1q_u8(range_max_tbl, range_b);
117
-
118
- error1 = vorrq_u8(error1, vcltq_u8(input_a, minv_a));
119
- error2 = vorrq_u8(error2, vcgtq_u8(input_a, maxv_a));
120
-
121
- error3 = vorrq_u8(error3, vcltq_u8(input_b, minv_b));
122
- error4 = vorrq_u8(error4, vcgtq_u8(input_b, maxv_b));
123
-
124
- /************************ next iteration *************************/
125
- prev_input = input_b;
126
- prev_first_len = first_len_b;
127
-
128
- data += 32;
129
- len -= 32;
130
- }
131
- error1 = vorrq_u8(error1, error2);
132
- error1 = vorrq_u8(error1, error3);
133
- error1 = vorrq_u8(error1, error4);
134
-
135
- if (vmaxvq_u8(error1))
136
- return -1;
137
-
138
- uint32_t token4;
139
- vst1q_lane_u32(&token4, vreinterpretq_u32_u8(prev_input), 3);
140
-
141
- const int8_t *token = (const int8_t *)&token4;
142
- int lookahead = 0;
143
- if (token[3] > (int8_t)0xBF)
144
- lookahead = 1;
145
- else if (token[2] > (int8_t)0xBF)
146
- lookahead = 2;
147
- else if (token[1] > (int8_t)0xBF)
148
- lookahead = 3;
149
-
150
- data -= lookahead;
151
- len += lookahead;
152
- }
153
-
154
- return utf8_naive(data, len);
155
- }
156
-
157
- #endif
@@ -1,170 +0,0 @@
1
- /*
2
- * Process 2x16 bytes in each iteration.
3
- * Comments removed for brevity. See range-sse.c for details.
4
- */
5
- #ifdef __SSE4_1__
6
-
7
- #include <stdio.h>
8
- #include <stdint.h>
9
- #include <x86intrin.h>
10
-
11
- int utf8_naive(const unsigned char *data, int len);
12
-
13
- static const int8_t _first_len_tbl[] = {
14
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
15
- };
16
-
17
- static const int8_t _first_range_tbl[] = {
18
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
19
- };
20
-
21
- static const int8_t _range_min_tbl[] = {
22
- 0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
23
- 0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F,
24
- };
25
- static const int8_t _range_max_tbl[] = {
26
- 0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
27
- 0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
28
- };
29
-
30
- static const int8_t _df_ee_tbl[] = {
31
- 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
32
- };
33
- static const int8_t _ef_fe_tbl[] = {
34
- 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35
- };
36
-
37
- /* Return 0 on success, -1 on error */
38
- int utf8_range2(const unsigned char *data, int len)
39
- {
40
- if (len >= 32) {
41
- __m128i prev_input = _mm_set1_epi8(0);
42
- __m128i prev_first_len = _mm_set1_epi8(0);
43
-
44
- const __m128i first_len_tbl =
45
- _mm_loadu_si128((const __m128i *)_first_len_tbl);
46
- const __m128i first_range_tbl =
47
- _mm_loadu_si128((const __m128i *)_first_range_tbl);
48
- const __m128i range_min_tbl =
49
- _mm_loadu_si128((const __m128i *)_range_min_tbl);
50
- const __m128i range_max_tbl =
51
- _mm_loadu_si128((const __m128i *)_range_max_tbl);
52
- const __m128i df_ee_tbl =
53
- _mm_loadu_si128((const __m128i *)_df_ee_tbl);
54
- const __m128i ef_fe_tbl =
55
- _mm_loadu_si128((const __m128i *)_ef_fe_tbl);
56
-
57
- __m128i error = _mm_set1_epi8(0);
58
-
59
- while (len >= 32) {
60
- /***************************** block 1 ****************************/
61
- const __m128i input_a = _mm_loadu_si128((const __m128i *)data);
62
-
63
- __m128i high_nibbles =
64
- _mm_and_si128(_mm_srli_epi16(input_a, 4), _mm_set1_epi8(0x0F));
65
-
66
- __m128i first_len_a = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
67
-
68
- __m128i range_a = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
69
-
70
- range_a = _mm_or_si128(
71
- range_a, _mm_alignr_epi8(first_len_a, prev_first_len, 15));
72
-
73
- __m128i tmp;
74
- tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 14);
75
- tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
76
- range_a = _mm_or_si128(range_a, tmp);
77
-
78
- tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 13);
79
- tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
80
- range_a = _mm_or_si128(range_a, tmp);
81
-
82
- __m128i shift1, pos, range2;
83
- shift1 = _mm_alignr_epi8(input_a, prev_input, 15);
84
- pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
85
- tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
86
- range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
87
- tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
88
- range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp));
89
-
90
- range_a = _mm_add_epi8(range_a, range2);
91
-
92
- __m128i minv = _mm_shuffle_epi8(range_min_tbl, range_a);
93
- __m128i maxv = _mm_shuffle_epi8(range_max_tbl, range_a);
94
-
95
- tmp = _mm_or_si128(
96
- _mm_cmplt_epi8(input_a, minv),
97
- _mm_cmpgt_epi8(input_a, maxv)
98
- );
99
- error = _mm_or_si128(error, tmp);
100
-
101
- /***************************** block 2 ****************************/
102
- const __m128i input_b = _mm_loadu_si128((const __m128i *)(data+16));
103
-
104
- high_nibbles =
105
- _mm_and_si128(_mm_srli_epi16(input_b, 4), _mm_set1_epi8(0x0F));
106
-
107
- __m128i first_len_b = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
108
-
109
- __m128i range_b = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
110
-
111
- range_b = _mm_or_si128(
112
- range_b, _mm_alignr_epi8(first_len_b, first_len_a, 15));
113
-
114
-
115
- tmp = _mm_alignr_epi8(first_len_b, first_len_a, 14);
116
- tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
117
- range_b = _mm_or_si128(range_b, tmp);
118
-
119
- tmp = _mm_alignr_epi8(first_len_b, first_len_a, 13);
120
- tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
121
- range_b = _mm_or_si128(range_b, tmp);
122
-
123
- shift1 = _mm_alignr_epi8(input_b, input_a, 15);
124
- pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
125
- tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
126
- range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
127
- tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
128
- range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp));
129
-
130
- range_b = _mm_add_epi8(range_b, range2);
131
-
132
- minv = _mm_shuffle_epi8(range_min_tbl, range_b);
133
- maxv = _mm_shuffle_epi8(range_max_tbl, range_b);
134
-
135
-
136
- tmp = _mm_or_si128(
137
- _mm_cmplt_epi8(input_b, minv),
138
- _mm_cmpgt_epi8(input_b, maxv)
139
- );
140
- error = _mm_or_si128(error, tmp);
141
-
142
- /************************ next iteration **************************/
143
- prev_input = input_b;
144
- prev_first_len = first_len_b;
145
-
146
- data += 32;
147
- len -= 32;
148
- }
149
-
150
- if (!_mm_testz_si128(error, error))
151
- return -1;
152
-
153
- int32_t token4 = _mm_extract_epi32(prev_input, 3);
154
- const int8_t *token = (const int8_t *)&token4;
155
- int lookahead = 0;
156
- if (token[3] > (int8_t)0xBF)
157
- lookahead = 1;
158
- else if (token[2] > (int8_t)0xBF)
159
- lookahead = 2;
160
- else if (token[1] > (int8_t)0xBF)
161
- lookahead = 3;
162
-
163
- data -= lookahead;
164
- len += lookahead;
165
- }
166
-
167
- return utf8_naive(data, len);
168
- }
169
-
170
- #endif
@@ -1,29 +0,0 @@
1
- // Protocol Buffers - Google's data interchange format
2
- // Copyright 2017 Google Inc. All rights reserved.
3
- //
4
- // Use of this source code is governed by a BSD-style
5
- // license that can be found in the LICENSE file or at
6
- // https://developers.google.com/open-source/licenses/bsd
7
-
8
- #include <string.h>
9
-
10
- // On x86-64 Linux with glibc, we link against the 2.2.5 version of memcpy so
11
- // that we avoid depending on the 2.14 version of the symbol. This way,
12
- // distributions that are using pre-2.14 versions of glibc can successfully use
13
- // the gem we distribute
14
- // (https://github.com/protocolbuffers/protobuf/issues/2783).
15
- //
16
- // This wrapper is enabled by passing the linker flags -Wl,-wrap,memcpy in
17
- // extconf.rb.
18
- #ifdef __linux__
19
- #if defined(__x86_64__) && defined(__GNU_LIBRARY__)
20
- __asm__(".symver memcpy,memcpy@GLIBC_2.2.5");
21
- void *__wrap_memcpy(void *dest, const void *src, size_t n) {
22
- return memcpy(dest, src, n);
23
- }
24
- #else
25
- void *__wrap_memcpy(void *dest, const void *src, size_t n) {
26
- return memmove(dest, src, n);
27
- }
28
- #endif
29
- #endif