google-protobuf 3.25.2-java → 4.26.0.rc.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of google-protobuf might be problematic; see the registry page for this release for more details.

Files changed (42)
  1. checksums.yaml +4 -4
  2. data/ext/google/protobuf_c/message.c +38 -76
  3. data/ext/google/protobuf_c/message.h +1 -1
  4. data/ext/google/protobuf_c/ruby-upb.c +11791 -10801
  5. data/ext/google/protobuf_c/ruby-upb.h +5163 -4241
  6. data/ext/google/protobuf_c/shared_convert.c +5 -3
  7. data/ext/google/protobuf_c/shared_convert.h +2 -2
  8. data/ext/google/protobuf_c/shared_message.c +8 -6
  9. data/ext/google/protobuf_c/third_party/utf8_range/utf8_range.c +467 -0
  10. data/ext/google/protobuf_c/third_party/utf8_range/utf8_range.h +9 -8
  11. data/lib/google/protobuf/any_pb.rb +1 -22
  12. data/lib/google/protobuf/api_pb.rb +1 -24
  13. data/lib/google/protobuf/descriptor_pb.rb +2 -23
  14. data/lib/google/protobuf/duration_pb.rb +1 -22
  15. data/lib/google/protobuf/empty_pb.rb +1 -22
  16. data/lib/google/protobuf/ffi/descriptor.rb +2 -3
  17. data/lib/google/protobuf/ffi/enum_descriptor.rb +1 -1
  18. data/lib/google/protobuf/ffi/ffi.rb +2 -0
  19. data/lib/google/protobuf/ffi/field_descriptor.rb +10 -1
  20. data/lib/google/protobuf/ffi/file_descriptor.rb +1 -13
  21. data/lib/google/protobuf/ffi/internal/convert.rb +7 -23
  22. data/lib/google/protobuf/ffi/map.rb +13 -11
  23. data/lib/google/protobuf/ffi/message.rb +10 -13
  24. data/lib/google/protobuf/ffi/oneof_descriptor.rb +1 -1
  25. data/lib/google/protobuf/ffi/repeated_field.rb +12 -10
  26. data/lib/google/protobuf/field_mask_pb.rb +1 -22
  27. data/lib/google/protobuf/plugin_pb.rb +2 -24
  28. data/lib/google/protobuf/repeated_field.rb +1 -2
  29. data/lib/google/protobuf/source_context_pb.rb +1 -22
  30. data/lib/google/protobuf/struct_pb.rb +1 -22
  31. data/lib/google/protobuf/timestamp_pb.rb +1 -22
  32. data/lib/google/protobuf/type_pb.rb +1 -24
  33. data/lib/google/protobuf/wrappers_pb.rb +1 -22
  34. data/lib/google/protobuf_ffi.rb +1 -2
  35. data/lib/google/protobuf_java.jar +0 -0
  36. data/lib/google/protobuf_native.rb +0 -1
  37. data/lib/google/tasks/ffi.rake +1 -3
  38. metadata +9 -12
  39. data/ext/google/protobuf_c/third_party/utf8_range/naive.c +0 -92
  40. data/ext/google/protobuf_c/third_party/utf8_range/range2-neon.c +0 -157
  41. data/ext/google/protobuf_c/third_party/utf8_range/range2-sse.c +0 -170
  42. data/lib/google/protobuf/descriptor_dsl.rb +0 -465
@@ -8,28 +8,7 @@ require 'google/protobuf'
8
8
  descriptor_data = "\n\x1fgoogle/protobuf/timestamp.proto\x12\x0fgoogle.protobuf\"+\n\tTimestamp\x12\x0f\n\x07seconds\x18\x01 \x01(\x03\x12\r\n\x05nanos\x18\x02 \x01(\x05\x42\x85\x01\n\x13\x63om.google.protobufB\x0eTimestampProtoP\x01Z2google.golang.org/protobuf/types/known/timestamppb\xf8\x01\x01\xa2\x02\x03GPB\xaa\x02\x1eGoogle.Protobuf.WellKnownTypesb\x06proto3"
9
9
 
10
10
  pool = Google::Protobuf::DescriptorPool.generated_pool
11
-
12
- begin
13
- pool.add_serialized_file(descriptor_data)
14
- rescue TypeError
15
- # Compatibility code: will be removed in the next major version.
16
- require 'google/protobuf/descriptor_pb'
17
- parsed = Google::Protobuf::FileDescriptorProto.decode(descriptor_data)
18
- parsed.clear_dependency
19
- serialized = parsed.class.encode(parsed)
20
- file = pool.add_serialized_file(serialized)
21
- warn "Warning: Protobuf detected an import path issue while loading generated file #{__FILE__}"
22
- imports = [
23
- ]
24
- imports.each do |type_name, expected_filename|
25
- import_file = pool.lookup(type_name).file_descriptor
26
- if import_file.name != expected_filename
27
- warn "- #{file.name} imports #{expected_filename}, but that import was loaded as #{import_file.name}"
28
- end
29
- end
30
- warn "Each proto file must use a consistent fully-qualified name."
31
- warn "This will become an error in the next major version."
32
- end
11
+ pool.add_serialized_file(descriptor_data)
33
12
 
34
13
  module Google
35
14
  module Protobuf
@@ -11,30 +11,7 @@ require 'google/protobuf/source_context_pb'
11
11
  descriptor_data = "\n\x1agoogle/protobuf/type.proto\x12\x0fgoogle.protobuf\x1a\x19google/protobuf/any.proto\x1a$google/protobuf/source_context.proto\"\xe8\x01\n\x04Type\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\x06\x66ields\x18\x02 \x03(\x0b\x32\x16.google.protobuf.Field\x12\x0e\n\x06oneofs\x18\x03 \x03(\t\x12(\n\x07options\x18\x04 \x03(\x0b\x32\x17.google.protobuf.Option\x12\x36\n\x0esource_context\x18\x05 \x01(\x0b\x32\x1e.google.protobuf.SourceContext\x12\'\n\x06syntax\x18\x06 \x01(\x0e\x32\x17.google.protobuf.Syntax\x12\x0f\n\x07\x65\x64ition\x18\x07 \x01(\t\"\xd5\x05\n\x05\x46ield\x12)\n\x04kind\x18\x01 \x01(\x0e\x32\x1b.google.protobuf.Field.Kind\x12\x37\n\x0b\x63\x61rdinality\x18\x02 \x01(\x0e\x32\".google.protobuf.Field.Cardinality\x12\x0e\n\x06number\x18\x03 \x01(\x05\x12\x0c\n\x04name\x18\x04 \x01(\t\x12\x10\n\x08type_url\x18\x06 \x01(\t\x12\x13\n\x0boneof_index\x18\x07 \x01(\x05\x12\x0e\n\x06packed\x18\x08 \x01(\x08\x12(\n\x07options\x18\t \x03(\x0b\x32\x17.google.protobuf.Option\x12\x11\n\tjson_name\x18\n \x01(\t\x12\x15\n\rdefault_value\x18\x0b \x01(\t\"\xc8\x02\n\x04Kind\x12\x10\n\x0cTYPE_UNKNOWN\x10\x00\x12\x0f\n\x0bTYPE_DOUBLE\x10\x01\x12\x0e\n\nTYPE_FLOAT\x10\x02\x12\x0e\n\nTYPE_INT64\x10\x03\x12\x0f\n\x0bTYPE_UINT64\x10\x04\x12\x0e\n\nTYPE_INT32\x10\x05\x12\x10\n\x0cTYPE_FIXED64\x10\x06\x12\x10\n\x0cTYPE_FIXED32\x10\x07\x12\r\n\tTYPE_BOOL\x10\x08\x12\x0f\n\x0bTYPE_STRING\x10\t\x12\x0e\n\nTYPE_GROUP\x10\n\x12\x10\n\x0cTYPE_MESSAGE\x10\x0b\x12\x0e\n\nTYPE_BYTES\x10\x0c\x12\x0f\n\x0bTYPE_UINT32\x10\r\x12\r\n\tTYPE_ENUM\x10\x0e\x12\x11\n\rTYPE_SFIXED32\x10\x0f\x12\x11\n\rTYPE_SFIXED64\x10\x10\x12\x0f\n\x0bTYPE_SINT32\x10\x11\x12\x0f\n\x0bTYPE_SINT64\x10\x12\"t\n\x0b\x43\x61rdinality\x12\x17\n\x13\x43\x41RDINALITY_UNKNOWN\x10\x00\x12\x18\n\x14\x43\x41RDINALITY_OPTIONAL\x10\x01\x12\x18\n\x14\x43\x41RDINALITY_REQUIRED\x10\x02\x12\x18\n\x14\x43\x41RDINALITY_REPEATED\x10\x03\"\xdf\x01\n\x04\x45num\x12\x0c\n\x04name\x18\x01 
\x01(\t\x12-\n\tenumvalue\x18\x02 \x03(\x0b\x32\x1a.google.protobuf.EnumValue\x12(\n\x07options\x18\x03 \x03(\x0b\x32\x17.google.protobuf.Option\x12\x36\n\x0esource_context\x18\x04 \x01(\x0b\x32\x1e.google.protobuf.SourceContext\x12\'\n\x06syntax\x18\x05 \x01(\x0e\x32\x17.google.protobuf.Syntax\x12\x0f\n\x07\x65\x64ition\x18\x06 \x01(\t\"S\n\tEnumValue\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06number\x18\x02 \x01(\x05\x12(\n\x07options\x18\x03 \x03(\x0b\x32\x17.google.protobuf.Option\";\n\x06Option\x12\x0c\n\x04name\x18\x01 \x01(\t\x12#\n\x05value\x18\x02 \x01(\x0b\x32\x14.google.protobuf.Any*C\n\x06Syntax\x12\x11\n\rSYNTAX_PROTO2\x10\x00\x12\x11\n\rSYNTAX_PROTO3\x10\x01\x12\x13\n\x0fSYNTAX_EDITIONS\x10\x02\x42{\n\x13\x63om.google.protobufB\tTypeProtoP\x01Z-google.golang.org/protobuf/types/known/typepb\xf8\x01\x01\xa2\x02\x03GPB\xaa\x02\x1eGoogle.Protobuf.WellKnownTypesb\x06proto3"
12
12
 
13
13
  pool = Google::Protobuf::DescriptorPool.generated_pool
14
-
15
- begin
16
- pool.add_serialized_file(descriptor_data)
17
- rescue TypeError
18
- # Compatibility code: will be removed in the next major version.
19
- require 'google/protobuf/descriptor_pb'
20
- parsed = Google::Protobuf::FileDescriptorProto.decode(descriptor_data)
21
- parsed.clear_dependency
22
- serialized = parsed.class.encode(parsed)
23
- file = pool.add_serialized_file(serialized)
24
- warn "Warning: Protobuf detected an import path issue while loading generated file #{__FILE__}"
25
- imports = [
26
- ["google.protobuf.SourceContext", "google/protobuf/source_context.proto"],
27
- ["google.protobuf.Any", "google/protobuf/any.proto"],
28
- ]
29
- imports.each do |type_name, expected_filename|
30
- import_file = pool.lookup(type_name).file_descriptor
31
- if import_file.name != expected_filename
32
- warn "- #{file.name} imports #{expected_filename}, but that import was loaded as #{import_file.name}"
33
- end
34
- end
35
- warn "Each proto file must use a consistent fully-qualified name."
36
- warn "This will become an error in the next major version."
37
- end
14
+ pool.add_serialized_file(descriptor_data)
38
15
 
39
16
  module Google
40
17
  module Protobuf
@@ -8,28 +8,7 @@ require 'google/protobuf'
8
8
  descriptor_data = "\n\x1egoogle/protobuf/wrappers.proto\x12\x0fgoogle.protobuf\"\x1c\n\x0b\x44oubleValue\x12\r\n\x05value\x18\x01 \x01(\x01\"\x1b\n\nFloatValue\x12\r\n\x05value\x18\x01 \x01(\x02\"\x1b\n\nInt64Value\x12\r\n\x05value\x18\x01 \x01(\x03\"\x1c\n\x0bUInt64Value\x12\r\n\x05value\x18\x01 \x01(\x04\"\x1b\n\nInt32Value\x12\r\n\x05value\x18\x01 \x01(\x05\"\x1c\n\x0bUInt32Value\x12\r\n\x05value\x18\x01 \x01(\r\"\x1a\n\tBoolValue\x12\r\n\x05value\x18\x01 \x01(\x08\"\x1c\n\x0bStringValue\x12\r\n\x05value\x18\x01 \x01(\t\"\x1b\n\nBytesValue\x12\r\n\x05value\x18\x01 \x01(\x0c\x42\x83\x01\n\x13\x63om.google.protobufB\rWrappersProtoP\x01Z1google.golang.org/protobuf/types/known/wrapperspb\xf8\x01\x01\xa2\x02\x03GPB\xaa\x02\x1eGoogle.Protobuf.WellKnownTypesb\x06proto3"
9
9
 
10
10
  pool = Google::Protobuf::DescriptorPool.generated_pool
11
-
12
- begin
13
- pool.add_serialized_file(descriptor_data)
14
- rescue TypeError
15
- # Compatibility code: will be removed in the next major version.
16
- require 'google/protobuf/descriptor_pb'
17
- parsed = Google::Protobuf::FileDescriptorProto.decode(descriptor_data)
18
- parsed.clear_dependency
19
- serialized = parsed.class.encode(parsed)
20
- file = pool.add_serialized_file(serialized)
21
- warn "Warning: Protobuf detected an import path issue while loading generated file #{__FILE__}"
22
- imports = [
23
- ]
24
- imports.each do |type_name, expected_filename|
25
- import_file = pool.lookup(type_name).file_descriptor
26
- if import_file.name != expected_filename
27
- warn "- #{file.name} imports #{expected_filename}, but that import was loaded as #{import_file.name}"
28
- end
29
- end
30
- warn "Each proto file must use a consistent fully-qualified name."
31
- warn "This will become an error in the next major version."
32
- end
11
+ pool.add_serialized_file(descriptor_data)
33
12
 
34
13
  module Google
35
14
  module Protobuf
@@ -21,7 +21,6 @@ require 'google/protobuf/ffi/map'
21
21
  require 'google/protobuf/ffi/object_cache'
22
22
  require 'google/protobuf/ffi/repeated_field'
23
23
  require 'google/protobuf/ffi/message'
24
- require 'google/protobuf/descriptor_dsl'
25
24
 
26
25
  module Google
27
26
  module Protobuf
@@ -47,4 +46,4 @@ module Google
47
46
  nil
48
47
  end
49
48
  end
50
- end
49
+ end
Binary file (data/lib/google/protobuf_java.jar)
@@ -16,5 +16,4 @@ else
16
16
  end
17
17
  end
18
18
 
19
- require 'google/protobuf/descriptor_dsl'
20
19
  require 'google/protobuf/repeated_field'
@@ -74,9 +74,7 @@ begin
74
74
  FFI::Compiler::CompileTask.new 'protobuf_c_ffi' do |c|
75
75
  configure_common_compile_task c
76
76
  # Ruby UPB was already compiled with different flags.
77
- c.exclude << "/range2-neon.c"
78
- c.exclude << "/range2-sse.c"
79
- c.exclude << "/naive.c"
77
+ c.exclude << "/utf8_range.c"
80
78
  c.exclude << "/ruby-upb.c"
81
79
  end
82
80
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google-protobuf
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.25.2
4
+ version: 4.26.0.rc.1
5
5
  platform: java
6
6
  authors:
7
7
  - Protobuf Authors
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-08 00:00:00.000000000 Z
11
+ date: 2024-01-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -41,15 +41,15 @@ dependencies:
41
41
  - !ruby/object:Gem::Dependency
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
- - - "~>"
44
+ - - ">="
45
45
  - !ruby/object:Gem::Version
46
46
  version: '13'
47
47
  name: rake
48
48
  prerelease: false
49
- type: :development
49
+ type: :runtime
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - "~>"
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '13'
55
55
  - !ruby/object:Gem::Dependency
@@ -134,14 +134,11 @@ files:
134
134
  - ext/google/protobuf_c/shared_message.c
135
135
  - ext/google/protobuf_c/shared_message.h
136
136
  - ext/google/protobuf_c/third_party/utf8_range/LICENSE
137
- - ext/google/protobuf_c/third_party/utf8_range/naive.c
138
- - ext/google/protobuf_c/third_party/utf8_range/range2-neon.c
139
- - ext/google/protobuf_c/third_party/utf8_range/range2-sse.c
137
+ - ext/google/protobuf_c/third_party/utf8_range/utf8_range.c
140
138
  - ext/google/protobuf_c/third_party/utf8_range/utf8_range.h
141
139
  - lib/google/protobuf.rb
142
140
  - lib/google/protobuf/any_pb.rb
143
141
  - lib/google/protobuf/api_pb.rb
144
- - lib/google/protobuf/descriptor_dsl.rb
145
142
  - lib/google/protobuf/descriptor_pb.rb
146
143
  - lib/google/protobuf/duration_pb.rb
147
144
  - lib/google/protobuf/empty_pb.rb
@@ -179,7 +176,7 @@ homepage: https://developers.google.com/protocol-buffers
179
176
  licenses:
180
177
  - BSD-3-Clause
181
178
  metadata:
182
- source_code_uri: https://github.com/protocolbuffers/protobuf/tree/v3.25.2/ruby
179
+ source_code_uri: https://github.com/protocolbuffers/protobuf/tree/v4.26.0-rc1/ruby
183
180
  post_install_message:
184
181
  rdoc_options: []
185
182
  require_paths:
@@ -191,9 +188,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
191
188
  version: '2.7'
192
189
  required_rubygems_version: !ruby/object:Gem::Requirement
193
190
  requirements:
194
- - - ">="
191
+ - - ">"
195
192
  - !ruby/object:Gem::Version
196
- version: '0'
193
+ version: 1.3.1
197
194
  requirements: []
198
195
  rubygems_version: 3.2.29
199
196
  signing_key:
@@ -1,92 +0,0 @@
1
- #include <stdio.h>
2
-
3
- /*
4
- * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
5
- *
6
- * Table 3-7. Well-Formed UTF-8 Byte Sequences
7
- *
8
- * +--------------------+------------+-------------+------------+-------------+
9
- * | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte |
10
- * +--------------------+------------+-------------+------------+-------------+
11
- * | U+0000..U+007F | 00..7F | | | |
12
- * +--------------------+------------+-------------+------------+-------------+
13
- * | U+0080..U+07FF | C2..DF | 80..BF | | |
14
- * +--------------------+------------+-------------+------------+-------------+
15
- * | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
16
- * +--------------------+------------+-------------+------------+-------------+
17
- * | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
18
- * +--------------------+------------+-------------+------------+-------------+
19
- * | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
20
- * +--------------------+------------+-------------+------------+-------------+
21
- * | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
22
- * +--------------------+------------+-------------+------------+-------------+
23
- * | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
24
- * +--------------------+------------+-------------+------------+-------------+
25
- * | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
26
- * +--------------------+------------+-------------+------------+-------------+
27
- * | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
28
- * +--------------------+------------+-------------+------------+-------------+
29
- */
30
-
31
- /* Return 0 - success, >0 - index(1 based) of first error char */
32
- int utf8_naive(const unsigned char *data, int len)
33
- {
34
- int err_pos = 1;
35
-
36
- while (len) {
37
- int bytes;
38
- const unsigned char byte1 = data[0];
39
-
40
- /* 00..7F */
41
- if (byte1 <= 0x7F) {
42
- bytes = 1;
43
- /* C2..DF, 80..BF */
44
- } else if (len >= 2 && byte1 >= 0xC2 && byte1 <= 0xDF &&
45
- (signed char)data[1] <= (signed char)0xBF) {
46
- bytes = 2;
47
- } else if (len >= 3) {
48
- const unsigned char byte2 = data[1];
49
-
50
- /* Is byte2, byte3 between 0x80 ~ 0xBF */
51
- const int byte2_ok = (signed char)byte2 <= (signed char)0xBF;
52
- const int byte3_ok = (signed char)data[2] <= (signed char)0xBF;
53
-
54
- if (byte2_ok && byte3_ok &&
55
- /* E0, A0..BF, 80..BF */
56
- ((byte1 == 0xE0 && byte2 >= 0xA0) ||
57
- /* E1..EC, 80..BF, 80..BF */
58
- (byte1 >= 0xE1 && byte1 <= 0xEC) ||
59
- /* ED, 80..9F, 80..BF */
60
- (byte1 == 0xED && byte2 <= 0x9F) ||
61
- /* EE..EF, 80..BF, 80..BF */
62
- (byte1 >= 0xEE && byte1 <= 0xEF))) {
63
- bytes = 3;
64
- } else if (len >= 4) {
65
- /* Is byte4 between 0x80 ~ 0xBF */
66
- const int byte4_ok = (signed char)data[3] <= (signed char)0xBF;
67
-
68
- if (byte2_ok && byte3_ok && byte4_ok &&
69
- /* F0, 90..BF, 80..BF, 80..BF */
70
- ((byte1 == 0xF0 && byte2 >= 0x90) ||
71
- /* F1..F3, 80..BF, 80..BF, 80..BF */
72
- (byte1 >= 0xF1 && byte1 <= 0xF3) ||
73
- /* F4, 80..8F, 80..BF, 80..BF */
74
- (byte1 == 0xF4 && byte2 <= 0x8F))) {
75
- bytes = 4;
76
- } else {
77
- return err_pos;
78
- }
79
- } else {
80
- return err_pos;
81
- }
82
- } else {
83
- return err_pos;
84
- }
85
-
86
- len -= bytes;
87
- err_pos += bytes;
88
- data += bytes;
89
- }
90
-
91
- return 0;
92
- }
@@ -1,157 +0,0 @@
1
- /*
2
- * Process 2x16 bytes in each iteration.
3
- * Comments removed for brevity. See range-neon.c for details.
4
- */
5
- #ifdef __aarch64__
6
-
7
- #include <stdio.h>
8
- #include <stdint.h>
9
- #include <arm_neon.h>
10
-
11
- int utf8_naive(const unsigned char *data, int len);
12
-
13
- static const uint8_t _first_len_tbl[] = {
14
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
15
- };
16
-
17
- static const uint8_t _first_range_tbl[] = {
18
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
19
- };
20
-
21
- static const uint8_t _range_min_tbl[] = {
22
- 0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
23
- 0xC2, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
24
- };
25
- static const uint8_t _range_max_tbl[] = {
26
- 0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
27
- 0xF4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
28
- };
29
-
30
- static const uint8_t _range_adjust_tbl[] = {
31
- 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0,
32
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0,
33
- };
34
-
35
- /* Return 0 on success, -1 on error */
36
- int utf8_range2(const unsigned char *data, int len)
37
- {
38
- if (len >= 32) {
39
- uint8x16_t prev_input = vdupq_n_u8(0);
40
- uint8x16_t prev_first_len = vdupq_n_u8(0);
41
-
42
- const uint8x16_t first_len_tbl = vld1q_u8(_first_len_tbl);
43
- const uint8x16_t first_range_tbl = vld1q_u8(_first_range_tbl);
44
- const uint8x16_t range_min_tbl = vld1q_u8(_range_min_tbl);
45
- const uint8x16_t range_max_tbl = vld1q_u8(_range_max_tbl);
46
- const uint8x16x2_t range_adjust_tbl = vld2q_u8(_range_adjust_tbl);
47
-
48
- const uint8x16_t const_1 = vdupq_n_u8(1);
49
- const uint8x16_t const_2 = vdupq_n_u8(2);
50
- const uint8x16_t const_e0 = vdupq_n_u8(0xE0);
51
-
52
- uint8x16_t error1 = vdupq_n_u8(0);
53
- uint8x16_t error2 = vdupq_n_u8(0);
54
- uint8x16_t error3 = vdupq_n_u8(0);
55
- uint8x16_t error4 = vdupq_n_u8(0);
56
-
57
- while (len >= 32) {
58
- /******************* two blocks interleaved **********************/
59
-
60
- #if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ < 8)
61
- /* gcc doesn't support vldq1_u8_x2 until version 8 */
62
- const uint8x16_t input_a = vld1q_u8(data);
63
- const uint8x16_t input_b = vld1q_u8(data + 16);
64
- #else
65
- /* Forces a double load on Clang */
66
- const uint8x16x2_t input_pair = vld1q_u8_x2(data);
67
- const uint8x16_t input_a = input_pair.val[0];
68
- const uint8x16_t input_b = input_pair.val[1];
69
- #endif
70
-
71
- const uint8x16_t high_nibbles_a = vshrq_n_u8(input_a, 4);
72
- const uint8x16_t high_nibbles_b = vshrq_n_u8(input_b, 4);
73
-
74
- const uint8x16_t first_len_a =
75
- vqtbl1q_u8(first_len_tbl, high_nibbles_a);
76
- const uint8x16_t first_len_b =
77
- vqtbl1q_u8(first_len_tbl, high_nibbles_b);
78
-
79
- uint8x16_t range_a = vqtbl1q_u8(first_range_tbl, high_nibbles_a);
80
- uint8x16_t range_b = vqtbl1q_u8(first_range_tbl, high_nibbles_b);
81
-
82
- range_a =
83
- vorrq_u8(range_a, vextq_u8(prev_first_len, first_len_a, 15));
84
- range_b =
85
- vorrq_u8(range_b, vextq_u8(first_len_a, first_len_b, 15));
86
-
87
- uint8x16_t tmp1_a, tmp2_a, tmp1_b, tmp2_b;
88
- tmp1_a = vextq_u8(prev_first_len, first_len_a, 14);
89
- tmp1_a = vqsubq_u8(tmp1_a, const_1);
90
- range_a = vorrq_u8(range_a, tmp1_a);
91
-
92
- tmp1_b = vextq_u8(first_len_a, first_len_b, 14);
93
- tmp1_b = vqsubq_u8(tmp1_b, const_1);
94
- range_b = vorrq_u8(range_b, tmp1_b);
95
-
96
- tmp2_a = vextq_u8(prev_first_len, first_len_a, 13);
97
- tmp2_a = vqsubq_u8(tmp2_a, const_2);
98
- range_a = vorrq_u8(range_a, tmp2_a);
99
-
100
- tmp2_b = vextq_u8(first_len_a, first_len_b, 13);
101
- tmp2_b = vqsubq_u8(tmp2_b, const_2);
102
- range_b = vorrq_u8(range_b, tmp2_b);
103
-
104
- uint8x16_t shift1_a = vextq_u8(prev_input, input_a, 15);
105
- uint8x16_t pos_a = vsubq_u8(shift1_a, const_e0);
106
- range_a = vaddq_u8(range_a, vqtbl2q_u8(range_adjust_tbl, pos_a));
107
-
108
- uint8x16_t shift1_b = vextq_u8(input_a, input_b, 15);
109
- uint8x16_t pos_b = vsubq_u8(shift1_b, const_e0);
110
- range_b = vaddq_u8(range_b, vqtbl2q_u8(range_adjust_tbl, pos_b));
111
-
112
- uint8x16_t minv_a = vqtbl1q_u8(range_min_tbl, range_a);
113
- uint8x16_t maxv_a = vqtbl1q_u8(range_max_tbl, range_a);
114
-
115
- uint8x16_t minv_b = vqtbl1q_u8(range_min_tbl, range_b);
116
- uint8x16_t maxv_b = vqtbl1q_u8(range_max_tbl, range_b);
117
-
118
- error1 = vorrq_u8(error1, vcltq_u8(input_a, minv_a));
119
- error2 = vorrq_u8(error2, vcgtq_u8(input_a, maxv_a));
120
-
121
- error3 = vorrq_u8(error3, vcltq_u8(input_b, minv_b));
122
- error4 = vorrq_u8(error4, vcgtq_u8(input_b, maxv_b));
123
-
124
- /************************ next iteration *************************/
125
- prev_input = input_b;
126
- prev_first_len = first_len_b;
127
-
128
- data += 32;
129
- len -= 32;
130
- }
131
- error1 = vorrq_u8(error1, error2);
132
- error1 = vorrq_u8(error1, error3);
133
- error1 = vorrq_u8(error1, error4);
134
-
135
- if (vmaxvq_u8(error1))
136
- return -1;
137
-
138
- uint32_t token4;
139
- vst1q_lane_u32(&token4, vreinterpretq_u32_u8(prev_input), 3);
140
-
141
- const int8_t *token = (const int8_t *)&token4;
142
- int lookahead = 0;
143
- if (token[3] > (int8_t)0xBF)
144
- lookahead = 1;
145
- else if (token[2] > (int8_t)0xBF)
146
- lookahead = 2;
147
- else if (token[1] > (int8_t)0xBF)
148
- lookahead = 3;
149
-
150
- data -= lookahead;
151
- len += lookahead;
152
- }
153
-
154
- return utf8_naive(data, len);
155
- }
156
-
157
- #endif
@@ -1,170 +0,0 @@
1
- /*
2
- * Process 2x16 bytes in each iteration.
3
- * Comments removed for brevity. See range-sse.c for details.
4
- */
5
- #ifdef __SSE4_1__
6
-
7
- #include <stdio.h>
8
- #include <stdint.h>
9
- #include <x86intrin.h>
10
-
11
- int utf8_naive(const unsigned char *data, int len);
12
-
13
- static const int8_t _first_len_tbl[] = {
14
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
15
- };
16
-
17
- static const int8_t _first_range_tbl[] = {
18
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
19
- };
20
-
21
- static const int8_t _range_min_tbl[] = {
22
- 0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
23
- 0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F,
24
- };
25
- static const int8_t _range_max_tbl[] = {
26
- 0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
27
- 0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
28
- };
29
-
30
- static const int8_t _df_ee_tbl[] = {
31
- 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
32
- };
33
- static const int8_t _ef_fe_tbl[] = {
34
- 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35
- };
36
-
37
- /* Return 0 on success, -1 on error */
38
- int utf8_range2(const unsigned char *data, int len)
39
- {
40
- if (len >= 32) {
41
- __m128i prev_input = _mm_set1_epi8(0);
42
- __m128i prev_first_len = _mm_set1_epi8(0);
43
-
44
- const __m128i first_len_tbl =
45
- _mm_loadu_si128((const __m128i *)_first_len_tbl);
46
- const __m128i first_range_tbl =
47
- _mm_loadu_si128((const __m128i *)_first_range_tbl);
48
- const __m128i range_min_tbl =
49
- _mm_loadu_si128((const __m128i *)_range_min_tbl);
50
- const __m128i range_max_tbl =
51
- _mm_loadu_si128((const __m128i *)_range_max_tbl);
52
- const __m128i df_ee_tbl =
53
- _mm_loadu_si128((const __m128i *)_df_ee_tbl);
54
- const __m128i ef_fe_tbl =
55
- _mm_loadu_si128((const __m128i *)_ef_fe_tbl);
56
-
57
- __m128i error = _mm_set1_epi8(0);
58
-
59
- while (len >= 32) {
60
- /***************************** block 1 ****************************/
61
- const __m128i input_a = _mm_loadu_si128((const __m128i *)data);
62
-
63
- __m128i high_nibbles =
64
- _mm_and_si128(_mm_srli_epi16(input_a, 4), _mm_set1_epi8(0x0F));
65
-
66
- __m128i first_len_a = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
67
-
68
- __m128i range_a = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
69
-
70
- range_a = _mm_or_si128(
71
- range_a, _mm_alignr_epi8(first_len_a, prev_first_len, 15));
72
-
73
- __m128i tmp;
74
- tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 14);
75
- tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
76
- range_a = _mm_or_si128(range_a, tmp);
77
-
78
- tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 13);
79
- tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
80
- range_a = _mm_or_si128(range_a, tmp);
81
-
82
- __m128i shift1, pos, range2;
83
- shift1 = _mm_alignr_epi8(input_a, prev_input, 15);
84
- pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
85
- tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
86
- range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
87
- tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
88
- range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp));
89
-
90
- range_a = _mm_add_epi8(range_a, range2);
91
-
92
- __m128i minv = _mm_shuffle_epi8(range_min_tbl, range_a);
93
- __m128i maxv = _mm_shuffle_epi8(range_max_tbl, range_a);
94
-
95
- tmp = _mm_or_si128(
96
- _mm_cmplt_epi8(input_a, minv),
97
- _mm_cmpgt_epi8(input_a, maxv)
98
- );
99
- error = _mm_or_si128(error, tmp);
100
-
101
- /***************************** block 2 ****************************/
102
- const __m128i input_b = _mm_loadu_si128((const __m128i *)(data+16));
103
-
104
- high_nibbles =
105
- _mm_and_si128(_mm_srli_epi16(input_b, 4), _mm_set1_epi8(0x0F));
106
-
107
- __m128i first_len_b = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
108
-
109
- __m128i range_b = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
110
-
111
- range_b = _mm_or_si128(
112
- range_b, _mm_alignr_epi8(first_len_b, first_len_a, 15));
113
-
114
-
115
- tmp = _mm_alignr_epi8(first_len_b, first_len_a, 14);
116
- tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
117
- range_b = _mm_or_si128(range_b, tmp);
118
-
119
- tmp = _mm_alignr_epi8(first_len_b, first_len_a, 13);
120
- tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
121
- range_b = _mm_or_si128(range_b, tmp);
122
-
123
- shift1 = _mm_alignr_epi8(input_b, input_a, 15);
124
- pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
125
- tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
126
- range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
127
- tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
128
- range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp));
129
-
130
- range_b = _mm_add_epi8(range_b, range2);
131
-
132
- minv = _mm_shuffle_epi8(range_min_tbl, range_b);
133
- maxv = _mm_shuffle_epi8(range_max_tbl, range_b);
134
-
135
-
136
- tmp = _mm_or_si128(
137
- _mm_cmplt_epi8(input_b, minv),
138
- _mm_cmpgt_epi8(input_b, maxv)
139
- );
140
- error = _mm_or_si128(error, tmp);
141
-
142
- /************************ next iteration **************************/
143
- prev_input = input_b;
144
- prev_first_len = first_len_b;
145
-
146
- data += 32;
147
- len -= 32;
148
- }
149
-
150
- if (!_mm_testz_si128(error, error))
151
- return -1;
152
-
153
- int32_t token4 = _mm_extract_epi32(prev_input, 3);
154
- const int8_t *token = (const int8_t *)&token4;
155
- int lookahead = 0;
156
- if (token[3] > (int8_t)0xBF)
157
- lookahead = 1;
158
- else if (token[2] > (int8_t)0xBF)
159
- lookahead = 2;
160
- else if (token[1] > (int8_t)0xBF)
161
- lookahead = 3;
162
-
163
- data -= lookahead;
164
- len += lookahead;
165
- }
166
-
167
- return utf8_naive(data, len);
168
- }
169
-
170
- #endif