avro 1.8.2 → 1.10.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +2 -2
- data/Manifest +7 -0
- data/NOTICE +1 -1
- data/Rakefile +12 -15
- data/avro.gemspec +21 -21
- data/interop/test_interop.rb +13 -3
- data/lib/avro.rb +26 -3
- data/lib/avro/VERSION.txt +1 -0
- data/lib/avro/data_file.rb +25 -2
- data/lib/avro/io.rb +66 -84
- data/lib/avro/ipc.rb +11 -11
- data/lib/avro/logical_types.rb +90 -0
- data/lib/avro/protocol.rb +12 -8
- data/lib/avro/schema.rb +243 -74
- data/lib/avro/schema_compatibility.rb +175 -0
- data/lib/avro/schema_normalization.rb +1 -1
- data/lib/avro/schema_validator.rb +242 -0
- data/test/case_finder.rb +9 -4
- data/test/random_data.rb +24 -4
- data/test/sample_ipc_client.rb +1 -1
- data/test/sample_ipc_http_client.rb +1 -1
- data/test/sample_ipc_http_server.rb +1 -1
- data/test/sample_ipc_server.rb +1 -1
- data/test/test_datafile.rb +17 -4
- data/test/test_fingerprints.rb +20 -1
- data/test/test_help.rb +1 -1
- data/test/test_io.rb +155 -7
- data/test/test_logical_types.rb +128 -0
- data/test/test_protocol.rb +37 -4
- data/test/test_schema.rb +592 -28
- data/test/test_schema_compatibility.rb +543 -0
- data/test/test_schema_normalization.rb +2 -1
- data/test/test_schema_validator.rb +554 -0
- data/test/test_socket_transport.rb +1 -1
- data/test/tool.rb +4 -5
- metadata +28 -14
data/lib/avro/ipc.rb
CHANGED
@@ -5,9 +5,9 @@
|
|
5
5
|
# to you under the Apache License, Version 2.0 (the
|
6
6
|
# "License"); you may not use this file except in compliance
|
7
7
|
# with the License. You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
8
|
+
#
|
9
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
11
|
# Unless required by applicable law or agreed to in writing, software
|
12
12
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
13
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
@@ -74,10 +74,10 @@ module Avro::IPC
|
|
74
74
|
|
75
75
|
class ConnectionClosedException < Avro::AvroError; end
|
76
76
|
|
77
|
+
# Base class for the client side of a protocol interaction.
|
77
78
|
class Requestor
|
78
|
-
|
79
|
-
|
80
|
-
attr_accessor :remote_protocol, :remote_hash, :send_protocol
|
79
|
+
attr_reader :local_protocol, :transport, :remote_protocol, :remote_hash
|
80
|
+
attr_accessor :send_protocol
|
81
81
|
|
82
82
|
def initialize(local_protocol, transport)
|
83
83
|
@local_protocol = local_protocol
|
@@ -193,9 +193,9 @@ module Avro::IPC
|
|
193
193
|
# * a one-byte error flag boolean, followed by either:
|
194
194
|
# * if the error flag is false,
|
195
195
|
# the message response, serialized per the message's response schema.
|
196
|
-
# * if the error flag is true,
|
196
|
+
# * if the error flag is true,
|
197
197
|
# the error, serialized per the message's error union schema.
|
198
|
-
|
198
|
+
_response_metadata = META_READER.read(decoder)
|
199
199
|
|
200
200
|
# remote response schema
|
201
201
|
remote_message_schema = remote_protocol.messages[message_name]
|
@@ -257,7 +257,7 @@ module Avro::IPC
|
|
257
257
|
end
|
258
258
|
|
259
259
|
# read request using remote protocol
|
260
|
-
|
260
|
+
_request_metadata = META_READER.read(buffer_decoder)
|
261
261
|
remote_message_name = buffer_decoder.read_string
|
262
262
|
|
263
263
|
# get remote and local request schemas so we can do
|
@@ -278,7 +278,7 @@ module Avro::IPC
|
|
278
278
|
response = call(local_message, request)
|
279
279
|
rescue AvroRemoteError => e
|
280
280
|
error = e
|
281
|
-
rescue Exception => e
|
281
|
+
rescue Exception => e # rubocop:disable Lint/RescueException
|
282
282
|
error = AvroRemoteError.new(e.to_s)
|
283
283
|
end
|
284
284
|
|
@@ -350,7 +350,7 @@ module Avro::IPC
|
|
350
350
|
remote_protocol
|
351
351
|
end
|
352
352
|
|
353
|
-
def call(
|
353
|
+
def call(_local_message, _request)
|
354
354
|
# Actual work done by server: cf. handler in thrift.
|
355
355
|
raise NotImplementedError
|
356
356
|
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
# or more contributor license agreements. See the NOTICE file
|
4
|
+
# distributed with this work for additional information
|
5
|
+
# regarding copyright ownership. The ASF licenses this file
|
6
|
+
# to you under the Apache License, Version 2.0 (the
|
7
|
+
# "License"); you may not use this file except in compliance
|
8
|
+
# with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
# See the License for the specific language governing permissions and
|
16
|
+
# limitations under the License.
|
17
|
+
|
18
|
+
require 'date'
|
19
|
+
|
20
|
+
module Avro
|
21
|
+
module LogicalTypes
|
22
|
+
module IntDate
|
23
|
+
EPOCH_START = Date.new(1970, 1, 1)
|
24
|
+
|
25
|
+
def self.encode(date)
|
26
|
+
return date.to_i if date.is_a?(Numeric)
|
27
|
+
|
28
|
+
(date - EPOCH_START).to_i
|
29
|
+
end
|
30
|
+
|
31
|
+
def self.decode(int)
|
32
|
+
EPOCH_START + int
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
module TimestampMillis
|
37
|
+
def self.encode(value)
|
38
|
+
return value.to_i if value.is_a?(Numeric)
|
39
|
+
|
40
|
+
time = value.to_time
|
41
|
+
time.to_i * 1000 + time.usec / 1000
|
42
|
+
end
|
43
|
+
|
44
|
+
def self.decode(int)
|
45
|
+
s, ms = int / 1000, int % 1000
|
46
|
+
Time.at(s, ms * 1000).utc
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
module TimestampMicros
|
51
|
+
def self.encode(value)
|
52
|
+
return value.to_i if value.is_a?(Numeric)
|
53
|
+
|
54
|
+
time = value.to_time
|
55
|
+
time.to_i * 1000_000 + time.usec
|
56
|
+
end
|
57
|
+
|
58
|
+
def self.decode(int)
|
59
|
+
s, us = int / 1000_000, int % 1000_000
|
60
|
+
Time.at(s, us).utc
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
module Identity
|
65
|
+
def self.encode(datum)
|
66
|
+
datum
|
67
|
+
end
|
68
|
+
|
69
|
+
def self.decode(datum)
|
70
|
+
datum
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
TYPES = {
|
75
|
+
"int" => {
|
76
|
+
"date" => IntDate
|
77
|
+
},
|
78
|
+
"long" => {
|
79
|
+
"timestamp-millis" => TimestampMillis,
|
80
|
+
"timestamp-micros" => TimestampMicros
|
81
|
+
},
|
82
|
+
}.freeze
|
83
|
+
|
84
|
+
def self.type_adapter(type, logical_type)
|
85
|
+
return unless logical_type
|
86
|
+
|
87
|
+
TYPES.fetch(type, {}.freeze).fetch(logical_type, Identity)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
data/lib/avro/protocol.rb
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
# "License"); you may not use this file except in compliance
|
7
7
|
# with the License. You may obtain a copy of the License at
|
8
8
|
#
|
9
|
-
#
|
9
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
10
10
|
#
|
11
11
|
# Unless required by applicable law or agreed to in writing, software
|
12
12
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
@@ -20,7 +20,7 @@ module Avro
|
|
20
20
|
VALID_TYPE_SCHEMA_TYPES_SYM = Set.new(VALID_TYPE_SCHEMA_TYPES.map(&:to_sym))
|
21
21
|
class ProtocolParseError < Avro::AvroError; end
|
22
22
|
|
23
|
-
attr_reader :name, :namespace, :types, :messages, :md5
|
23
|
+
attr_reader :name, :namespace, :types, :messages, :md5, :doc
|
24
24
|
def self.parse(protocol_string)
|
25
25
|
json_data = MultiJson.load(protocol_string)
|
26
26
|
|
@@ -29,13 +29,14 @@ module Avro
|
|
29
29
|
namespace = json_data['namespace']
|
30
30
|
types = json_data['types']
|
31
31
|
messages = json_data['messages']
|
32
|
-
|
32
|
+
doc = json_data['doc']
|
33
|
+
Protocol.new(name, namespace, types, messages, doc)
|
33
34
|
else
|
34
35
|
raise ProtocolParseError, "Not a JSON object: #{json_data}"
|
35
36
|
end
|
36
37
|
end
|
37
38
|
|
38
|
-
def initialize(name, namespace=nil, types=nil, messages=nil)
|
39
|
+
def initialize(name, namespace=nil, types=nil, messages=nil, doc=nil)
|
39
40
|
# Ensure valid ctor args
|
40
41
|
if !name
|
41
42
|
raise ProtocolParseError, 'Protocols must have a non-empty name.'
|
@@ -55,6 +56,7 @@ module Avro
|
|
55
56
|
@types = parse_types(types, type_names)
|
56
57
|
@messages = parse_messages(messages, type_names)
|
57
58
|
@md5 = Digest::MD5.digest(to_s)
|
59
|
+
@doc = doc
|
58
60
|
end
|
59
61
|
|
60
62
|
def to_s
|
@@ -67,7 +69,6 @@ module Avro
|
|
67
69
|
|
68
70
|
private
|
69
71
|
def parse_types(types, type_names)
|
70
|
-
type_objects = []
|
71
72
|
types.collect do |type|
|
72
73
|
# FIXME adding type.name to type_names is not defined in the
|
73
74
|
# spec. Possible bug in the python impl and the spec.
|
@@ -92,7 +93,8 @@ module Avro
|
|
92
93
|
request = body['request']
|
93
94
|
response = body['response']
|
94
95
|
errors = body['errors']
|
95
|
-
|
96
|
+
doc = body['doc']
|
97
|
+
message_objects[name] = Message.new(name, request, response, errors, names, namespace, doc)
|
96
98
|
end
|
97
99
|
message_objects
|
98
100
|
end
|
@@ -111,14 +113,15 @@ module Avro
|
|
111
113
|
end
|
112
114
|
|
113
115
|
class Message
|
114
|
-
attr_reader :name, :request, :response, :errors, :default_namespace
|
116
|
+
attr_reader :name, :request, :response, :errors, :default_namespace, :doc
|
115
117
|
|
116
|
-
def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil)
|
118
|
+
def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil, doc=nil)
|
117
119
|
@name = name
|
118
120
|
@default_namespace = default_namespace
|
119
121
|
@request = parse_request(request, names)
|
120
122
|
@response = parse_response(response, names)
|
121
123
|
@errors = parse_errors(errors, names) if errors
|
124
|
+
@doc = doc
|
122
125
|
end
|
123
126
|
|
124
127
|
def to_avro(names=Set.new)
|
@@ -127,6 +130,7 @@ module Avro
|
|
127
130
|
'response' => response.to_avro(names)
|
128
131
|
}.tap do |hash|
|
129
132
|
hash['errors'] = errors.to_avro(names) if errors
|
133
|
+
hash['doc'] = @doc if @doc
|
130
134
|
end
|
131
135
|
end
|
132
136
|
|
data/lib/avro/schema.rb
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
# "License"); you may not use this file except in compliance
|
7
7
|
# with the License. You may obtain a copy of the License at
|
8
8
|
#
|
9
|
-
#
|
9
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
10
10
|
#
|
11
11
|
# Unless required by applicable law or agreed to in writing, software
|
12
12
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
@@ -14,6 +14,8 @@
|
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
16
|
|
17
|
+
require 'avro/logical_types'
|
18
|
+
|
17
19
|
module Avro
|
18
20
|
class Schema
|
19
21
|
# Sets of strings, for backwards compatibility. See below for sets of symbols,
|
@@ -27,6 +29,8 @@ module Avro
|
|
27
29
|
NAMED_TYPES_SYM = Set.new(NAMED_TYPES.map(&:to_sym))
|
28
30
|
VALID_TYPES_SYM = Set.new(VALID_TYPES.map(&:to_sym))
|
29
31
|
|
32
|
+
NAME_REGEX = /^([A-Za-z_][A-Za-z0-9_]*)(\.([A-Za-z_][A-Za-z0-9_]*))*$/
|
33
|
+
|
30
34
|
INT_MIN_VALUE = -(1 << 31)
|
31
35
|
INT_MAX_VALUE = (1 << 31) - 1
|
32
36
|
LONG_MIN_VALUE = -(1 << 63)
|
@@ -40,6 +44,7 @@ module Avro
|
|
40
44
|
def self.real_parse(json_obj, names=nil, default_namespace=nil)
|
41
45
|
if json_obj.is_a? Hash
|
42
46
|
type = json_obj['type']
|
47
|
+
logical_type = json_obj['logicalType']
|
43
48
|
raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil?
|
44
49
|
|
45
50
|
# Check that the type is valid before calling #to_sym, since symbols are never garbage
|
@@ -50,21 +55,34 @@ module Avro
|
|
50
55
|
|
51
56
|
type_sym = type.to_sym
|
52
57
|
if PRIMITIVE_TYPES_SYM.include?(type_sym)
|
53
|
-
|
54
|
-
|
58
|
+
case type_sym
|
59
|
+
when :bytes
|
60
|
+
precision = json_obj['precision']
|
61
|
+
scale = json_obj['scale']
|
62
|
+
return BytesSchema.new(type_sym, logical_type, precision, scale)
|
63
|
+
else
|
64
|
+
return PrimitiveSchema.new(type_sym, logical_type)
|
65
|
+
end
|
55
66
|
elsif NAMED_TYPES_SYM.include? type_sym
|
56
67
|
name = json_obj['name']
|
68
|
+
if !Avro.disable_schema_name_validation && name !~ NAME_REGEX
|
69
|
+
raise SchemaParseError, "Name #{name} is invalid for type #{type}!"
|
70
|
+
end
|
57
71
|
namespace = json_obj.include?('namespace') ? json_obj['namespace'] : default_namespace
|
72
|
+
aliases = json_obj['aliases']
|
58
73
|
case type_sym
|
59
74
|
when :fixed
|
60
75
|
size = json_obj['size']
|
61
|
-
return FixedSchema.new(name, namespace, size, names)
|
76
|
+
return FixedSchema.new(name, namespace, size, names, logical_type, aliases)
|
62
77
|
when :enum
|
63
78
|
symbols = json_obj['symbols']
|
64
|
-
|
79
|
+
doc = json_obj['doc']
|
80
|
+
default = json_obj['default']
|
81
|
+
return EnumSchema.new(name, namespace, symbols, names, doc, default, aliases)
|
65
82
|
when :record, :error
|
66
83
|
fields = json_obj['fields']
|
67
|
-
|
84
|
+
doc = json_obj['doc']
|
85
|
+
return RecordSchema.new(name, namespace, fields, names, type_sym, doc, aliases)
|
68
86
|
else
|
69
87
|
raise SchemaParseError.new("Unknown named type: #{type}")
|
70
88
|
end
|
@@ -91,52 +109,29 @@ module Avro
|
|
91
109
|
end
|
92
110
|
|
93
111
|
# Determine if a ruby datum is an instance of a schema
|
94
|
-
def self.validate(expected_schema,
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
datum == true || datum == false
|
100
|
-
when :string, :bytes
|
101
|
-
datum.is_a? String
|
102
|
-
when :int
|
103
|
-
(datum.is_a?(Fixnum) || datum.is_a?(Bignum)) &&
|
104
|
-
(INT_MIN_VALUE <= datum) && (datum <= INT_MAX_VALUE)
|
105
|
-
when :long
|
106
|
-
(datum.is_a?(Fixnum) || datum.is_a?(Bignum)) &&
|
107
|
-
(LONG_MIN_VALUE <= datum) && (datum <= LONG_MAX_VALUE)
|
108
|
-
when :float, :double
|
109
|
-
datum.is_a?(Float) || datum.is_a?(Fixnum) || datum.is_a?(Bignum)
|
110
|
-
when :fixed
|
111
|
-
datum.is_a?(String) && datum.bytesize == expected_schema.size
|
112
|
-
when :enum
|
113
|
-
expected_schema.symbols.include? datum
|
114
|
-
when :array
|
115
|
-
datum.is_a?(Array) &&
|
116
|
-
datum.all?{|d| validate(expected_schema.items, d) }
|
117
|
-
when :map
|
118
|
-
datum.keys.all?{|k| k.is_a? String } &&
|
119
|
-
datum.values.all?{|v| validate(expected_schema.values, v) }
|
120
|
-
when :union
|
121
|
-
expected_schema.schemas.any?{|s| validate(s, datum) }
|
122
|
-
when :record, :error, :request
|
123
|
-
datum.is_a?(Hash) &&
|
124
|
-
expected_schema.fields.all?{|f| validate(f.type, datum[f.name]) }
|
125
|
-
else
|
126
|
-
raise "you suck #{expected_schema.inspect} is not allowed."
|
127
|
-
end
|
112
|
+
def self.validate(expected_schema, logical_datum, options = { recursive: true, encoded: false })
|
113
|
+
SchemaValidator.validate!(expected_schema, logical_datum, options)
|
114
|
+
true
|
115
|
+
rescue SchemaValidator::ValidationError
|
116
|
+
false
|
128
117
|
end
|
129
118
|
|
130
|
-
def initialize(type)
|
119
|
+
def initialize(type, logical_type=nil)
|
131
120
|
@type_sym = type.is_a?(Symbol) ? type : type.to_sym
|
121
|
+
@logical_type = logical_type
|
132
122
|
end
|
133
123
|
|
134
124
|
attr_reader :type_sym
|
125
|
+
attr_reader :logical_type
|
135
126
|
|
136
127
|
# Returns the type as a string (rather than a symbol), for backwards compatibility.
|
137
128
|
# Deprecated in favor of {#type_sym}.
|
138
129
|
def type; @type_sym.to_s; end
|
139
130
|
|
131
|
+
def type_adapter
|
132
|
+
@type_adapter ||= LogicalTypes.type_adapter(type, logical_type) || LogicalTypes::Identity
|
133
|
+
end
|
134
|
+
|
140
135
|
# Returns the MD5 fingerprint of the schema as an Integer.
|
141
136
|
def md5_fingerprint
|
142
137
|
parsing_form = SchemaNormalization.to_parsing_form(self)
|
@@ -149,11 +144,66 @@ module Avro
|
|
149
144
|
Digest::SHA256.hexdigest(parsing_form).to_i(16)
|
150
145
|
end
|
151
146
|
|
152
|
-
|
147
|
+
CRC_EMPTY = 0xc15d213aa4d7a795
|
148
|
+
|
149
|
+
# The Java library caches this value after it is initialized, so this pattern
|
150
|
+
# mimics that.
|
151
|
+
@@fp_table = nil
|
152
|
+
def initFPTable
|
153
|
+
@@fp_table = Array.new(256)
|
154
|
+
256.times do |i|
|
155
|
+
fp = i
|
156
|
+
8.times do
|
157
|
+
fp = (fp >> 1) ^ ( CRC_EMPTY & -( fp & 1 ) )
|
158
|
+
end
|
159
|
+
@@fp_table[i] = fp
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
def crc_64_avro_fingerprint
|
164
|
+
parsing_form = Avro::SchemaNormalization.to_parsing_form(self)
|
165
|
+
data_bytes = parsing_form.unpack("C*")
|
166
|
+
|
167
|
+
initFPTable unless @@fp_table
|
168
|
+
|
169
|
+
fp = CRC_EMPTY
|
170
|
+
data_bytes.each do |b|
|
171
|
+
fp = (fp >> 8) ^ @@fp_table[ (fp ^ b) & 0xff ]
|
172
|
+
end
|
173
|
+
fp
|
174
|
+
end
|
175
|
+
|
176
|
+
SINGLE_OBJECT_MAGIC_NUMBER = [0xC3, 0x01]
|
177
|
+
def single_object_encoding_header
|
178
|
+
[SINGLE_OBJECT_MAGIC_NUMBER, single_object_schema_fingerprint].flatten
|
179
|
+
end
|
180
|
+
def single_object_schema_fingerprint
|
181
|
+
working = crc_64_avro_fingerprint
|
182
|
+
bytes = Array.new(8)
|
183
|
+
8.times do |i|
|
184
|
+
bytes[i] = (working & 0xff)
|
185
|
+
working = working >> 8
|
186
|
+
end
|
187
|
+
bytes
|
188
|
+
end
|
189
|
+
|
190
|
+
def read?(writers_schema)
|
191
|
+
SchemaCompatibility.can_read?(writers_schema, self)
|
192
|
+
end
|
193
|
+
|
194
|
+
def be_read?(other_schema)
|
195
|
+
other_schema.read?(self)
|
196
|
+
end
|
197
|
+
|
198
|
+
def mutual_read?(other_schema)
|
199
|
+
SchemaCompatibility.mutual_read?(other_schema, self)
|
200
|
+
end
|
201
|
+
|
202
|
+
def ==(other, _seen=nil)
|
153
203
|
other.is_a?(Schema) && type_sym == other.type_sym
|
154
204
|
end
|
155
205
|
|
156
|
-
def hash(
|
206
|
+
def hash(_seen=nil)
|
157
207
|
type_sym.hash
|
158
208
|
end
|
159
209
|
|
@@ -171,20 +221,36 @@ module Avro
|
|
171
221
|
end
|
172
222
|
end
|
173
223
|
|
174
|
-
def to_avro(
|
175
|
-
{'type' => type}
|
224
|
+
def to_avro(_names=nil)
|
225
|
+
props = {'type' => type}
|
226
|
+
props['logicalType'] = logical_type if logical_type
|
227
|
+
props
|
176
228
|
end
|
177
229
|
|
178
230
|
def to_s
|
179
231
|
MultiJson.dump to_avro
|
180
232
|
end
|
181
233
|
|
234
|
+
def validate_aliases!
|
235
|
+
unless aliases.nil? ||
|
236
|
+
(aliases.is_a?(Array) && aliases.all? { |a| a.is_a?(String) })
|
237
|
+
|
238
|
+
raise Avro::SchemaParseError,
|
239
|
+
"Invalid aliases value #{aliases.inspect} for #{type} #{name}. Must be an array of strings."
|
240
|
+
end
|
241
|
+
end
|
242
|
+
private :validate_aliases!
|
243
|
+
|
182
244
|
class NamedSchema < Schema
|
183
|
-
attr_reader :name, :namespace
|
184
|
-
|
185
|
-
|
245
|
+
attr_reader :name, :namespace, :aliases
|
246
|
+
|
247
|
+
def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil, aliases=nil)
|
248
|
+
super(type, logical_type)
|
186
249
|
@name, @namespace = Name.extract_namespace(name, namespace)
|
187
|
-
|
250
|
+
@doc = doc
|
251
|
+
@aliases = aliases
|
252
|
+
validate_aliases! if aliases
|
253
|
+
Name.add_name(names, self)
|
188
254
|
end
|
189
255
|
|
190
256
|
def to_avro(names=Set.new)
|
@@ -194,31 +260,53 @@ module Avro
|
|
194
260
|
end
|
195
261
|
props = {'name' => @name}
|
196
262
|
props.merge!('namespace' => @namespace) if @namespace
|
263
|
+
props['namespace'] = @namespace if @namespace
|
264
|
+
props['doc'] = @doc if @doc
|
265
|
+
props['aliases'] = aliases if aliases && aliases.any?
|
197
266
|
super.merge props
|
198
267
|
end
|
199
268
|
|
200
269
|
def fullname
|
201
270
|
@fullname ||= Name.make_fullname(@name, @namespace)
|
202
271
|
end
|
272
|
+
|
273
|
+
def fullname_aliases
|
274
|
+
@fullname_aliases ||= if aliases
|
275
|
+
aliases.map { |a| Name.make_fullname(a, namespace) }
|
276
|
+
else
|
277
|
+
[]
|
278
|
+
end
|
279
|
+
end
|
280
|
+
|
281
|
+
def match_fullname?(name)
|
282
|
+
name == fullname || fullname_aliases.include?(name)
|
283
|
+
end
|
203
284
|
end
|
204
285
|
|
205
286
|
class RecordSchema < NamedSchema
|
206
|
-
attr_reader :fields
|
287
|
+
attr_reader :fields, :doc
|
207
288
|
|
208
289
|
def self.make_field_objects(field_data, names, namespace=nil)
|
209
|
-
field_objects, field_names = [], Set.new
|
210
|
-
field_data.
|
290
|
+
field_objects, field_names, alias_names = [], Set.new, Set.new
|
291
|
+
field_data.each do |field|
|
211
292
|
if field.respond_to?(:[]) # TODO(jmhodges) wtffffff
|
212
293
|
type = field['type']
|
213
294
|
name = field['name']
|
214
295
|
default = field.key?('default') ? field['default'] : :no_default
|
215
296
|
order = field['order']
|
216
|
-
|
297
|
+
doc = field['doc']
|
298
|
+
aliases = field['aliases']
|
299
|
+
new_field = Field.new(type, name, default, order, names, namespace, doc, aliases)
|
217
300
|
# make sure field name has not been used yet
|
218
301
|
if field_names.include?(new_field.name)
|
219
302
|
raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
|
220
303
|
end
|
221
304
|
field_names << new_field.name
|
305
|
+
# make sure alias has not be been used yet
|
306
|
+
if new_field.aliases && alias_names.intersect?(new_field.aliases.to_set)
|
307
|
+
raise SchemaParseError, "Alias #{(alias_names & new_field.aliases).to_a} already in use"
|
308
|
+
end
|
309
|
+
alias_names.merge(new_field.aliases) if new_field.aliases
|
222
310
|
else
|
223
311
|
raise SchemaParseError, "Not a valid field: #{field}"
|
224
312
|
end
|
@@ -227,20 +315,36 @@ module Avro
|
|
227
315
|
field_objects
|
228
316
|
end
|
229
317
|
|
230
|
-
def initialize(name, namespace, fields, names=nil, schema_type=:record)
|
318
|
+
def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil, aliases=nil)
|
231
319
|
if schema_type == :request || schema_type == 'request'
|
232
320
|
@type_sym = schema_type.to_sym
|
233
321
|
@namespace = namespace
|
322
|
+
@name = nil
|
323
|
+
@doc = nil
|
234
324
|
else
|
235
|
-
super(schema_type, name, namespace, names)
|
325
|
+
super(schema_type, name, namespace, names, doc, nil, aliases)
|
236
326
|
end
|
237
|
-
@fields =
|
327
|
+
@fields = if fields
|
328
|
+
RecordSchema.make_field_objects(fields, names, self.namespace)
|
329
|
+
else
|
330
|
+
{}
|
331
|
+
end
|
238
332
|
end
|
239
333
|
|
240
334
|
def fields_hash
|
241
335
|
@fields_hash ||= fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
|
242
336
|
end
|
243
337
|
|
338
|
+
def fields_by_alias
|
339
|
+
@fields_by_alias ||= fields.each_with_object({}) do |field, hash|
|
340
|
+
if field.aliases
|
341
|
+
field.aliases.each do |a|
|
342
|
+
hash[a] = field
|
343
|
+
end
|
344
|
+
end
|
345
|
+
end
|
346
|
+
end
|
347
|
+
|
244
348
|
def to_avro(names=Set.new)
|
245
349
|
hsh = super
|
246
350
|
return hsh unless hsh.is_a?(Hash)
|
@@ -285,8 +389,7 @@ module Avro
|
|
285
389
|
def initialize(schemas, names=nil, default_namespace=nil)
|
286
390
|
super(:union)
|
287
391
|
|
288
|
-
|
289
|
-
schemas.each_with_index do |schema, i|
|
392
|
+
@schemas = schemas.each_with_object([]) do |schema, schema_objects|
|
290
393
|
new_schema = subparse(schema, names, default_namespace)
|
291
394
|
ns_type = new_schema.type_sym
|
292
395
|
|
@@ -299,7 +402,6 @@ module Avro
|
|
299
402
|
else
|
300
403
|
schema_objects << new_schema
|
301
404
|
end
|
302
|
-
@schemas = schema_objects
|
303
405
|
end
|
304
406
|
end
|
305
407
|
|
@@ -309,29 +411,51 @@ module Avro
|
|
309
411
|
end
|
310
412
|
|
311
413
|
class EnumSchema < NamedSchema
|
312
|
-
|
313
|
-
|
414
|
+
SYMBOL_REGEX = /^[A-Za-z_][A-Za-z0-9_]*$/
|
415
|
+
|
416
|
+
attr_reader :symbols, :doc, :default
|
417
|
+
|
418
|
+
def initialize(name, space, symbols, names=nil, doc=nil, default=nil, aliases=nil)
|
314
419
|
if symbols.uniq.length < symbols.length
|
315
|
-
fail_msg =
|
420
|
+
fail_msg = "Duplicate symbol: #{symbols}"
|
316
421
|
raise Avro::SchemaParseError, fail_msg
|
317
422
|
end
|
318
|
-
|
423
|
+
|
424
|
+
if !Avro.disable_enum_symbol_validation
|
425
|
+
invalid_symbols = symbols.select { |symbol| symbol !~ SYMBOL_REGEX }
|
426
|
+
|
427
|
+
if invalid_symbols.any?
|
428
|
+
raise SchemaParseError,
|
429
|
+
"Invalid symbols for #{name}: #{invalid_symbols.join(', ')} don't match #{SYMBOL_REGEX.inspect}"
|
430
|
+
end
|
431
|
+
end
|
432
|
+
|
433
|
+
if default && !symbols.include?(default)
|
434
|
+
raise Avro::SchemaParseError, "Default '#{default}' is not a valid symbol for enum #{name}"
|
435
|
+
end
|
436
|
+
|
437
|
+
super(:enum, name, space, names, doc, nil, aliases)
|
438
|
+
@default = default
|
319
439
|
@symbols = symbols
|
320
440
|
end
|
321
441
|
|
322
|
-
def to_avro(
|
442
|
+
def to_avro(_names=Set.new)
|
323
443
|
avro = super
|
324
|
-
avro.is_a?(Hash)
|
444
|
+
if avro.is_a?(Hash)
|
445
|
+
avro['symbols'] = symbols
|
446
|
+
avro['default'] = default if default
|
447
|
+
end
|
448
|
+
avro
|
325
449
|
end
|
326
450
|
end
|
327
451
|
|
328
452
|
# Valid primitive types are in PRIMITIVE_TYPES.
|
329
453
|
class PrimitiveSchema < Schema
|
330
|
-
def initialize(type)
|
454
|
+
def initialize(type, logical_type=nil)
|
331
455
|
if PRIMITIVE_TYPES_SYM.include?(type)
|
332
|
-
super(type)
|
456
|
+
super(type, logical_type)
|
333
457
|
elsif PRIMITIVE_TYPES.include?(type)
|
334
|
-
super(type.to_sym)
|
458
|
+
super(type.to_sym, logical_type)
|
335
459
|
else
|
336
460
|
raise AvroError.new("#{type} is not a valid primitive type.")
|
337
461
|
end
|
@@ -343,14 +467,32 @@ module Avro
|
|
343
467
|
end
|
344
468
|
end
|
345
469
|
|
470
|
+
class BytesSchema < PrimitiveSchema
|
471
|
+
attr_reader :precision, :scale
|
472
|
+
def initialize(type, logical_type=nil, precision=nil, scale=nil)
|
473
|
+
super(type.to_sym, logical_type)
|
474
|
+
@precision = precision
|
475
|
+
@scale = scale
|
476
|
+
end
|
477
|
+
|
478
|
+
def to_avro(names=nil)
|
479
|
+
avro = super
|
480
|
+
return avro if avro.is_a?(String)
|
481
|
+
|
482
|
+
avro['precision'] = precision if precision
|
483
|
+
avro['scale'] = scale if scale
|
484
|
+
avro
|
485
|
+
end
|
486
|
+
end
|
487
|
+
|
346
488
|
class FixedSchema < NamedSchema
|
347
489
|
attr_reader :size
|
348
|
-
def initialize(name, space, size, names=nil)
|
490
|
+
def initialize(name, space, size, names=nil, logical_type=nil, aliases=nil)
|
349
491
|
# Ensure valid ctor args
|
350
|
-
unless size.is_a?(
|
492
|
+
unless size.is_a?(Integer)
|
351
493
|
raise AvroError, 'Fixed Schema requires a valid integer for size property.'
|
352
494
|
end
|
353
|
-
super(:fixed, name, space, names)
|
495
|
+
super(:fixed, name, space, names, nil, logical_type, aliases)
|
354
496
|
@size = size
|
355
497
|
end
|
356
498
|
|
@@ -361,21 +503,48 @@ module Avro
|
|
361
503
|
end
|
362
504
|
|
363
505
|
class Field < Schema
|
364
|
-
attr_reader :type, :name, :default, :order
|
506
|
+
attr_reader :type, :name, :default, :order, :doc, :aliases
|
365
507
|
|
366
|
-
def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil)
|
508
|
+
def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil, aliases=nil)
|
367
509
|
@type = subparse(type, names, namespace)
|
368
510
|
@name = name
|
369
511
|
@default = default
|
370
512
|
@order = order
|
513
|
+
@doc = doc
|
514
|
+
@aliases = aliases
|
515
|
+
validate_aliases! if aliases
|
516
|
+
validate_default! if default? && !Avro.disable_field_default_validation
|
517
|
+
end
|
518
|
+
|
519
|
+
def default?
|
520
|
+
@default != :no_default
|
371
521
|
end
|
372
522
|
|
373
523
|
def to_avro(names=Set.new)
|
374
524
|
{'name' => name, 'type' => type.to_avro(names)}.tap do |avro|
|
375
|
-
avro['default'] = default
|
525
|
+
avro['default'] = default if default?
|
376
526
|
avro['order'] = order if order
|
527
|
+
avro['doc'] = doc if doc
|
377
528
|
end
|
378
529
|
end
|
530
|
+
|
531
|
+
def alias_names
|
532
|
+
@alias_names ||= Array(aliases)
|
533
|
+
end
|
534
|
+
|
535
|
+
private
|
536
|
+
|
537
|
+
def validate_default!
|
538
|
+
type_for_default = if type.type_sym == :union
|
539
|
+
type.schemas.first
|
540
|
+
else
|
541
|
+
type
|
542
|
+
end
|
543
|
+
|
544
|
+
Avro::SchemaValidator.validate!(type_for_default, default)
|
545
|
+
rescue Avro::SchemaValidator::ValidationError => e
|
546
|
+
raise Avro::SchemaParseError, "Error validating default for #{name}: #{e.message}"
|
547
|
+
end
|
379
548
|
end
|
380
549
|
end
|
381
550
|
|