avro 1.9.0 → 1.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/LICENSE +2 -2
- data/Manifest +1 -1
- data/NOTICE +2 -2
- data/Rakefile +21 -30
- data/avro.gemspec +36 -29
- data/interop/test_interop.rb +14 -3
- data/lib/avro/VERSION.txt +1 -0
- data/lib/avro/data_file.rb +30 -6
- data/lib/avro/io.rb +42 -36
- data/lib/avro/ipc.rb +12 -8
- data/lib/avro/logical_types.rb +187 -3
- data/lib/avro/protocol.rb +2 -1
- data/lib/avro/schema.rb +234 -32
- data/lib/avro/schema_compatibility.rb +32 -21
- data/lib/avro/schema_normalization.rb +2 -1
- data/lib/avro/schema_validator.rb +41 -35
- data/lib/avro.rb +16 -3
- data/test/case_finder.rb +10 -4
- data/test/random_data.rb +9 -7
- data/test/sample_ipc_client.rb +2 -1
- data/test/sample_ipc_http_client.rb +2 -1
- data/test/sample_ipc_http_server.rb +2 -1
- data/test/sample_ipc_server.rb +2 -1
- data/test/test_datafile.rb +15 -1
- data/test/test_fingerprints.rb +21 -1
- data/test/test_help.rb +2 -1
- data/test/test_io.rb +114 -18
- data/test/test_logical_types.rb +139 -2
- data/test/test_protocol.rb +3 -2
- data/test/test_schema.rb +399 -1
- data/test/test_schema_compatibility.rb +177 -1
- data/test/test_schema_normalization.rb +3 -1
- data/test/test_schema_validator.rb +27 -6
- data/test/test_socket_transport.rb +2 -1
- data/test/tool.rb +10 -9
- metadata +32 -34
- data/CHANGELOG +0 -1
data/lib/avro/logical_types.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
|
+
# frozen_string_literal: true
|
2
3
|
# Licensed to the Apache Software Foundation (ASF) under one
|
3
4
|
# or more contributor license agreements. See the NOTICE file
|
4
5
|
# distributed with this work for additional information
|
@@ -7,7 +8,7 @@
|
|
7
8
|
# "License"); you may not use this file except in compliance
|
8
9
|
# with the License. You may obtain a copy of the License at
|
9
10
|
#
|
10
|
-
#
|
11
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
11
12
|
#
|
12
13
|
# Unless required by applicable law or agreed to in writing, software
|
13
14
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
@@ -16,9 +17,188 @@
|
|
16
17
|
# limitations under the License.
|
17
18
|
|
18
19
|
require 'date'
|
20
|
+
require 'bigdecimal'
|
21
|
+
require 'bigdecimal/util'
|
19
22
|
|
20
23
|
module Avro
|
21
24
|
module LogicalTypes
|
25
|
+
##
|
26
|
+
# Base class for logical types requiring a schema to be present
|
27
|
+
class LogicalTypeWithSchema
  ##
  # @return [Avro::Schema] The schema this logical type is dealing with
  attr_reader :schema

  ##
  # Build a new instance of a logical type using the provided schema
  #
  # @param schema [Avro::Schema]
  #     The schema to use with this instance
  #
  # @raise [ArgumentError]
  #     If the provided schema is nil
  def initialize(schema)
    raise ArgumentError, 'schema is required' if schema.nil?

    @schema = schema
  end

  ##
  # Encode the provided datum
  #
  # @param datum [Object] The datum to encode
  #
  # @raise [NotImplementedError]
  #     Subclass will need to override this method
  def encode(datum)
    raise NotImplementedError
  end

  ##
  # Decode the provided datum
  #
  # @param datum [Object] The datum to decode
  #
  # @raise [NotImplementedError]
  #     Subclass will need to override this method
  def decode(datum)
    raise NotImplementedError
  end
end

##
# Logical type to handle arbitrary-precision decimals using byte array.
#
# The byte array contains the two's-complement representation of the unscaled integer
# value in big-endian byte order.
class BytesDecimal < LogicalTypeWithSchema
  # Messages for exceptions
  ERROR_INSUFFICIENT_PRECISION = 'Precision is too small'
  ERROR_ROUNDING_NECESSARY = 'Rounding necessary'
  ERROR_VALUE_MUST_BE_NUMERIC = 'value must be numeric'

  # The pattern used to pack up the byte array (8 bit unsigned integer/char)
  PACK_UNSIGNED_CHARS = 'C*'

  # The number 10 as BigDecimal
  TEN = BigDecimal(10).freeze

  ##
  # @return [Integer] The number of total digits supported by the decimal
  attr_reader :precision

  ##
  # @return [Integer] The number of fractional digits
  attr_reader :scale

  ##
  # Build a new decimal logical type
  #
  # @param schema [Avro::Schema]
  #     The schema defining precision and scale for the conversion
  #
  # @raise [ArgumentError]
  #     If the provided schema is nil
  def initialize(schema)
    super

    @scale = schema.scale.to_i
    @precision = schema.precision.to_i
    # Multiplying by 10**scale turns a decimal into its unscaled integer.
    @factor = TEN ** @scale
  end

  ##
  # Encode the provided value into a byte array
  #
  # @param value [BigDecimal, Float, Integer]
  #     The numeric value to encode
  #
  # @return [String]
  #     The frozen, packed byte string
  #
  # @raise [ArgumentError]
  #     If the provided value is not a numeric type
  #
  # @raise [RangeError]
  #     If the provided value has a scale higher than the schema permits,
  #     or does not fit into the schema's precision
  def encode(value)
    raise ArgumentError, ERROR_VALUE_MUST_BE_NUMERIC unless value.is_a?(Numeric)

    to_byte_array(unscaled_value(value.to_d)).pack(PACK_UNSIGNED_CHARS).freeze
  end

  ##
  # Decode a byte array (in form of a string) into a BigDecimal of the
  # given precision and scale
  #
  # @param stream [String]
  #     The byte array to decode
  #
  # @return [BigDecimal]
  def decode(stream)
    from_byte_array(stream) / @factor
  end

  private

  ##
  # Convert the provided stream of bytes into the unscaled value
  #
  # @param stream [String]
  #     The stream of bytes to convert
  #
  # @return [Integer]
  def from_byte_array(stream)
    bytes = stream.bytes
    # An empty byte string carries no digits; treat it as zero rather than
    # failing with NoMethodError when asking nil for its sign bit.
    return 0 if bytes.empty?

    unsigned = bytes.reduce(0) { |total, byte| (total << 8) | byte }
    # Bit 7 of the leading byte is the two's-complement sign bit.
    return unsigned if bytes.first[7].zero?

    # Negative: subtract 2**(8 * byte count) to undo the two's complement.
    unsigned - (1 << (bytes.length * 8))
  end

  ##
  # Convert the provided number into its two's complement representation
  # in network order (big endian).
  #
  # @param number [Integer]
  #     The number to convert
  #
  # @return [Array<Integer>]
  #     The byte array in network order
  def to_byte_array(number)
    [].tap do |result|
      loop do
        result.unshift(number & 0xff)
        number >>= 8

        # Stop once only sign extension is left (0 or -1) and the leading
        # byte emitted so far already carries the matching sign bit.
        break if (number == 0 || number == -1) && (result.first[7] == number[7])
      end
    end
  end

  ##
  # Get the unscaled value from a BigDecimal considering the schema's scale
  #
  # @param decimal [BigDecimal]
  #     The decimal to get the unscaled value from
  #
  # @return [Integer]
  #
  # @raise [RangeError]
  #     If the decimal needs more fractional digits than the scale allows or
  #     does not fit into the precision
  def unscaled_value(decimal)
    _sign, digits, _base, exponent = decimal.split

    # BigDecimal#split reports zero as the digit string "0" with exponent 0.
    # Counting that "0" as a significant digit would yield one fractional
    # digit and wrongly reject zero whenever the scale is 0.
    significant = decimal.zero? ? 0 : digits.length

    fractional_part = significant - exponent
    raise RangeError, ERROR_ROUNDING_NECESSARY if fractional_part > scale

    if significant > precision || (significant - fractional_part) > (precision - scale)
      raise RangeError, ERROR_INSUFFICIENT_PRECISION
    end

    (decimal * @factor).to_i
  end
end
|
201
|
+
|
22
202
|
module IntDate
|
23
203
|
EPOCH_START = Date.new(1970, 1, 1)
|
24
204
|
|
@@ -72,6 +252,9 @@ module Avro
|
|
72
252
|
end
|
73
253
|
|
74
254
|
TYPES = {
|
255
|
+
"bytes" => {
|
256
|
+
"decimal" => BytesDecimal
|
257
|
+
},
|
75
258
|
"int" => {
|
76
259
|
"date" => IntDate
|
77
260
|
},
|
@@ -81,10 +264,11 @@ module Avro
|
|
81
264
|
},
|
82
265
|
}.freeze
|
83
266
|
|
84
|
-
def self.type_adapter(type, logical_type)
|
267
|
+
# Look up the adapter registered in TYPES for a type / logical type pair.
#
# @param type [String] key into the outer TYPES hash
# @param logical_type [String, nil] key into the per-type hash
# @param schema [Object, nil] passed to the adapter's constructor when the
#     registered adapter is a class
#
# @return [Object, nil] nil when no logical type is given; otherwise the
#     registered adapter (instantiated with the schema when it is a class),
#     or Identity when nothing is registered for the pair
def self.type_adapter(type, logical_type, schema = nil)
  return unless logical_type

  adapters_for_type = TYPES.fetch(type) { {} }
  adapter = adapters_for_type.fetch(logical_type) { Identity }
  return adapter unless adapter.is_a?(Class)

  adapter.new(schema)
end
|
89
273
|
end
|
90
274
|
end
|
data/lib/avro/protocol.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
# Licensed to the Apache Software Foundation (ASF) under one
|
2
3
|
# or more contributor license agreements. See the NOTICE file
|
3
4
|
# distributed with this work for additional information
|
@@ -6,7 +7,7 @@
|
|
6
7
|
# "License"); you may not use this file except in compliance
|
7
8
|
# with the License. You may obtain a copy of the License at
|
8
9
|
#
|
9
|
-
#
|
10
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
10
11
|
#
|
11
12
|
# Unless required by applicable law or agreed to in writing, software
|
12
13
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
data/lib/avro/schema.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
# Licensed to the Apache Software Foundation (ASF) under one
|
2
3
|
# or more contributor license agreements. See the NOTICE file
|
3
4
|
# distributed with this work for additional information
|
@@ -6,7 +7,7 @@
|
|
6
7
|
# "License"); you may not use this file except in compliance
|
7
8
|
# with the License. You may obtain a copy of the License at
|
8
9
|
#
|
9
|
-
#
|
10
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
10
11
|
#
|
11
12
|
# Unless required by applicable law or agreed to in writing, software
|
12
13
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
@@ -29,11 +30,17 @@ module Avro
|
|
29
30
|
NAMED_TYPES_SYM = Set.new(NAMED_TYPES.map(&:to_sym))
|
30
31
|
VALID_TYPES_SYM = Set.new(VALID_TYPES.map(&:to_sym))
|
31
32
|
|
33
|
+
# Pattern for a valid, optionally dot-qualified name.
# \A and \z anchor the whole string. With ^ and $ only a single line has to
# match, so a value such as "Valid\n!!" would be accepted as a name.
NAME_REGEX = /\A([A-Za-z_][A-Za-z0-9_]*)(\.([A-Za-z_][A-Za-z0-9_]*))*\z/.freeze
|
34
|
+
|
32
35
|
INT_MIN_VALUE = -(1 << 31)
|
33
36
|
INT_MAX_VALUE = (1 << 31) - 1
|
34
37
|
LONG_MIN_VALUE = -(1 << 63)
|
35
38
|
LONG_MAX_VALUE = (1 << 63) - 1
|
36
39
|
|
40
|
+
DEFAULT_VALIDATE_OPTIONS = { recursive: true, encoded: false }.freeze
|
41
|
+
|
42
|
+
DECIMAL_LOGICAL_TYPE = 'decimal'
|
43
|
+
|
37
44
|
def self.parse(json_string)
|
38
45
|
real_parse(MultiJson.load(json_string), {})
|
39
46
|
end
|
@@ -53,23 +60,36 @@ module Avro
|
|
53
60
|
|
54
61
|
type_sym = type.to_sym
|
55
62
|
if PRIMITIVE_TYPES_SYM.include?(type_sym)
|
56
|
-
|
57
|
-
|
63
|
+
case type_sym
|
64
|
+
when :bytes
|
65
|
+
precision = json_obj['precision']
|
66
|
+
scale = json_obj['scale']
|
67
|
+
return BytesSchema.new(type_sym, logical_type, precision, scale)
|
68
|
+
else
|
69
|
+
return PrimitiveSchema.new(type_sym, logical_type)
|
70
|
+
end
|
58
71
|
elsif NAMED_TYPES_SYM.include? type_sym
|
59
72
|
name = json_obj['name']
|
73
|
+
if !Avro.disable_schema_name_validation && name !~ NAME_REGEX
|
74
|
+
raise SchemaParseError, "Name #{name} is invalid for type #{type}!"
|
75
|
+
end
|
60
76
|
namespace = json_obj.include?('namespace') ? json_obj['namespace'] : default_namespace
|
77
|
+
aliases = json_obj['aliases']
|
61
78
|
case type_sym
|
62
79
|
when :fixed
|
63
80
|
size = json_obj['size']
|
64
|
-
|
81
|
+
precision = json_obj['precision']
|
82
|
+
scale = json_obj['scale']
|
83
|
+
return FixedSchema.new(name, namespace, size, names, logical_type, aliases, precision, scale)
|
65
84
|
when :enum
|
66
85
|
symbols = json_obj['symbols']
|
67
86
|
doc = json_obj['doc']
|
68
|
-
|
87
|
+
default = json_obj['default']
|
88
|
+
return EnumSchema.new(name, namespace, symbols, names, doc, default, aliases)
|
69
89
|
when :record, :error
|
70
90
|
fields = json_obj['fields']
|
71
91
|
doc = json_obj['doc']
|
72
|
-
return RecordSchema.new(name, namespace, fields, names, type_sym, doc)
|
92
|
+
return RecordSchema.new(name, namespace, fields, names, type_sym, doc, aliases)
|
73
93
|
else
|
74
94
|
raise SchemaParseError.new("Unknown named type: #{type}")
|
75
95
|
end
|
@@ -96,7 +116,7 @@ module Avro
|
|
96
116
|
end
|
97
117
|
|
98
118
|
# Determine if a ruby datum is an instance of a schema
|
99
|
-
def self.validate(expected_schema, logical_datum, options =
|
119
|
+
def self.validate(expected_schema, logical_datum, options = DEFAULT_VALIDATE_OPTIONS)
|
100
120
|
SchemaValidator.validate!(expected_schema, logical_datum, options)
|
101
121
|
true
|
102
122
|
rescue SchemaValidator::ValidationError
|
@@ -116,7 +136,7 @@ module Avro
|
|
116
136
|
def type; @type_sym.to_s; end
|
117
137
|
|
118
138
|
def type_adapter
|
119
|
-
@type_adapter ||= LogicalTypes.type_adapter(type, logical_type) || LogicalTypes::Identity
|
139
|
+
@type_adapter ||= LogicalTypes.type_adapter(type, logical_type, self) || LogicalTypes::Identity
|
120
140
|
end
|
121
141
|
|
122
142
|
# Returns the MD5 fingerprint of the schema as an Integer.
|
@@ -131,6 +151,49 @@ module Avro
|
|
131
151
|
Digest::SHA256.hexdigest(parsing_form).to_i(16)
|
132
152
|
end
|
133
153
|
|
154
|
+
CRC_EMPTY = 0xc15d213aa4d7a795
|
155
|
+
|
156
|
+
# The Java library caches this value after it is initialized, so this pattern
|
157
|
+
# mimics that.
|
158
|
+
@@fp_table = nil
|
159
|
+
# Build the 256-entry lookup table read by #crc_64_avro_fingerprint and
# cache it in @@fp_table. Entry i is i pushed through eight shift/xor rounds
# against CRC_EMPTY.
#
# The table is computed completely before it is assigned, so @@fp_table
# never refers to a partially filled array.
#
# @return [Integer] the number of table entries (256)
def initFPTable
  table = Array.new(256) do |index|
    fp = index
    # -(fp & 1) is -1 (all bits set) when the low bit is 1 and 0 otherwise,
    # so the mask either keeps CRC_EMPTY whole or clears it.
    8.times { fp = (fp >> 1) ^ (CRC_EMPTY & -(fp & 1)) }
    fp
  end

  @@fp_table = table
  table.length
end
|
169
|
+
|
170
|
+
# Compute the CRC-64 fingerprint over the bytes of this schema's parsing form.
#
# The lookup table is built on first use. Starting from CRC_EMPTY, every
# byte shifts the running value right by 8 bits and xors in the table entry
# selected by the low byte of (running value xor byte).
#
# @return [Integer] the fingerprint
def crc_64_avro_fingerprint
  canonical_form = Avro::SchemaNormalization.to_parsing_form(self)
  octets = canonical_form.bytes

  initFPTable unless @@fp_table

  octets.reduce(CRC_EMPTY) do |fingerprint, octet|
    (fingerprint >> 8) ^ @@fp_table[(fingerprint ^ octet) & 0xff]
  end
end
|
182
|
+
|
183
|
+
SINGLE_OBJECT_MAGIC_NUMBER = [0xC3, 0x01].freeze
|
184
|
+
def single_object_encoding_header
|
185
|
+
[SINGLE_OBJECT_MAGIC_NUMBER, single_object_schema_fingerprint].flatten
|
186
|
+
end
|
187
|
+
# Split the CRC-64 fingerprint into eight bytes, least significant byte first.
#
# @return [Array<Integer>] eight values in the range 0..255
def single_object_schema_fingerprint
  fingerprint = crc_64_avro_fingerprint
  (0...8).map { |position| (fingerprint >> (8 * position)) & 0xff }
end
|
196
|
+
|
134
197
|
def read?(writers_schema)
|
135
198
|
SchemaCompatibility.can_read?(writers_schema, self)
|
136
199
|
end
|
@@ -143,11 +206,11 @@ module Avro
|
|
143
206
|
SchemaCompatibility.mutual_read?(other_schema, self)
|
144
207
|
end
|
145
208
|
|
146
|
-
def ==(other,
|
209
|
+
def ==(other, _seen=nil)
|
147
210
|
other.is_a?(Schema) && type_sym == other.type_sym
|
148
211
|
end
|
149
212
|
|
150
|
-
def hash(
|
213
|
+
def hash(_seen=nil)
|
151
214
|
type_sym.hash
|
152
215
|
end
|
153
216
|
|
@@ -165,7 +228,7 @@ module Avro
|
|
165
228
|
end
|
166
229
|
end
|
167
230
|
|
168
|
-
def to_avro(
|
231
|
+
def to_avro(_names=nil)
|
169
232
|
props = {'type' => type}
|
170
233
|
props['logicalType'] = logical_type if logical_type
|
171
234
|
props
|
@@ -175,14 +238,26 @@ module Avro
|
|
175
238
|
MultiJson.dump to_avro
|
176
239
|
end
|
177
240
|
|
241
|
+
# Check that +aliases+ is either nil or an array containing only strings.
#
# @return [nil]
#
# @raise [Avro::SchemaParseError] for any other aliases value
def validate_aliases!
  candidates = aliases
  return if candidates.nil?
  return if candidates.is_a?(Array) && candidates.all? { |entry| entry.is_a?(String) }

  raise Avro::SchemaParseError,
        "Invalid aliases value #{candidates.inspect} for #{type} #{name}. Must be an array of strings."
end
|
249
|
+
private :validate_aliases!
|
250
|
+
|
178
251
|
class NamedSchema < Schema
|
179
|
-
attr_reader :name, :namespace
|
252
|
+
attr_reader :name, :namespace, :aliases
|
180
253
|
|
181
|
-
def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil)
|
254
|
+
# Set up a named schema.
#
# The name and namespace stored are whatever Name.extract_namespace returns
# for the given pair. Aliases are validated only when a truthy value is
# given, and the schema is registered through Name.add_name last.
def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil, aliases=nil)
  super(type, logical_type)

  short_name, resolved_namespace = Name.extract_namespace(name, namespace)
  @name = short_name
  @namespace = resolved_namespace
  @doc = doc
  @aliases = aliases

  validate_aliases! if aliases
  Name.add_name(names, self)
end
|
187
262
|
|
188
263
|
def to_avro(names=Set.new)
|
@@ -192,33 +267,57 @@ module Avro
|
|
192
267
|
end
|
193
268
|
props = {'name' => @name}
|
194
269
|
props.merge!('namespace' => @namespace) if @namespace
|
195
|
-
props
|
270
|
+
props['namespace'] = @namespace if @namespace
|
271
|
+
props['doc'] = @doc if @doc
|
272
|
+
props['aliases'] = aliases if aliases && aliases.any?
|
196
273
|
super.merge props
|
197
274
|
end
|
198
275
|
|
199
276
|
def fullname
|
200
277
|
@fullname ||= Name.make_fullname(@name, @namespace)
|
201
278
|
end
|
279
|
+
|
280
|
+
# Aliases expanded with Name.make_fullname against this schema's namespace.
# The result is memoized; without aliases it is an empty array.
#
# @return [Array]
def fullname_aliases
  return @fullname_aliases if @fullname_aliases

  @fullname_aliases =
    aliases ? aliases.map { |short_alias| Name.make_fullname(short_alias, namespace) } : []
end
|
287
|
+
|
288
|
+
# Whether the given name equals this schema's fullname or one of its
# fully qualified aliases.
#
# @param name [String]
# @return [Boolean]
def match_fullname?(name)
  return true if name == fullname

  fullname_aliases.include?(name)
end
|
291
|
+
|
292
|
+
# Whether the other schema has the same type symbol and a fullname that
# matches this schema's fullname or aliases.
#
# @param schema [Object] must respond to type_sym, and to fullname when the
#     type symbols are equal
# @return [Boolean]
def match_schema?(schema)
  return false unless type_sym == schema.type_sym

  match_fullname?(schema.fullname)
end
|
202
295
|
end
|
203
296
|
|
204
297
|
class RecordSchema < NamedSchema
|
205
298
|
attr_reader :fields, :doc
|
206
299
|
|
207
300
|
def self.make_field_objects(field_data, names, namespace=nil)
|
208
|
-
field_objects, field_names = [], Set.new
|
209
|
-
field_data.
|
301
|
+
field_objects, field_names, alias_names = [], Set.new, Set.new
|
302
|
+
field_data.each do |field|
|
210
303
|
if field.respond_to?(:[]) # TODO(jmhodges) wtffffff
|
211
304
|
type = field['type']
|
212
305
|
name = field['name']
|
213
306
|
default = field.key?('default') ? field['default'] : :no_default
|
214
307
|
order = field['order']
|
215
308
|
doc = field['doc']
|
216
|
-
|
309
|
+
aliases = field['aliases']
|
310
|
+
new_field = Field.new(type, name, default, order, names, namespace, doc, aliases)
|
217
311
|
# make sure field name has not been used yet
|
218
312
|
if field_names.include?(new_field.name)
|
219
313
|
raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
|
220
314
|
end
|
221
315
|
field_names << new_field.name
|
316
|
+
# make sure alias has not been used yet
|
317
|
+
if new_field.aliases && alias_names.intersect?(new_field.aliases.to_set)
|
318
|
+
raise SchemaParseError, "Alias #{(alias_names & new_field.aliases).to_a} already in use"
|
319
|
+
end
|
320
|
+
alias_names.merge(new_field.aliases) if new_field.aliases
|
222
321
|
else
|
223
322
|
raise SchemaParseError, "Not a valid field: #{field}"
|
224
323
|
end
|
@@ -227,14 +326,14 @@ module Avro
|
|
227
326
|
field_objects
|
228
327
|
end
|
229
328
|
|
230
|
-
def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil)
|
329
|
+
def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil, aliases=nil)
|
231
330
|
if schema_type == :request || schema_type == 'request'
|
232
331
|
@type_sym = schema_type.to_sym
|
233
332
|
@namespace = namespace
|
234
333
|
@name = nil
|
235
334
|
@doc = nil
|
236
335
|
else
|
237
|
-
super(schema_type, name, namespace, names, doc)
|
336
|
+
super(schema_type, name, namespace, names, doc, nil, aliases)
|
238
337
|
end
|
239
338
|
@fields = if fields
|
240
339
|
RecordSchema.make_field_objects(fields, names, self.namespace)
|
@@ -247,6 +346,16 @@ module Avro
|
|
247
346
|
@fields_hash ||= fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
|
248
347
|
end
|
249
348
|
|
349
|
+
# Memoized lookup from alias to field. Fields without aliases are skipped;
# when two fields share an alias, the field listed later wins.
#
# @return [Hash]
def fields_by_alias
  return @fields_by_alias if @fields_by_alias

  lookup = {}
  fields.each do |field|
    next unless field.aliases

    field.aliases.each { |alias_name| lookup[alias_name] = field }
  end
  @fields_by_alias = lookup
end
|
358
|
+
|
250
359
|
def to_avro(names=Set.new)
|
251
360
|
hsh = super
|
252
361
|
return hsh unless hsh.is_a?(Hash)
|
@@ -313,20 +422,41 @@ module Avro
|
|
313
422
|
end
|
314
423
|
|
315
424
|
# Named schema for an enum: a list of unique symbols with an optional
# default symbol.
class EnumSchema < NamedSchema
  # Pattern every enum symbol has to match.
  # \A and \z anchor the whole string. With ^ and $ only a single line has
  # to match, so a symbol such as "A\n!!" would pass validation.
  SYMBOL_REGEX = /\A[A-Za-z_][A-Za-z0-9_]*\z/.freeze

  attr_reader :symbols, :doc, :default

  # @param name [String] enum name
  # @param space [String, nil] namespace
  # @param symbols [Array<String>] the enum symbols
  # @param names [Object, nil] forwarded to NamedSchema
  # @param doc [String, nil] documentation
  # @param default [String, nil] default symbol; must be one of symbols
  # @param aliases [Array<String>, nil] alternative names
  #
  # @raise [Avro::SchemaParseError] on duplicate symbols, on symbols that do
  #     not match SYMBOL_REGEX (unless Avro.disable_enum_symbol_validation is
  #     set), or when default is not one of the symbols
  def initialize(name, space, symbols, names=nil, doc=nil, default=nil, aliases=nil)
    if symbols.uniq.length < symbols.length
      fail_msg = "Duplicate symbol: #{symbols}"
      raise Avro::SchemaParseError, fail_msg
    end

    if !Avro.disable_enum_symbol_validation
      invalid_symbols = symbols.select { |symbol| symbol !~ SYMBOL_REGEX }

      if invalid_symbols.any?
        raise SchemaParseError,
              "Invalid symbols for #{name}: #{invalid_symbols.join(', ')} don't match #{SYMBOL_REGEX.inspect}"
      end
    end

    if default && !symbols.include?(default)
      raise Avro::SchemaParseError, "Default '#{default}' is not a valid symbol for enum #{name}"
    end

    super(:enum, name, space, names, doc, nil, aliases)
    @default = default
    @symbols = symbols
  end

  # Adds 'symbols' (and 'default' when set) to the Hash produced by the
  # superclass; any non-Hash result from the superclass is returned as is.
  def to_avro(_names=Set.new)
    avro = super
    if avro.is_a?(Hash)
      avro['symbols'] = symbols
      avro['default'] = default if default
    end
    avro
  end
end
|
332
462
|
|
@@ -346,34 +476,102 @@ module Avro
|
|
346
476
|
hsh = super
|
347
477
|
hsh.size == 1 ? type : hsh
|
348
478
|
end
|
479
|
+
|
480
|
+
# Whether the other schema has the same type symbol.
#
# TODO: eventually this could handle schema promotion for primitive schemas too
#
# @param schema [Object] must respond to type_sym
# @return [Boolean]
def match_schema?(schema)
  type_sym == schema.type_sym
end
|
484
|
+
end
|
485
|
+
|
486
|
+
# Primitive bytes schema that can additionally carry the precision and
# scale of a decimal logical type.
class BytesSchema < PrimitiveSchema
  ERROR_INVALID_SCALE = 'Scale must be greater than or equal to 0'
  ERROR_INVALID_PRECISION = 'Precision must be positive'
  ERROR_PRECISION_TOO_SMALL = 'Precision must be greater than scale'

  attr_reader :precision, :scale

  # @param type [#to_sym] schema type
  # @param logical_type [String, nil] logical type name
  # @param precision [#to_i, nil] stored as an Integer when given
  # @param scale [#to_i, nil] stored as an Integer when given
  #
  # @raise [Avro::SchemaParseError] when the logical type is decimal and
  #     precision / scale are not valid
  def initialize(type, logical_type=nil, precision=nil, scale=nil)
    super(type.to_sym, logical_type)

    @precision = precision.to_i if precision
    @scale = scale.to_i if scale

    validate_decimal! if logical_type == DECIMAL_LOGICAL_TYPE
  end

  # Adds 'precision' and 'scale' (when set) to the superclass result, unless
  # that result is a plain String.
  def to_avro(names=nil)
    serialized = super
    unless serialized.is_a?(String)
      serialized['precision'] = precision if precision
      serialized['scale'] = scale if scale
    end
    serialized
  end

  # Matches when the superclass check matches, or when both schemas are
  # decimals with equal precision and equal scale (a missing scale counts as 0).
  #
  # @return [Boolean]
  def match_schema?(schema)
    return true if super

    both_decimal = logical_type == DECIMAL_LOGICAL_TYPE && schema.logical_type == DECIMAL_LOGICAL_TYPE
    both_decimal && precision == schema.precision && (scale || 0) == (schema.scale || 0)
  end

  private

  # Raise for the first violated decimal constraint: precision must be
  # positive, scale must not be negative, precision must not be below scale.
  def validate_decimal!
    problem =
      if !precision.to_i.positive?
        ERROR_INVALID_PRECISION
      elsif scale.to_i.negative?
        ERROR_INVALID_SCALE
      elsif precision < scale.to_i
        ERROR_PRECISION_TOO_SMALL
      end

    raise Avro::SchemaParseError, problem if problem
  end
end
|
350
529
|
|
351
530
|
# Named schema for a fixed-size value that can additionally carry the
# precision and scale of a decimal logical type.
class FixedSchema < NamedSchema
  attr_reader :size, :precision, :scale

  # @raise [AvroError] when size is not an Integer
  def initialize(name, space, size, names=nil, logical_type=nil, aliases=nil, precision=nil, scale=nil)
    # Ensure valid constructor args
    raise AvroError, 'Fixed Schema requires a valid integer for size property.' unless size.is_a?(Integer)

    super(:fixed, name, space, names, nil, logical_type, aliases)
    @size, @precision, @scale = size, precision, scale
  end

  # Adds 'size', plus 'precision' and 'scale' when set, to the superclass
  # result, unless that result is a plain String.
  def to_avro(names=Set.new)
    serialized = super
    unless serialized.is_a?(String)
      serialized['size'] = size
      serialized['precision'] = precision if precision
      serialized['scale'] = scale if scale
    end
    serialized
  end

  # Matches when the superclass check matches and the sizes are equal, or
  # when both schemas are decimals with equal precision and equal scale
  # (a missing scale counts as 0).
  #
  # @return [Boolean]
  def match_schema?(schema)
    return true if super && size == schema.size

    both_decimal = logical_type == DECIMAL_LOGICAL_TYPE && schema.logical_type == DECIMAL_LOGICAL_TYPE
    both_decimal && precision == schema.precision && (scale || 0) == (schema.scale || 0)
  end
end
|
367
563
|
|
368
564
|
class Field < Schema
|
369
|
-
attr_reader :type, :name, :default, :order, :doc
|
565
|
+
attr_reader :type, :name, :default, :order, :doc, :aliases
|
370
566
|
|
371
|
-
def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil)
|
567
|
+
# Set up a record field.
#
# The type is resolved through subparse first. Aliases are validated when a
# truthy value is given; the default is validated when one is present and
# Avro.disable_field_default_validation is not set.
def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil, aliases=nil) # rubocop:disable Lint/MissingSuper
  @type = subparse(type, names, namespace)
  @name, @default, @order, @doc, @aliases = name, default, order, doc, aliases

  validate_aliases! if aliases
  return unless default?

  validate_default! unless Avro.disable_field_default_validation
end
|
379
577
|
|
@@ -389,6 +587,10 @@ module Avro
|
|
389
587
|
end
|
390
588
|
end
|
391
589
|
|
590
|
+
# The field's aliases coerced to an array (empty when there are none).
# The result is memoized.
#
# @return [Array]
def alias_names
  return @alias_names if @alias_names

  @alias_names = Array(aliases)
end
|
593
|
+
|
392
594
|
private
|
393
595
|
|
394
596
|
def validate_default!
|