avro 1.8.2 → 1.10.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,9 +5,9 @@
5
5
  # to you under the Apache License, Version 2.0 (the
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
13
13
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -74,10 +74,10 @@ module Avro::IPC
74
74
 
75
75
  class ConnectionClosedException < Avro::AvroError; end
76
76
 
77
+ # Base class for the client side of a protocol interaction.
77
78
  class Requestor
78
- """Base class for the client side of a protocol interaction."""
79
- attr_reader :local_protocol, :transport
80
- attr_accessor :remote_protocol, :remote_hash, :send_protocol
79
+ attr_reader :local_protocol, :transport, :remote_protocol, :remote_hash
80
+ attr_accessor :send_protocol
81
81
 
82
82
  def initialize(local_protocol, transport)
83
83
  @local_protocol = local_protocol
@@ -193,9 +193,9 @@ module Avro::IPC
193
193
  # * a one-byte error flag boolean, followed by either:
194
194
  # * if the error flag is false,
195
195
  # the message response, serialized per the message's response schema.
196
- # * if the error flag is true,
196
+ # * if the error flag is true,
197
197
  # the error, serialized per the message's error union schema.
198
- response_metadata = META_READER.read(decoder)
198
+ _response_metadata = META_READER.read(decoder)
199
199
 
200
200
  # remote response schema
201
201
  remote_message_schema = remote_protocol.messages[message_name]
@@ -257,7 +257,7 @@ module Avro::IPC
257
257
  end
258
258
 
259
259
  # read request using remote protocol
260
- request_metadata = META_READER.read(buffer_decoder)
260
+ _request_metadata = META_READER.read(buffer_decoder)
261
261
  remote_message_name = buffer_decoder.read_string
262
262
 
263
263
  # get remote and local request schemas so we can do
@@ -278,7 +278,7 @@ module Avro::IPC
278
278
  response = call(local_message, request)
279
279
  rescue AvroRemoteError => e
280
280
  error = e
281
- rescue Exception => e
281
+ rescue Exception => e # rubocop:disable Lint/RescueException
282
282
  error = AvroRemoteError.new(e.to_s)
283
283
  end
284
284
 
@@ -350,7 +350,7 @@ module Avro::IPC
350
350
  remote_protocol
351
351
  end
352
352
 
353
- def call(local_message, request)
353
+ def call(_local_message, _request)
354
354
  # Actual work done by server: cf. handler in thrift.
355
355
  raise NotImplementedError
356
356
  end
@@ -0,0 +1,90 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ require 'date'
19
+
20
+ module Avro
21
+ module LogicalTypes
22
+ module IntDate
23
+ EPOCH_START = Date.new(1970, 1, 1)
24
+
25
+ def self.encode(date)
26
+ return date.to_i if date.is_a?(Numeric)
27
+
28
+ (date - EPOCH_START).to_i
29
+ end
30
+
31
+ def self.decode(int)
32
+ EPOCH_START + int
33
+ end
34
+ end
35
+
36
+ module TimestampMillis
37
+ def self.encode(value)
38
+ return value.to_i if value.is_a?(Numeric)
39
+
40
+ time = value.to_time
41
+ time.to_i * 1000 + time.usec / 1000
42
+ end
43
+
44
+ def self.decode(int)
45
+ s, ms = int / 1000, int % 1000
46
+ Time.at(s, ms * 1000).utc
47
+ end
48
+ end
49
+
50
+ module TimestampMicros
51
+ def self.encode(value)
52
+ return value.to_i if value.is_a?(Numeric)
53
+
54
+ time = value.to_time
55
+ time.to_i * 1000_000 + time.usec
56
+ end
57
+
58
+ def self.decode(int)
59
+ s, us = int / 1000_000, int % 1000_000
60
+ Time.at(s, us).utc
61
+ end
62
+ end
63
+
64
+ module Identity
65
+ def self.encode(datum)
66
+ datum
67
+ end
68
+
69
+ def self.decode(datum)
70
+ datum
71
+ end
72
+ end
73
+
74
+ TYPES = {
75
+ "int" => {
76
+ "date" => IntDate
77
+ },
78
+ "long" => {
79
+ "timestamp-millis" => TimestampMillis,
80
+ "timestamp-micros" => TimestampMicros
81
+ },
82
+ }.freeze
83
+
84
+ def self.type_adapter(type, logical_type)
85
+ return unless logical_type
86
+
87
+ TYPES.fetch(type, {}.freeze).fetch(logical_type, Identity)
88
+ end
89
+ end
90
+ end
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -20,7 +20,7 @@ module Avro
20
20
  VALID_TYPE_SCHEMA_TYPES_SYM = Set.new(VALID_TYPE_SCHEMA_TYPES.map(&:to_sym))
21
21
  class ProtocolParseError < Avro::AvroError; end
22
22
 
23
- attr_reader :name, :namespace, :types, :messages, :md5
23
+ attr_reader :name, :namespace, :types, :messages, :md5, :doc
24
24
  def self.parse(protocol_string)
25
25
  json_data = MultiJson.load(protocol_string)
26
26
 
@@ -29,13 +29,14 @@ module Avro
29
29
  namespace = json_data['namespace']
30
30
  types = json_data['types']
31
31
  messages = json_data['messages']
32
- Protocol.new(name, namespace, types, messages)
32
+ doc = json_data['doc']
33
+ Protocol.new(name, namespace, types, messages, doc)
33
34
  else
34
35
  raise ProtocolParseError, "Not a JSON object: #{json_data}"
35
36
  end
36
37
  end
37
38
 
38
- def initialize(name, namespace=nil, types=nil, messages=nil)
39
+ def initialize(name, namespace=nil, types=nil, messages=nil, doc=nil)
39
40
  # Ensure valid ctor args
40
41
  if !name
41
42
  raise ProtocolParseError, 'Protocols must have a non-empty name.'
@@ -55,6 +56,7 @@ module Avro
55
56
  @types = parse_types(types, type_names)
56
57
  @messages = parse_messages(messages, type_names)
57
58
  @md5 = Digest::MD5.digest(to_s)
59
+ @doc = doc
58
60
  end
59
61
 
60
62
  def to_s
@@ -67,7 +69,6 @@ module Avro
67
69
 
68
70
  private
69
71
  def parse_types(types, type_names)
70
- type_objects = []
71
72
  types.collect do |type|
72
73
  # FIXME adding type.name to type_names is not defined in the
73
74
  # spec. Possible bug in the python impl and the spec.
@@ -92,7 +93,8 @@ module Avro
92
93
  request = body['request']
93
94
  response = body['response']
94
95
  errors = body['errors']
95
- message_objects[name] = Message.new(name, request, response, errors, names, namespace)
96
+ doc = body['doc']
97
+ message_objects[name] = Message.new(name, request, response, errors, names, namespace, doc)
96
98
  end
97
99
  message_objects
98
100
  end
@@ -111,14 +113,15 @@ module Avro
111
113
  end
112
114
 
113
115
  class Message
114
- attr_reader :name, :request, :response, :errors, :default_namespace
116
+ attr_reader :name, :request, :response, :errors, :default_namespace, :doc
115
117
 
116
- def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil)
118
+ def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil, doc=nil)
117
119
  @name = name
118
120
  @default_namespace = default_namespace
119
121
  @request = parse_request(request, names)
120
122
  @response = parse_response(response, names)
121
123
  @errors = parse_errors(errors, names) if errors
124
+ @doc = doc
122
125
  end
123
126
 
124
127
  def to_avro(names=Set.new)
@@ -127,6 +130,7 @@ module Avro
127
130
  'response' => response.to_avro(names)
128
131
  }.tap do |hash|
129
132
  hash['errors'] = errors.to_avro(names) if errors
133
+ hash['doc'] = @doc if @doc
130
134
  end
131
135
  end
132
136
 
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,6 +14,8 @@
14
14
  # See the License for the specific language governing permissions and
15
15
  # limitations under the License.
16
16
 
17
+ require 'avro/logical_types'
18
+
17
19
  module Avro
18
20
  class Schema
19
21
  # Sets of strings, for backwards compatibility. See below for sets of symbols,
@@ -27,6 +29,8 @@ module Avro
27
29
  NAMED_TYPES_SYM = Set.new(NAMED_TYPES.map(&:to_sym))
28
30
  VALID_TYPES_SYM = Set.new(VALID_TYPES.map(&:to_sym))
29
31
 
32
+ NAME_REGEX = /^([A-Za-z_][A-Za-z0-9_]*)(\.([A-Za-z_][A-Za-z0-9_]*))*$/
33
+
30
34
  INT_MIN_VALUE = -(1 << 31)
31
35
  INT_MAX_VALUE = (1 << 31) - 1
32
36
  LONG_MIN_VALUE = -(1 << 63)
@@ -40,6 +44,7 @@ module Avro
40
44
  def self.real_parse(json_obj, names=nil, default_namespace=nil)
41
45
  if json_obj.is_a? Hash
42
46
  type = json_obj['type']
47
+ logical_type = json_obj['logicalType']
43
48
  raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil?
44
49
 
45
50
  # Check that the type is valid before calling #to_sym, since symbols are never garbage
@@ -50,21 +55,34 @@ module Avro
50
55
 
51
56
  type_sym = type.to_sym
52
57
  if PRIMITIVE_TYPES_SYM.include?(type_sym)
53
- return PrimitiveSchema.new(type_sym)
54
-
58
+ case type_sym
59
+ when :bytes
60
+ precision = json_obj['precision']
61
+ scale = json_obj['scale']
62
+ return BytesSchema.new(type_sym, logical_type, precision, scale)
63
+ else
64
+ return PrimitiveSchema.new(type_sym, logical_type)
65
+ end
55
66
  elsif NAMED_TYPES_SYM.include? type_sym
56
67
  name = json_obj['name']
68
+ if !Avro.disable_schema_name_validation && name !~ NAME_REGEX
69
+ raise SchemaParseError, "Name #{name} is invalid for type #{type}!"
70
+ end
57
71
  namespace = json_obj.include?('namespace') ? json_obj['namespace'] : default_namespace
72
+ aliases = json_obj['aliases']
58
73
  case type_sym
59
74
  when :fixed
60
75
  size = json_obj['size']
61
- return FixedSchema.new(name, namespace, size, names)
76
+ return FixedSchema.new(name, namespace, size, names, logical_type, aliases)
62
77
  when :enum
63
78
  symbols = json_obj['symbols']
64
- return EnumSchema.new(name, namespace, symbols, names)
79
+ doc = json_obj['doc']
80
+ default = json_obj['default']
81
+ return EnumSchema.new(name, namespace, symbols, names, doc, default, aliases)
65
82
  when :record, :error
66
83
  fields = json_obj['fields']
67
- return RecordSchema.new(name, namespace, fields, names, type_sym)
84
+ doc = json_obj['doc']
85
+ return RecordSchema.new(name, namespace, fields, names, type_sym, doc, aliases)
68
86
  else
69
87
  raise SchemaParseError.new("Unknown named type: #{type}")
70
88
  end
@@ -91,52 +109,29 @@ module Avro
91
109
  end
92
110
 
93
111
  # Determine if a ruby datum is an instance of a schema
94
- def self.validate(expected_schema, datum)
95
- case expected_schema.type_sym
96
- when :null
97
- datum.nil?
98
- when :boolean
99
- datum == true || datum == false
100
- when :string, :bytes
101
- datum.is_a? String
102
- when :int
103
- (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) &&
104
- (INT_MIN_VALUE <= datum) && (datum <= INT_MAX_VALUE)
105
- when :long
106
- (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) &&
107
- (LONG_MIN_VALUE <= datum) && (datum <= LONG_MAX_VALUE)
108
- when :float, :double
109
- datum.is_a?(Float) || datum.is_a?(Fixnum) || datum.is_a?(Bignum)
110
- when :fixed
111
- datum.is_a?(String) && datum.bytesize == expected_schema.size
112
- when :enum
113
- expected_schema.symbols.include? datum
114
- when :array
115
- datum.is_a?(Array) &&
116
- datum.all?{|d| validate(expected_schema.items, d) }
117
- when :map
118
- datum.keys.all?{|k| k.is_a? String } &&
119
- datum.values.all?{|v| validate(expected_schema.values, v) }
120
- when :union
121
- expected_schema.schemas.any?{|s| validate(s, datum) }
122
- when :record, :error, :request
123
- datum.is_a?(Hash) &&
124
- expected_schema.fields.all?{|f| validate(f.type, datum[f.name]) }
125
- else
126
- raise "you suck #{expected_schema.inspect} is not allowed."
127
- end
112
+ def self.validate(expected_schema, logical_datum, options = { recursive: true, encoded: false })
113
+ SchemaValidator.validate!(expected_schema, logical_datum, options)
114
+ true
115
+ rescue SchemaValidator::ValidationError
116
+ false
128
117
  end
129
118
 
130
- def initialize(type)
119
+ def initialize(type, logical_type=nil)
131
120
  @type_sym = type.is_a?(Symbol) ? type : type.to_sym
121
+ @logical_type = logical_type
132
122
  end
133
123
 
134
124
  attr_reader :type_sym
125
+ attr_reader :logical_type
135
126
 
136
127
  # Returns the type as a string (rather than a symbol), for backwards compatibility.
137
128
  # Deprecated in favor of {#type_sym}.
138
129
  def type; @type_sym.to_s; end
139
130
 
131
+ def type_adapter
132
+ @type_adapter ||= LogicalTypes.type_adapter(type, logical_type) || LogicalTypes::Identity
133
+ end
134
+
140
135
  # Returns the MD5 fingerprint of the schema as an Integer.
141
136
  def md5_fingerprint
142
137
  parsing_form = SchemaNormalization.to_parsing_form(self)
@@ -149,11 +144,66 @@ module Avro
149
144
  Digest::SHA256.hexdigest(parsing_form).to_i(16)
150
145
  end
151
146
 
152
- def ==(other, seen=nil)
147
+ CRC_EMPTY = 0xc15d213aa4d7a795
148
+
149
+ # The Java library caches this value after it is initialized, so this pattern
150
+ # mimics that.
151
+ @@fp_table = nil
152
+ def initFPTable
153
+ @@fp_table = Array.new(256)
154
+ 256.times do |i|
155
+ fp = i
156
+ 8.times do
157
+ fp = (fp >> 1) ^ ( CRC_EMPTY & -( fp & 1 ) )
158
+ end
159
+ @@fp_table[i] = fp
160
+ end
161
+ end
162
+
163
+ def crc_64_avro_fingerprint
164
+ parsing_form = Avro::SchemaNormalization.to_parsing_form(self)
165
+ data_bytes = parsing_form.unpack("C*")
166
+
167
+ initFPTable unless @@fp_table
168
+
169
+ fp = CRC_EMPTY
170
+ data_bytes.each do |b|
171
+ fp = (fp >> 8) ^ @@fp_table[ (fp ^ b) & 0xff ]
172
+ end
173
+ fp
174
+ end
175
+
176
+ SINGLE_OBJECT_MAGIC_NUMBER = [0xC3, 0x01]
177
+ def single_object_encoding_header
178
+ [SINGLE_OBJECT_MAGIC_NUMBER, single_object_schema_fingerprint].flatten
179
+ end
180
+ def single_object_schema_fingerprint
181
+ working = crc_64_avro_fingerprint
182
+ bytes = Array.new(8)
183
+ 8.times do |i|
184
+ bytes[i] = (working & 0xff)
185
+ working = working >> 8
186
+ end
187
+ bytes
188
+ end
189
+
190
+ def read?(writers_schema)
191
+ SchemaCompatibility.can_read?(writers_schema, self)
192
+ end
193
+
194
+ def be_read?(other_schema)
195
+ other_schema.read?(self)
196
+ end
197
+
198
+ def mutual_read?(other_schema)
199
+ SchemaCompatibility.mutual_read?(other_schema, self)
200
+ end
201
+
202
+ def ==(other, _seen=nil)
153
203
  other.is_a?(Schema) && type_sym == other.type_sym
154
204
  end
155
205
 
156
- def hash(seen=nil)
206
+ def hash(_seen=nil)
157
207
  type_sym.hash
158
208
  end
159
209
 
@@ -171,20 +221,36 @@ module Avro
171
221
  end
172
222
  end
173
223
 
174
- def to_avro(names=nil)
175
- {'type' => type}
224
+ def to_avro(_names=nil)
225
+ props = {'type' => type}
226
+ props['logicalType'] = logical_type if logical_type
227
+ props
176
228
  end
177
229
 
178
230
  def to_s
179
231
  MultiJson.dump to_avro
180
232
  end
181
233
 
234
+ def validate_aliases!
235
+ unless aliases.nil? ||
236
+ (aliases.is_a?(Array) && aliases.all? { |a| a.is_a?(String) })
237
+
238
+ raise Avro::SchemaParseError,
239
+ "Invalid aliases value #{aliases.inspect} for #{type} #{name}. Must be an array of strings."
240
+ end
241
+ end
242
+ private :validate_aliases!
243
+
182
244
  class NamedSchema < Schema
183
- attr_reader :name, :namespace
184
- def initialize(type, name, namespace=nil, names=nil)
185
- super(type)
245
+ attr_reader :name, :namespace, :aliases
246
+
247
+ def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil, aliases=nil)
248
+ super(type, logical_type)
186
249
  @name, @namespace = Name.extract_namespace(name, namespace)
187
- names = Name.add_name(names, self)
250
+ @doc = doc
251
+ @aliases = aliases
252
+ validate_aliases! if aliases
253
+ Name.add_name(names, self)
188
254
  end
189
255
 
190
256
  def to_avro(names=Set.new)
@@ -194,31 +260,53 @@ module Avro
194
260
  end
195
261
  props = {'name' => @name}
196
262
  props.merge!('namespace' => @namespace) if @namespace
263
+ props['namespace'] = @namespace if @namespace
264
+ props['doc'] = @doc if @doc
265
+ props['aliases'] = aliases if aliases && aliases.any?
197
266
  super.merge props
198
267
  end
199
268
 
200
269
  def fullname
201
270
  @fullname ||= Name.make_fullname(@name, @namespace)
202
271
  end
272
+
273
+ def fullname_aliases
274
+ @fullname_aliases ||= if aliases
275
+ aliases.map { |a| Name.make_fullname(a, namespace) }
276
+ else
277
+ []
278
+ end
279
+ end
280
+
281
+ def match_fullname?(name)
282
+ name == fullname || fullname_aliases.include?(name)
283
+ end
203
284
  end
204
285
 
205
286
  class RecordSchema < NamedSchema
206
- attr_reader :fields
287
+ attr_reader :fields, :doc
207
288
 
208
289
  def self.make_field_objects(field_data, names, namespace=nil)
209
- field_objects, field_names = [], Set.new
210
- field_data.each_with_index do |field, i|
290
+ field_objects, field_names, alias_names = [], Set.new, Set.new
291
+ field_data.each do |field|
211
292
  if field.respond_to?(:[]) # TODO(jmhodges) wtffffff
212
293
  type = field['type']
213
294
  name = field['name']
214
295
  default = field.key?('default') ? field['default'] : :no_default
215
296
  order = field['order']
216
- new_field = Field.new(type, name, default, order, names, namespace)
297
+ doc = field['doc']
298
+ aliases = field['aliases']
299
+ new_field = Field.new(type, name, default, order, names, namespace, doc, aliases)
217
300
  # make sure field name has not been used yet
218
301
  if field_names.include?(new_field.name)
219
302
  raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
220
303
  end
221
304
  field_names << new_field.name
305
+ # make sure alias has not been used yet
306
+ if new_field.aliases && alias_names.intersect?(new_field.aliases.to_set)
307
+ raise SchemaParseError, "Alias #{(alias_names & new_field.aliases).to_a} already in use"
308
+ end
309
+ alias_names.merge(new_field.aliases) if new_field.aliases
222
310
  else
223
311
  raise SchemaParseError, "Not a valid field: #{field}"
224
312
  end
@@ -227,20 +315,36 @@ module Avro
227
315
  field_objects
228
316
  end
229
317
 
230
- def initialize(name, namespace, fields, names=nil, schema_type=:record)
318
+ def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil, aliases=nil)
231
319
  if schema_type == :request || schema_type == 'request'
232
320
  @type_sym = schema_type.to_sym
233
321
  @namespace = namespace
322
+ @name = nil
323
+ @doc = nil
234
324
  else
235
- super(schema_type, name, namespace, names)
325
+ super(schema_type, name, namespace, names, doc, nil, aliases)
236
326
  end
237
- @fields = RecordSchema.make_field_objects(fields, names, self.namespace)
327
+ @fields = if fields
328
+ RecordSchema.make_field_objects(fields, names, self.namespace)
329
+ else
330
+ {}
331
+ end
238
332
  end
239
333
 
240
334
  def fields_hash
241
335
  @fields_hash ||= fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
242
336
  end
243
337
 
338
+ def fields_by_alias
339
+ @fields_by_alias ||= fields.each_with_object({}) do |field, hash|
340
+ if field.aliases
341
+ field.aliases.each do |a|
342
+ hash[a] = field
343
+ end
344
+ end
345
+ end
346
+ end
347
+
244
348
  def to_avro(names=Set.new)
245
349
  hsh = super
246
350
  return hsh unless hsh.is_a?(Hash)
@@ -285,8 +389,7 @@ module Avro
285
389
  def initialize(schemas, names=nil, default_namespace=nil)
286
390
  super(:union)
287
391
 
288
- schema_objects = []
289
- schemas.each_with_index do |schema, i|
392
+ @schemas = schemas.each_with_object([]) do |schema, schema_objects|
290
393
  new_schema = subparse(schema, names, default_namespace)
291
394
  ns_type = new_schema.type_sym
292
395
 
@@ -299,7 +402,6 @@ module Avro
299
402
  else
300
403
  schema_objects << new_schema
301
404
  end
302
- @schemas = schema_objects
303
405
  end
304
406
  end
305
407
 
@@ -309,29 +411,51 @@ module Avro
309
411
  end
310
412
 
311
413
  class EnumSchema < NamedSchema
312
- attr_reader :symbols
313
- def initialize(name, space, symbols, names=nil)
414
+ SYMBOL_REGEX = /^[A-Za-z_][A-Za-z0-9_]*$/
415
+
416
+ attr_reader :symbols, :doc, :default
417
+
418
+ def initialize(name, space, symbols, names=nil, doc=nil, default=nil, aliases=nil)
314
419
  if symbols.uniq.length < symbols.length
315
- fail_msg = 'Duplicate symbol: %s' % symbols
420
+ fail_msg = "Duplicate symbol: #{symbols}"
316
421
  raise Avro::SchemaParseError, fail_msg
317
422
  end
318
- super(:enum, name, space, names)
423
+
424
+ if !Avro.disable_enum_symbol_validation
425
+ invalid_symbols = symbols.select { |symbol| symbol !~ SYMBOL_REGEX }
426
+
427
+ if invalid_symbols.any?
428
+ raise SchemaParseError,
429
+ "Invalid symbols for #{name}: #{invalid_symbols.join(', ')} don't match #{SYMBOL_REGEX.inspect}"
430
+ end
431
+ end
432
+
433
+ if default && !symbols.include?(default)
434
+ raise Avro::SchemaParseError, "Default '#{default}' is not a valid symbol for enum #{name}"
435
+ end
436
+
437
+ super(:enum, name, space, names, doc, nil, aliases)
438
+ @default = default
319
439
  @symbols = symbols
320
440
  end
321
441
 
322
- def to_avro(names=Set.new)
442
+ def to_avro(_names=Set.new)
323
443
  avro = super
324
- avro.is_a?(Hash) ? avro.merge('symbols' => symbols) : avro
444
+ if avro.is_a?(Hash)
445
+ avro['symbols'] = symbols
446
+ avro['default'] = default if default
447
+ end
448
+ avro
325
449
  end
326
450
  end
327
451
 
328
452
  # Valid primitive types are in PRIMITIVE_TYPES.
329
453
  class PrimitiveSchema < Schema
330
- def initialize(type)
454
+ def initialize(type, logical_type=nil)
331
455
  if PRIMITIVE_TYPES_SYM.include?(type)
332
- super(type)
456
+ super(type, logical_type)
333
457
  elsif PRIMITIVE_TYPES.include?(type)
334
- super(type.to_sym)
458
+ super(type.to_sym, logical_type)
335
459
  else
336
460
  raise AvroError.new("#{type} is not a valid primitive type.")
337
461
  end
@@ -343,14 +467,32 @@ module Avro
343
467
  end
344
468
  end
345
469
 
470
+ class BytesSchema < PrimitiveSchema
471
+ attr_reader :precision, :scale
472
+ def initialize(type, logical_type=nil, precision=nil, scale=nil)
473
+ super(type.to_sym, logical_type)
474
+ @precision = precision
475
+ @scale = scale
476
+ end
477
+
478
+ def to_avro(names=nil)
479
+ avro = super
480
+ return avro if avro.is_a?(String)
481
+
482
+ avro['precision'] = precision if precision
483
+ avro['scale'] = scale if scale
484
+ avro
485
+ end
486
+ end
487
+
346
488
  class FixedSchema < NamedSchema
347
489
  attr_reader :size
348
- def initialize(name, space, size, names=nil)
490
+ def initialize(name, space, size, names=nil, logical_type=nil, aliases=nil)
349
491
  # Ensure valid ctor args
350
- unless size.is_a?(Fixnum) || size.is_a?(Bignum)
492
+ unless size.is_a?(Integer)
351
493
  raise AvroError, 'Fixed Schema requires a valid integer for size property.'
352
494
  end
353
- super(:fixed, name, space, names)
495
+ super(:fixed, name, space, names, nil, logical_type, aliases)
354
496
  @size = size
355
497
  end
356
498
 
@@ -361,21 +503,48 @@ module Avro
361
503
  end
362
504
 
363
505
  class Field < Schema
364
- attr_reader :type, :name, :default, :order
506
+ attr_reader :type, :name, :default, :order, :doc, :aliases
365
507
 
366
- def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil)
508
+ def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil, aliases=nil)
367
509
  @type = subparse(type, names, namespace)
368
510
  @name = name
369
511
  @default = default
370
512
  @order = order
513
+ @doc = doc
514
+ @aliases = aliases
515
+ validate_aliases! if aliases
516
+ validate_default! if default? && !Avro.disable_field_default_validation
517
+ end
518
+
519
+ def default?
520
+ @default != :no_default
371
521
  end
372
522
 
373
523
  def to_avro(names=Set.new)
374
524
  {'name' => name, 'type' => type.to_avro(names)}.tap do |avro|
375
- avro['default'] = default unless default == :no_default
525
+ avro['default'] = default if default?
376
526
  avro['order'] = order if order
527
+ avro['doc'] = doc if doc
377
528
  end
378
529
  end
530
+
531
+ def alias_names
532
+ @alias_names ||= Array(aliases)
533
+ end
534
+
535
+ private
536
+
537
+ def validate_default!
538
+ type_for_default = if type.type_sym == :union
539
+ type.schemas.first
540
+ else
541
+ type
542
+ end
543
+
544
+ Avro::SchemaValidator.validate!(type_for_default, default)
545
+ rescue Avro::SchemaValidator::ValidationError => e
546
+ raise Avro::SchemaParseError, "Error validating default for #{name}: #{e.message}"
547
+ end
379
548
  end
380
549
  end
381
550