avro 1.8.2 → 1.9.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -20,7 +20,7 @@ module Avro
20
20
  VALID_TYPE_SCHEMA_TYPES_SYM = Set.new(VALID_TYPE_SCHEMA_TYPES.map(&:to_sym))
21
21
  class ProtocolParseError < Avro::AvroError; end
22
22
 
23
- attr_reader :name, :namespace, :types, :messages, :md5
23
+ attr_reader :name, :namespace, :types, :messages, :md5, :doc
24
24
  def self.parse(protocol_string)
25
25
  json_data = MultiJson.load(protocol_string)
26
26
 
@@ -29,13 +29,14 @@ module Avro
29
29
  namespace = json_data['namespace']
30
30
  types = json_data['types']
31
31
  messages = json_data['messages']
32
- Protocol.new(name, namespace, types, messages)
32
+ doc = json_data['doc']
33
+ Protocol.new(name, namespace, types, messages, doc)
33
34
  else
34
35
  raise ProtocolParseError, "Not a JSON object: #{json_data}"
35
36
  end
36
37
  end
37
38
 
38
- def initialize(name, namespace=nil, types=nil, messages=nil)
39
+ def initialize(name, namespace=nil, types=nil, messages=nil, doc=nil)
39
40
  # Ensure valid ctor args
40
41
  if !name
41
42
  raise ProtocolParseError, 'Protocols must have a non-empty name.'
@@ -55,6 +56,7 @@ module Avro
55
56
  @types = parse_types(types, type_names)
56
57
  @messages = parse_messages(messages, type_names)
57
58
  @md5 = Digest::MD5.digest(to_s)
59
+ @doc = doc
58
60
  end
59
61
 
60
62
  def to_s
@@ -67,7 +69,6 @@ module Avro
67
69
 
68
70
  private
69
71
  def parse_types(types, type_names)
70
- type_objects = []
71
72
  types.collect do |type|
72
73
  # FIXME adding type.name to type_names is not defined in the
73
74
  # spec. Possible bug in the python impl and the spec.
@@ -92,7 +93,8 @@ module Avro
92
93
  request = body['request']
93
94
  response = body['response']
94
95
  errors = body['errors']
95
- message_objects[name] = Message.new(name, request, response, errors, names, namespace)
96
+ doc = body['doc']
97
+ message_objects[name] = Message.new(name, request, response, errors, names, namespace, doc)
96
98
  end
97
99
  message_objects
98
100
  end
@@ -111,14 +113,15 @@ module Avro
111
113
  end
112
114
 
113
115
  class Message
114
- attr_reader :name, :request, :response, :errors, :default_namespace
116
+ attr_reader :name, :request, :response, :errors, :default_namespace, :doc
115
117
 
116
- def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil)
118
+ def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil, doc=nil)
117
119
  @name = name
118
120
  @default_namespace = default_namespace
119
121
  @request = parse_request(request, names)
120
122
  @response = parse_response(response, names)
121
123
  @errors = parse_errors(errors, names) if errors
124
+ @doc = doc
122
125
  end
123
126
 
124
127
  def to_avro(names=Set.new)
@@ -127,6 +130,7 @@ module Avro
127
130
  'response' => response.to_avro(names)
128
131
  }.tap do |hash|
129
132
  hash['errors'] = errors.to_avro(names) if errors
133
+ hash['doc'] = @doc if @doc
130
134
  end
131
135
  end
132
136
 
@@ -14,6 +14,8 @@
14
14
  # See the License for the specific language governing permissions and
15
15
  # limitations under the License.
16
16
 
17
+ require 'avro/logical_types'
18
+
17
19
  module Avro
18
20
  class Schema
19
21
  # Sets of strings, for backwards compatibility. See below for sets of symbols,
@@ -40,6 +42,7 @@ module Avro
40
42
  def self.real_parse(json_obj, names=nil, default_namespace=nil)
41
43
  if json_obj.is_a? Hash
42
44
  type = json_obj['type']
45
+ logical_type = json_obj['logicalType']
43
46
  raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil?
44
47
 
45
48
  # Check that the type is valid before calling #to_sym, since symbols are never garbage
@@ -50,7 +53,7 @@ module Avro
50
53
 
51
54
  type_sym = type.to_sym
52
55
  if PRIMITIVE_TYPES_SYM.include?(type_sym)
53
- return PrimitiveSchema.new(type_sym)
56
+ return PrimitiveSchema.new(type_sym, logical_type)
54
57
 
55
58
  elsif NAMED_TYPES_SYM.include? type_sym
56
59
  name = json_obj['name']
@@ -58,13 +61,15 @@ module Avro
58
61
  case type_sym
59
62
  when :fixed
60
63
  size = json_obj['size']
61
- return FixedSchema.new(name, namespace, size, names)
64
+ return FixedSchema.new(name, namespace, size, names, logical_type)
62
65
  when :enum
63
66
  symbols = json_obj['symbols']
64
- return EnumSchema.new(name, namespace, symbols, names)
67
+ doc = json_obj['doc']
68
+ return EnumSchema.new(name, namespace, symbols, names, doc)
65
69
  when :record, :error
66
70
  fields = json_obj['fields']
67
- return RecordSchema.new(name, namespace, fields, names, type_sym)
71
+ doc = json_obj['doc']
72
+ return RecordSchema.new(name, namespace, fields, names, type_sym, doc)
68
73
  else
69
74
  raise SchemaParseError.new("Unknown named type: #{type}")
70
75
  end
@@ -91,52 +96,29 @@ module Avro
91
96
  end
92
97
 
93
98
  # Determine if a ruby datum is an instance of a schema
94
- def self.validate(expected_schema, datum)
95
- case expected_schema.type_sym
96
- when :null
97
- datum.nil?
98
- when :boolean
99
- datum == true || datum == false
100
- when :string, :bytes
101
- datum.is_a? String
102
- when :int
103
- (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) &&
104
- (INT_MIN_VALUE <= datum) && (datum <= INT_MAX_VALUE)
105
- when :long
106
- (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) &&
107
- (LONG_MIN_VALUE <= datum) && (datum <= LONG_MAX_VALUE)
108
- when :float, :double
109
- datum.is_a?(Float) || datum.is_a?(Fixnum) || datum.is_a?(Bignum)
110
- when :fixed
111
- datum.is_a?(String) && datum.bytesize == expected_schema.size
112
- when :enum
113
- expected_schema.symbols.include? datum
114
- when :array
115
- datum.is_a?(Array) &&
116
- datum.all?{|d| validate(expected_schema.items, d) }
117
- when :map
118
- datum.keys.all?{|k| k.is_a? String } &&
119
- datum.values.all?{|v| validate(expected_schema.values, v) }
120
- when :union
121
- expected_schema.schemas.any?{|s| validate(s, datum) }
122
- when :record, :error, :request
123
- datum.is_a?(Hash) &&
124
- expected_schema.fields.all?{|f| validate(f.type, datum[f.name]) }
125
- else
126
- raise "you suck #{expected_schema.inspect} is not allowed."
127
- end
99
+ def self.validate(expected_schema, logical_datum, options = { recursive: true, encoded: false })
100
+ SchemaValidator.validate!(expected_schema, logical_datum, options)
101
+ true
102
+ rescue SchemaValidator::ValidationError
103
+ false
128
104
  end
129
105
 
130
- def initialize(type)
106
+ def initialize(type, logical_type=nil)
131
107
  @type_sym = type.is_a?(Symbol) ? type : type.to_sym
108
+ @logical_type = logical_type
132
109
  end
133
110
 
134
111
  attr_reader :type_sym
112
+ attr_reader :logical_type
135
113
 
136
114
  # Returns the type as a string (rather than a symbol), for backwards compatibility.
137
115
  # Deprecated in favor of {#type_sym}.
138
116
  def type; @type_sym.to_s; end
139
117
 
118
# Returns the adapter used for this schema's logical type, memoized in
# @type_adapter. The adapter is whatever LogicalTypes.type_adapter yields for
# (type, logical_type); when that lookup returns nil/false,
# LogicalTypes::Identity is used instead.
def type_adapter
  @type_adapter ||= begin
    adapter = LogicalTypes.type_adapter(type, logical_type)
    adapter || LogicalTypes::Identity
  end
end
121
+
140
122
  # Returns the MD5 fingerprint of the schema as an Integer.
141
123
  def md5_fingerprint
142
124
  parsing_form = SchemaNormalization.to_parsing_form(self)
@@ -149,6 +131,18 @@ module Avro
149
131
  Digest::SHA256.hexdigest(parsing_form).to_i(16)
150
132
  end
151
133
 
134
# Asks SchemaCompatibility.can_read? whether this schema, acting as the
# reader's schema, can read data written with +writers_schema+.
def read?(writers_schema)
  SchemaCompatibility.can_read?(writers_schema, self)
end
137
+
138
# Inverse of #read?: asks +other_schema+ whether it can read data written
# with this schema, by calling other_schema.read?(self).
def be_read?(other_schema)
  other_schema.read?(self)
end
141
+
142
# Asks SchemaCompatibility.mutual_read? whether this schema and
# +other_schema+ can each read data written with the other.
def mutual_read?(other_schema)
  SchemaCompatibility.mutual_read?(other_schema, self)
end
145
+
152
146
  def ==(other, seen=nil)
153
147
  other.is_a?(Schema) && type_sym == other.type_sym
154
148
  end
@@ -172,7 +166,9 @@ module Avro
172
166
  end
173
167
 
174
168
  def to_avro(names=nil)
175
- {'type' => type}
169
+ props = {'type' => type}
170
+ props['logicalType'] = logical_type if logical_type
171
+ props
176
172
  end
177
173
 
178
174
  def to_s
@@ -181,9 +177,11 @@ module Avro
181
177
 
182
178
  class NamedSchema < Schema
183
179
  attr_reader :name, :namespace
184
- def initialize(type, name, namespace=nil, names=nil)
185
- super(type)
180
+
181
+ def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil)
182
+ super(type, logical_type)
186
183
  @name, @namespace = Name.extract_namespace(name, namespace)
184
+ @doc = doc
187
185
  names = Name.add_name(names, self)
188
186
  end
189
187
 
@@ -194,6 +192,7 @@ module Avro
194
192
  end
195
193
  props = {'name' => @name}
196
194
  props.merge!('namespace' => @namespace) if @namespace
195
+ props.merge!('doc' => @doc) if @doc
197
196
  super.merge props
198
197
  end
199
198
 
@@ -203,7 +202,7 @@ module Avro
203
202
  end
204
203
 
205
204
  class RecordSchema < NamedSchema
206
- attr_reader :fields
205
+ attr_reader :fields, :doc
207
206
 
208
207
  def self.make_field_objects(field_data, names, namespace=nil)
209
208
  field_objects, field_names = [], Set.new
@@ -213,7 +212,8 @@ module Avro
213
212
  name = field['name']
214
213
  default = field.key?('default') ? field['default'] : :no_default
215
214
  order = field['order']
216
- new_field = Field.new(type, name, default, order, names, namespace)
215
+ doc = field['doc']
216
+ new_field = Field.new(type, name, default, order, names, namespace, doc)
217
217
  # make sure field name has not been used yet
218
218
  if field_names.include?(new_field.name)
219
219
  raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
@@ -227,14 +227,20 @@ module Avro
227
227
  field_objects
228
228
  end
229
229
 
230
- def initialize(name, namespace, fields, names=nil, schema_type=:record)
230
+ def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil)
231
231
  if schema_type == :request || schema_type == 'request'
232
232
  @type_sym = schema_type.to_sym
233
233
  @namespace = namespace
234
+ @name = nil
235
+ @doc = nil
234
236
  else
235
- super(schema_type, name, namespace, names)
237
+ super(schema_type, name, namespace, names, doc)
236
238
  end
237
- @fields = RecordSchema.make_field_objects(fields, names, self.namespace)
239
+ @fields = if fields
240
+ RecordSchema.make_field_objects(fields, names, self.namespace)
241
+ else
242
+ {}
243
+ end
238
244
  end
239
245
 
240
246
  def fields_hash
@@ -285,8 +291,7 @@ module Avro
285
291
  def initialize(schemas, names=nil, default_namespace=nil)
286
292
  super(:union)
287
293
 
288
- schema_objects = []
289
- schemas.each_with_index do |schema, i|
294
+ @schemas = schemas.each_with_object([]) do |schema, schema_objects|
290
295
  new_schema = subparse(schema, names, default_namespace)
291
296
  ns_type = new_schema.type_sym
292
297
 
@@ -299,7 +304,6 @@ module Avro
299
304
  else
300
305
  schema_objects << new_schema
301
306
  end
302
- @schemas = schema_objects
303
307
  end
304
308
  end
305
309
 
@@ -309,13 +313,14 @@ module Avro
309
313
  end
310
314
 
311
315
  class EnumSchema < NamedSchema
312
- attr_reader :symbols
313
- def initialize(name, space, symbols, names=nil)
316
+ attr_reader :symbols, :doc
317
+
318
+ def initialize(name, space, symbols, names=nil, doc=nil)
314
319
  if symbols.uniq.length < symbols.length
315
- fail_msg = 'Duplicate symbol: %s' % symbols
320
+ fail_msg = "Duplicate symbol: #{symbols}"
316
321
  raise Avro::SchemaParseError, fail_msg
317
322
  end
318
- super(:enum, name, space, names)
323
+ super(:enum, name, space, names, doc)
319
324
  @symbols = symbols
320
325
  end
321
326
 
@@ -327,11 +332,11 @@ module Avro
327
332
 
328
333
  # Valid primitive types are in PRIMITIVE_TYPES.
329
334
  class PrimitiveSchema < Schema
330
- def initialize(type)
335
+ def initialize(type, logical_type=nil)
331
336
  if PRIMITIVE_TYPES_SYM.include?(type)
332
- super(type)
337
+ super(type, logical_type)
333
338
  elsif PRIMITIVE_TYPES.include?(type)
334
- super(type.to_sym)
339
+ super(type.to_sym, logical_type)
335
340
  else
336
341
  raise AvroError.new("#{type} is not a valid primitive type.")
337
342
  end
@@ -345,12 +350,12 @@ module Avro
345
350
 
346
351
  class FixedSchema < NamedSchema
347
352
  attr_reader :size
348
- def initialize(name, space, size, names=nil)
353
+ def initialize(name, space, size, names=nil, logical_type=nil)
349
354
  # Ensure valid ctor args
350
- unless size.is_a?(Fixnum) || size.is_a?(Bignum)
355
+ unless size.is_a?(Integer)
351
356
  raise AvroError, 'Fixed Schema requires a valid integer for size property.'
352
357
  end
353
- super(:fixed, name, space, names)
358
+ super(:fixed, name, space, names, nil, logical_type)
354
359
  @size = size
355
360
  end
356
361
 
@@ -361,21 +366,42 @@ module Avro
361
366
  end
362
367
 
363
368
  class Field < Schema
364
- attr_reader :type, :name, :default, :order
369
+ attr_reader :type, :name, :default, :order, :doc
365
370
 
366
- def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil)
371
+ def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil)
367
372
  @type = subparse(type, names, namespace)
368
373
  @name = name
369
374
  @default = default
370
375
  @order = order
376
+ @doc = doc
377
+ validate_default! if default? && !Avro.disable_field_default_validation
378
+ end
379
+
380
+ def default?
381
+ @default != :no_default
371
382
  end
372
383
 
373
384
  def to_avro(names=Set.new)
374
385
  {'name' => name, 'type' => type.to_avro(names)}.tap do |avro|
375
- avro['default'] = default unless default == :no_default
386
+ avro['default'] = default if default?
376
387
  avro['order'] = order if order
388
+ avro['doc'] = doc if doc
377
389
  end
378
390
  end
391
+
392
+ private
393
+
394
# Validates this field's default value against the field's schema via
# Avro::SchemaValidator.validate!. When the field type is a union, the default
# is checked against the union's first branch only.
#
# Raises Avro::SchemaParseError (wrapping the validator's message) when the
# validator raises Avro::SchemaValidator::ValidationError.
def validate_default!
  default_schema = type.type_sym == :union ? type.schemas.first : type
  Avro::SchemaValidator.validate!(default_schema, default)
rescue Avro::SchemaValidator::ValidationError => validation_error
  raise Avro::SchemaParseError, "Error validating default for #{name}: #{validation_error.message}"
end
379
405
  end
380
406
  end
381
407
 
@@ -0,0 +1,170 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
module Avro
  # Checks whether data written with one schema can be read with another.
  module SchemaCompatibility
    # Full, recursive check that a datum written using +writers_schema+ can be
    # read using +readers_schema+. Every call runs on a fresh Checker.
    def self.can_read?(writers_schema, readers_schema)
      checker = Checker.new
      checker.can_read?(writers_schema, readers_schema)
    end

    # Full, recursive check in both directions: a datum written using either
    # schema must be readable using the other one.
    def self.mutual_read?(writers_schema, readers_schema)
      checker = Checker.new
      checker.mutual_read?(writers_schema, readers_schema)
    end

    # Shallow check that a datum written with +writers_schema+ could be read
    # using +readers_schema+. Only the types are compared (including schema
    # promotion) plus the full name of named types. Aliases for named types
    # are not supported here.
    #
    # Returns true or false.
    def self.match_schemas(writers_schema, readers_schema)
      w_type = writers_schema.type_sym
      r_type = readers_schema.type_sym

      # A union on either side always passes this shallow check; the branches
      # are examined by Checker during the full check.
      return true if w_type == :union || r_type == :union

      if w_type == r_type
        return true if Schema::PRIMITIVE_TYPES_SYM.include?(r_type)

        case r_type
        when :record, :error, :enum
          return writers_schema.fullname == readers_schema.fullname
        when :request
          return true
        when :fixed
          return writers_schema.fullname == readers_schema.fullname &&
                 writers_schema.size == readers_schema.size
        when :map
          return match_schemas(writers_schema.values, readers_schema.values)
        when :array
          return match_schemas(writers_schema.items, readers_schema.items)
        end
      end

      # Schema promotion: which reader types accept each writer type.
      case w_type
      when :int    then [:long, :float, :double].include?(r_type)
      when :long   then [:float, :double].include?(r_type)
      when :float  then r_type == :double
      when :string then r_type == :bytes
      when :bytes  then r_type == :string
      else false
      end
    end

    # Performs the recursive compatibility walk. An instance remembers every
    # (writer, reader) pair it has started checking so that self-referencing
    # schemas do not recurse forever.
    class Checker
      # Reader types that need nothing beyond SchemaCompatibility.match_schemas
      # (when the writer is not a union): all primitive types plus :fixed.
      SIMPLE_CHECKS = Schema::PRIMITIVE_TYPES_SYM.dup.add(:fixed).freeze

      attr_reader :recursion_set
      private :recursion_set

      def initialize
        @recursion_set = Set.new
      end

      # True when data written with +writers_schema+ can be read with
      # +readers_schema+.
      def can_read?(writers_schema, readers_schema)
        full_match_schemas(writers_schema, readers_schema)
      end

      # True when each schema can read data written with the other.
      def mutual_read?(writers_schema, readers_schema)
        can_read?(writers_schema, readers_schema) && can_read?(readers_schema, writers_schema)
      end

      private

      # Recursive worker behind #can_read?. A pair that has already been
      # visited by this Checker is reported as compatible right away.
      def full_match_schemas(writers_schema, readers_schema)
        return true if recursion_in_progress?(writers_schema, readers_schema)
        return false unless Avro::SchemaCompatibility.match_schemas(writers_schema, readers_schema)

        writer_is_union = writers_schema.type_sym == :union
        reader_type = readers_schema.type_sym
        return true if !writer_is_union && SIMPLE_CHECKS.include?(reader_type)

        # NOTE(review): a union writer passes match_schemas unconditionally, so
        # the :map/:array/:enum branches below may be reached with a union
        # writer and call values/items/symbols on it -- confirm union schemas
        # respond to those.
        case reader_type
        when :record
          match_record_schemas(writers_schema, readers_schema)
        when :map
          full_match_schemas(writers_schema.values, readers_schema.values)
        when :array
          full_match_schemas(writers_schema.items, readers_schema.items)
        when :union
          match_union_schemas(writers_schema, readers_schema)
        when :enum
          # reader's symbols must contain all writer's symbols
          (writers_schema.symbols - readers_schema.symbols).empty?
        else
          # Remaining reader types only match a writer that is a union with
          # exactly one branch, and then only if that branch matches.
          writer_is_union &&
            writers_schema.schemas.size == 1 &&
            full_match_schemas(writers_schema.schemas.first, readers_schema)
        end
      end

      # Reader is a union. A union writer needs every one of its branches to be
      # readable by the reader union; any other writer needs at least one
      # reader branch that can read it.
      def match_union_schemas(writers_schema, readers_schema)
        raise 'readers_schema must be a union' unless readers_schema.type_sym == :union

        if writers_schema.type_sym == :union
          writers_schema.schemas.all? { |writer_branch| full_match_schemas(writer_branch, readers_schema) }
        else
          readers_schema.schemas.any? { |reader_branch| full_match_schemas(writers_schema, reader_branch) }
        end
      end

      # Reader is a record. Each reader field must either exist in the writer
      # with a matching type, or carry a default. A union writer never matches.
      def match_record_schemas(writers_schema, readers_schema)
        return false if writers_schema.type_sym == :union

        writer_fields = writers_schema.fields_hash
        readers_schema.fields.all? do |reader_field|
          if writer_fields.key?(reader_field.name)
            full_match_schemas(writer_fields[reader_field.name].type, reader_field.type)
          else
            reader_field.default?
          end
        end
      end

      # Records the (writer, reader) pair, keyed by object_id, and reports
      # whether it had been recorded before. Pairs are never removed, so every
      # later visit of the same pair on this Checker also returns true.
      def recursion_in_progress?(writers_schema, readers_schema)
        pair = [writers_schema.object_id, readers_schema.object_id]
        # Set#add? returns nil when the element is already present.
        !recursion_set.add?(pair)
      end
    end
  end
end