avro 1.8.2 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,7 +20,7 @@ module Avro
20
20
  VALID_TYPE_SCHEMA_TYPES_SYM = Set.new(VALID_TYPE_SCHEMA_TYPES.map(&:to_sym))
21
21
  class ProtocolParseError < Avro::AvroError; end
22
22
 
23
- attr_reader :name, :namespace, :types, :messages, :md5
23
+ attr_reader :name, :namespace, :types, :messages, :md5, :doc
24
24
  def self.parse(protocol_string)
25
25
  json_data = MultiJson.load(protocol_string)
26
26
 
@@ -29,13 +29,14 @@ module Avro
29
29
  namespace = json_data['namespace']
30
30
  types = json_data['types']
31
31
  messages = json_data['messages']
32
- Protocol.new(name, namespace, types, messages)
32
+ doc = json_data['doc']
33
+ Protocol.new(name, namespace, types, messages, doc)
33
34
  else
34
35
  raise ProtocolParseError, "Not a JSON object: #{json_data}"
35
36
  end
36
37
  end
37
38
 
38
- def initialize(name, namespace=nil, types=nil, messages=nil)
39
+ def initialize(name, namespace=nil, types=nil, messages=nil, doc=nil)
39
40
  # Ensure valid ctor args
40
41
  if !name
41
42
  raise ProtocolParseError, 'Protocols must have a non-empty name.'
@@ -55,6 +56,7 @@ module Avro
55
56
  @types = parse_types(types, type_names)
56
57
  @messages = parse_messages(messages, type_names)
57
58
  @md5 = Digest::MD5.digest(to_s)
59
+ @doc = doc
58
60
  end
59
61
 
60
62
  def to_s
@@ -67,7 +69,6 @@ module Avro
67
69
 
68
70
  private
69
71
  def parse_types(types, type_names)
70
- type_objects = []
71
72
  types.collect do |type|
72
73
  # FIXME adding type.name to type_names is not defined in the
73
74
  # spec. Possible bug in the python impl and the spec.
@@ -92,7 +93,8 @@ module Avro
92
93
  request = body['request']
93
94
  response = body['response']
94
95
  errors = body['errors']
95
- message_objects[name] = Message.new(name, request, response, errors, names, namespace)
96
+ doc = body['doc']
97
+ message_objects[name] = Message.new(name, request, response, errors, names, namespace, doc)
96
98
  end
97
99
  message_objects
98
100
  end
@@ -111,14 +113,15 @@ module Avro
111
113
  end
112
114
 
113
115
  class Message
114
- attr_reader :name, :request, :response, :errors, :default_namespace
116
+ attr_reader :name, :request, :response, :errors, :default_namespace, :doc
115
117
 
116
- def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil)
118
+ def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil, doc=nil)
117
119
  @name = name
118
120
  @default_namespace = default_namespace
119
121
  @request = parse_request(request, names)
120
122
  @response = parse_response(response, names)
121
123
  @errors = parse_errors(errors, names) if errors
124
+ @doc = doc
122
125
  end
123
126
 
124
127
  def to_avro(names=Set.new)
@@ -127,6 +130,7 @@ module Avro
127
130
  'response' => response.to_avro(names)
128
131
  }.tap do |hash|
129
132
  hash['errors'] = errors.to_avro(names) if errors
133
+ hash['doc'] = @doc if @doc
130
134
  end
131
135
  end
132
136
 
@@ -14,6 +14,8 @@
14
14
  # See the License for the specific language governing permissions and
15
15
  # limitations under the License.
16
16
 
17
+ require 'avro/logical_types'
18
+
17
19
  module Avro
18
20
  class Schema
19
21
  # Sets of strings, for backwards compatibility. See below for sets of symbols,
@@ -40,6 +42,7 @@ module Avro
40
42
  def self.real_parse(json_obj, names=nil, default_namespace=nil)
41
43
  if json_obj.is_a? Hash
42
44
  type = json_obj['type']
45
+ logical_type = json_obj['logicalType']
43
46
  raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil?
44
47
 
45
48
  # Check that the type is valid before calling #to_sym, since symbols are never garbage
@@ -50,7 +53,7 @@ module Avro
50
53
 
51
54
  type_sym = type.to_sym
52
55
  if PRIMITIVE_TYPES_SYM.include?(type_sym)
53
- return PrimitiveSchema.new(type_sym)
56
+ return PrimitiveSchema.new(type_sym, logical_type)
54
57
 
55
58
  elsif NAMED_TYPES_SYM.include? type_sym
56
59
  name = json_obj['name']
@@ -58,13 +61,15 @@ module Avro
58
61
  case type_sym
59
62
  when :fixed
60
63
  size = json_obj['size']
61
- return FixedSchema.new(name, namespace, size, names)
64
+ return FixedSchema.new(name, namespace, size, names, logical_type)
62
65
  when :enum
63
66
  symbols = json_obj['symbols']
64
- return EnumSchema.new(name, namespace, symbols, names)
67
+ doc = json_obj['doc']
68
+ return EnumSchema.new(name, namespace, symbols, names, doc)
65
69
  when :record, :error
66
70
  fields = json_obj['fields']
67
- return RecordSchema.new(name, namespace, fields, names, type_sym)
71
+ doc = json_obj['doc']
72
+ return RecordSchema.new(name, namespace, fields, names, type_sym, doc)
68
73
  else
69
74
  raise SchemaParseError.new("Unknown named type: #{type}")
70
75
  end
@@ -91,52 +96,29 @@ module Avro
91
96
  end
92
97
 
93
98
  # Determine if a ruby datum is an instance of a schema
94
- def self.validate(expected_schema, datum)
95
- case expected_schema.type_sym
96
- when :null
97
- datum.nil?
98
- when :boolean
99
- datum == true || datum == false
100
- when :string, :bytes
101
- datum.is_a? String
102
- when :int
103
- (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) &&
104
- (INT_MIN_VALUE <= datum) && (datum <= INT_MAX_VALUE)
105
- when :long
106
- (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) &&
107
- (LONG_MIN_VALUE <= datum) && (datum <= LONG_MAX_VALUE)
108
- when :float, :double
109
- datum.is_a?(Float) || datum.is_a?(Fixnum) || datum.is_a?(Bignum)
110
- when :fixed
111
- datum.is_a?(String) && datum.bytesize == expected_schema.size
112
- when :enum
113
- expected_schema.symbols.include? datum
114
- when :array
115
- datum.is_a?(Array) &&
116
- datum.all?{|d| validate(expected_schema.items, d) }
117
- when :map
118
- datum.keys.all?{|k| k.is_a? String } &&
119
- datum.values.all?{|v| validate(expected_schema.values, v) }
120
- when :union
121
- expected_schema.schemas.any?{|s| validate(s, datum) }
122
- when :record, :error, :request
123
- datum.is_a?(Hash) &&
124
- expected_schema.fields.all?{|f| validate(f.type, datum[f.name]) }
125
- else
126
- raise "you suck #{expected_schema.inspect} is not allowed."
127
- end
99
+ def self.validate(expected_schema, logical_datum, options = { recursive: true, encoded: false })
100
+ SchemaValidator.validate!(expected_schema, logical_datum, options)
101
+ true
102
+ rescue SchemaValidator::ValidationError
103
+ false
128
104
  end
129
105
 
130
- def initialize(type)
106
+ def initialize(type, logical_type=nil)
131
107
  @type_sym = type.is_a?(Symbol) ? type : type.to_sym
108
+ @logical_type = logical_type
132
109
  end
133
110
 
134
111
  attr_reader :type_sym
112
+ attr_reader :logical_type
135
113
 
136
114
  # Returns the type as a string (rather than a symbol), for backwards compatibility.
137
115
  # Deprecated in favor of {#type_sym}.
138
116
  def type; @type_sym.to_s; end
139
117
 
118
+ def type_adapter
119
+ @type_adapter ||= LogicalTypes.type_adapter(type, logical_type) || LogicalTypes::Identity
120
+ end
121
+
140
122
  # Returns the MD5 fingerprint of the schema as an Integer.
141
123
  def md5_fingerprint
142
124
  parsing_form = SchemaNormalization.to_parsing_form(self)
@@ -149,6 +131,18 @@ module Avro
149
131
  Digest::SHA256.hexdigest(parsing_form).to_i(16)
150
132
  end
151
133
 
134
+ def read?(writers_schema)
135
+ SchemaCompatibility.can_read?(writers_schema, self)
136
+ end
137
+
138
+ def be_read?(other_schema)
139
+ other_schema.read?(self)
140
+ end
141
+
142
+ def mutual_read?(other_schema)
143
+ SchemaCompatibility.mutual_read?(other_schema, self)
144
+ end
145
+
152
146
  def ==(other, seen=nil)
153
147
  other.is_a?(Schema) && type_sym == other.type_sym
154
148
  end
@@ -172,7 +166,9 @@ module Avro
172
166
  end
173
167
 
174
168
  def to_avro(names=nil)
175
- {'type' => type}
169
+ props = {'type' => type}
170
+ props['logicalType'] = logical_type if logical_type
171
+ props
176
172
  end
177
173
 
178
174
  def to_s
@@ -181,9 +177,11 @@ module Avro
181
177
 
182
178
  class NamedSchema < Schema
183
179
  attr_reader :name, :namespace
184
- def initialize(type, name, namespace=nil, names=nil)
185
- super(type)
180
+
181
+ def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil)
182
+ super(type, logical_type)
186
183
  @name, @namespace = Name.extract_namespace(name, namespace)
184
+ @doc = doc
187
185
  names = Name.add_name(names, self)
188
186
  end
189
187
 
@@ -194,6 +192,7 @@ module Avro
194
192
  end
195
193
  props = {'name' => @name}
196
194
  props.merge!('namespace' => @namespace) if @namespace
195
+ props.merge!('doc' => @doc) if @doc
197
196
  super.merge props
198
197
  end
199
198
 
@@ -203,7 +202,7 @@ module Avro
203
202
  end
204
203
 
205
204
  class RecordSchema < NamedSchema
206
- attr_reader :fields
205
+ attr_reader :fields, :doc
207
206
 
208
207
  def self.make_field_objects(field_data, names, namespace=nil)
209
208
  field_objects, field_names = [], Set.new
@@ -213,7 +212,8 @@ module Avro
213
212
  name = field['name']
214
213
  default = field.key?('default') ? field['default'] : :no_default
215
214
  order = field['order']
216
- new_field = Field.new(type, name, default, order, names, namespace)
215
+ doc = field['doc']
216
+ new_field = Field.new(type, name, default, order, names, namespace, doc)
217
217
  # make sure field name has not been used yet
218
218
  if field_names.include?(new_field.name)
219
219
  raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
@@ -227,14 +227,20 @@ module Avro
227
227
  field_objects
228
228
  end
229
229
 
230
- def initialize(name, namespace, fields, names=nil, schema_type=:record)
230
+ def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil)
231
231
  if schema_type == :request || schema_type == 'request'
232
232
  @type_sym = schema_type.to_sym
233
233
  @namespace = namespace
234
+ @name = nil
235
+ @doc = nil
234
236
  else
235
- super(schema_type, name, namespace, names)
237
+ super(schema_type, name, namespace, names, doc)
236
238
  end
237
- @fields = RecordSchema.make_field_objects(fields, names, self.namespace)
239
+ @fields = if fields
240
+ RecordSchema.make_field_objects(fields, names, self.namespace)
241
+ else
242
+ {}
243
+ end
238
244
  end
239
245
 
240
246
  def fields_hash
@@ -285,8 +291,7 @@ module Avro
285
291
  def initialize(schemas, names=nil, default_namespace=nil)
286
292
  super(:union)
287
293
 
288
- schema_objects = []
289
- schemas.each_with_index do |schema, i|
294
+ @schemas = schemas.each_with_object([]) do |schema, schema_objects|
290
295
  new_schema = subparse(schema, names, default_namespace)
291
296
  ns_type = new_schema.type_sym
292
297
 
@@ -299,7 +304,6 @@ module Avro
299
304
  else
300
305
  schema_objects << new_schema
301
306
  end
302
- @schemas = schema_objects
303
307
  end
304
308
  end
305
309
 
@@ -309,13 +313,14 @@ module Avro
309
313
  end
310
314
 
311
315
  class EnumSchema < NamedSchema
312
- attr_reader :symbols
313
- def initialize(name, space, symbols, names=nil)
316
+ attr_reader :symbols, :doc
317
+
318
+ def initialize(name, space, symbols, names=nil, doc=nil)
314
319
  if symbols.uniq.length < symbols.length
315
- fail_msg = 'Duplicate symbol: %s' % symbols
320
+ fail_msg = "Duplicate symbol: #{symbols}"
316
321
  raise Avro::SchemaParseError, fail_msg
317
322
  end
318
- super(:enum, name, space, names)
323
+ super(:enum, name, space, names, doc)
319
324
  @symbols = symbols
320
325
  end
321
326
 
@@ -327,11 +332,11 @@ module Avro
327
332
 
328
333
  # Valid primitive types are in PRIMITIVE_TYPES.
329
334
  class PrimitiveSchema < Schema
330
- def initialize(type)
335
+ def initialize(type, logical_type=nil)
331
336
  if PRIMITIVE_TYPES_SYM.include?(type)
332
- super(type)
337
+ super(type, logical_type)
333
338
  elsif PRIMITIVE_TYPES.include?(type)
334
- super(type.to_sym)
339
+ super(type.to_sym, logical_type)
335
340
  else
336
341
  raise AvroError.new("#{type} is not a valid primitive type.")
337
342
  end
@@ -345,12 +350,12 @@ module Avro
345
350
 
346
351
  class FixedSchema < NamedSchema
347
352
  attr_reader :size
348
- def initialize(name, space, size, names=nil)
353
+ def initialize(name, space, size, names=nil, logical_type=nil)
349
354
  # Ensure valid cto args
350
- unless size.is_a?(Fixnum) || size.is_a?(Bignum)
355
+ unless size.is_a?(Integer)
351
356
  raise AvroError, 'Fixed Schema requires a valid integer for size property.'
352
357
  end
353
- super(:fixed, name, space, names)
358
+ super(:fixed, name, space, names, nil, logical_type)
354
359
  @size = size
355
360
  end
356
361
 
@@ -361,21 +366,42 @@ module Avro
361
366
  end
362
367
 
363
368
  class Field < Schema
364
- attr_reader :type, :name, :default, :order
369
+ attr_reader :type, :name, :default, :order, :doc
365
370
 
366
- def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil)
371
+ def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil)
367
372
  @type = subparse(type, names, namespace)
368
373
  @name = name
369
374
  @default = default
370
375
  @order = order
376
+ @doc = doc
377
+ validate_default! if default? && !Avro.disable_field_default_validation
378
+ end
379
+
380
+ def default?
381
+ @default != :no_default
371
382
  end
372
383
 
373
384
  def to_avro(names=Set.new)
374
385
  {'name' => name, 'type' => type.to_avro(names)}.tap do |avro|
375
- avro['default'] = default unless default == :no_default
386
+ avro['default'] = default if default?
376
387
  avro['order'] = order if order
388
+ avro['doc'] = doc if doc
377
389
  end
378
390
  end
391
+
392
+ private
393
+
394
+ def validate_default!
395
+ type_for_default = if type.type_sym == :union
396
+ type.schemas.first
397
+ else
398
+ type
399
+ end
400
+
401
+ Avro::SchemaValidator.validate!(type_for_default, default)
402
+ rescue Avro::SchemaValidator::ValidationError => e
403
+ raise Avro::SchemaParseError, "Error validating default for #{name}: #{e.message}"
404
+ end
379
405
  end
380
406
  end
381
407
 
@@ -0,0 +1,170 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ module Avro
17
+ module SchemaCompatibility
18
+ # Perform a full, recursive check that a datum written using the writers_schema
19
+ # can be read using the readers_schema.
20
+ def self.can_read?(writers_schema, readers_schema)
21
+ Checker.new.can_read?(writers_schema, readers_schema)
22
+ end
23
+
24
+ # Perform a full, recursive check that a datum written using either the
25
+ # writers_schema or the readers_schema can be read using the other schema.
26
+ def self.mutual_read?(writers_schema, readers_schema)
27
+ Checker.new.mutual_read?(writers_schema, readers_schema)
28
+ end
29
+
30
+ # Perform a basic check that a datum written with the writers_schema could
31
+ # be read using the readers_schema. This check only includes matching the types,
32
+ # including schema promotion, and matching the full name for named types.
33
+ # Aliases for named types are not supported here, and the ruby implementation
34
+ # of Avro in general does not include support for aliases.
35
+ def self.match_schemas(writers_schema, readers_schema)
36
+ w_type = writers_schema.type_sym
37
+ r_type = readers_schema.type_sym
38
+
39
+ # This conditional is begging for some OO love.
40
+ if w_type == :union || r_type == :union
41
+ return true
42
+ end
43
+
44
+ if w_type == r_type
45
+ return true if Schema::PRIMITIVE_TYPES_SYM.include?(r_type)
46
+
47
+ case r_type
48
+ when :record
49
+ return writers_schema.fullname == readers_schema.fullname
50
+ when :error
51
+ return writers_schema.fullname == readers_schema.fullname
52
+ when :request
53
+ return true
54
+ when :fixed
55
+ return writers_schema.fullname == readers_schema.fullname &&
56
+ writers_schema.size == readers_schema.size
57
+ when :enum
58
+ return writers_schema.fullname == readers_schema.fullname
59
+ when :map
60
+ return match_schemas(writers_schema.values, readers_schema.values)
61
+ when :array
62
+ return match_schemas(writers_schema.items, readers_schema.items)
63
+ end
64
+ end
65
+
66
+ # Handle schema promotion
67
+ if w_type == :int && [:long, :float, :double].include?(r_type)
68
+ return true
69
+ elsif w_type == :long && [:float, :double].include?(r_type)
70
+ return true
71
+ elsif w_type == :float && r_type == :double
72
+ return true
73
+ elsif w_type == :string && r_type == :bytes
74
+ return true
75
+ elsif w_type == :bytes && r_type == :string
76
+ return true
77
+ end
78
+
79
+ return false
80
+ end
81
+
82
+ class Checker
83
+ SIMPLE_CHECKS = Schema::PRIMITIVE_TYPES_SYM.dup.add(:fixed).freeze
84
+
85
+ attr_reader :recursion_set
86
+ private :recursion_set
87
+
88
+ def initialize
89
+ @recursion_set = Set.new
90
+ end
91
+
92
+ def can_read?(writers_schema, readers_schema)
93
+ full_match_schemas(writers_schema, readers_schema)
94
+ end
95
+
96
+ def mutual_read?(writers_schema, readers_schema)
97
+ can_read?(writers_schema, readers_schema) && can_read?(readers_schema, writers_schema)
98
+ end
99
+
100
+ private
101
+
102
+ def full_match_schemas(writers_schema, readers_schema)
103
+ return true if recursion_in_progress?(writers_schema, readers_schema)
104
+
105
+ return false unless Avro::SchemaCompatibility.match_schemas(writers_schema, readers_schema)
106
+
107
+ if writers_schema.type_sym != :union && SIMPLE_CHECKS.include?(readers_schema.type_sym)
108
+ return true
109
+ end
110
+
111
+ case readers_schema.type_sym
112
+ when :record
113
+ match_record_schemas(writers_schema, readers_schema)
114
+ when :map
115
+ full_match_schemas(writers_schema.values, readers_schema.values)
116
+ when :array
117
+ full_match_schemas(writers_schema.items, readers_schema.items)
118
+ when :union
119
+ match_union_schemas(writers_schema, readers_schema)
120
+ when :enum
121
+ # reader's symbols must contain all writer's symbols
122
+ (writers_schema.symbols - readers_schema.symbols).empty?
123
+ else
124
+ if writers_schema.type_sym == :union && writers_schema.schemas.size == 1
125
+ full_match_schemas(writers_schema.schemas.first, readers_schema)
126
+ else
127
+ false
128
+ end
129
+ end
130
+ end
131
+
132
+ def match_union_schemas(writers_schema, readers_schema)
133
+ raise 'readers_schema must be a union' unless readers_schema.type_sym == :union
134
+
135
+ case writers_schema.type_sym
136
+ when :union
137
+ writers_schema.schemas.all? { |writer_type| full_match_schemas(writer_type, readers_schema) }
138
+ else
139
+ readers_schema.schemas.any? { |reader_type| full_match_schemas(writers_schema, reader_type) }
140
+ end
141
+ end
142
+
143
+ def match_record_schemas(writers_schema, readers_schema)
144
+ return false if writers_schema.type_sym == :union
145
+
146
+ writer_fields_hash = writers_schema.fields_hash
147
+ readers_schema.fields.each do |field|
148
+ if writer_fields_hash.key?(field.name)
149
+ return false unless full_match_schemas(writer_fields_hash[field.name].type, field.type)
150
+ else
151
+ return false unless field.default?
152
+ end
153
+ end
154
+
155
+ return true
156
+ end
157
+
158
+ def recursion_in_progress?(writers_schema, readers_schema)
159
+ key = [writers_schema.object_id, readers_schema.object_id]
160
+
161
+ if recursion_set.include?(key)
162
+ true
163
+ else
164
+ recursion_set.add(key)
165
+ false
166
+ end
167
+ end
168
+ end
169
+ end
170
+ end