avro 1.9.0 → 1.10.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,7 +7,7 @@
7
7
  # "License"); you may not use this file except in compliance
8
8
  # with the License. You may obtain a copy of the License at
9
9
  #
10
- # http://www.apache.org/licenses/LICENSE-2.0
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
11
  #
12
12
  # Unless required by applicable law or agreed to in writing, software
13
13
  # distributed under the License is distributed on an "AS IS" BASIS,
data/lib/avro/protocol.rb CHANGED
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
data/lib/avro/schema.rb CHANGED
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -29,11 +29,15 @@ module Avro
29
29
  NAMED_TYPES_SYM = Set.new(NAMED_TYPES.map(&:to_sym))
30
30
  VALID_TYPES_SYM = Set.new(VALID_TYPES.map(&:to_sym))
31
31
 
32
+ NAME_REGEX = /^([A-Za-z_][A-Za-z0-9_]*)(\.([A-Za-z_][A-Za-z0-9_]*))*$/
33
+
32
34
  INT_MIN_VALUE = -(1 << 31)
33
35
  INT_MAX_VALUE = (1 << 31) - 1
34
36
  LONG_MIN_VALUE = -(1 << 63)
35
37
  LONG_MAX_VALUE = (1 << 63) - 1
36
38
 
39
+ DEFAULT_VALIDATE_OPTIONS = { recursive: true, encoded: false }.freeze
40
+
37
41
  def self.parse(json_string)
38
42
  real_parse(MultiJson.load(json_string), {})
39
43
  end
@@ -53,23 +57,34 @@ module Avro
53
57
 
54
58
  type_sym = type.to_sym
55
59
  if PRIMITIVE_TYPES_SYM.include?(type_sym)
56
- return PrimitiveSchema.new(type_sym, logical_type)
57
-
60
+ case type_sym
61
+ when :bytes
62
+ precision = json_obj['precision']
63
+ scale = json_obj['scale']
64
+ return BytesSchema.new(type_sym, logical_type, precision, scale)
65
+ else
66
+ return PrimitiveSchema.new(type_sym, logical_type)
67
+ end
58
68
  elsif NAMED_TYPES_SYM.include? type_sym
59
69
  name = json_obj['name']
70
+ if !Avro.disable_schema_name_validation && name !~ NAME_REGEX
71
+ raise SchemaParseError, "Name #{name} is invalid for type #{type}!"
72
+ end
60
73
  namespace = json_obj.include?('namespace') ? json_obj['namespace'] : default_namespace
74
+ aliases = json_obj['aliases']
61
75
  case type_sym
62
76
  when :fixed
63
77
  size = json_obj['size']
64
- return FixedSchema.new(name, namespace, size, names, logical_type)
78
+ return FixedSchema.new(name, namespace, size, names, logical_type, aliases)
65
79
  when :enum
66
80
  symbols = json_obj['symbols']
67
81
  doc = json_obj['doc']
68
- return EnumSchema.new(name, namespace, symbols, names, doc)
82
+ default = json_obj['default']
83
+ return EnumSchema.new(name, namespace, symbols, names, doc, default, aliases)
69
84
  when :record, :error
70
85
  fields = json_obj['fields']
71
86
  doc = json_obj['doc']
72
- return RecordSchema.new(name, namespace, fields, names, type_sym, doc)
87
+ return RecordSchema.new(name, namespace, fields, names, type_sym, doc, aliases)
73
88
  else
74
89
  raise SchemaParseError.new("Unknown named type: #{type}")
75
90
  end
@@ -96,7 +111,7 @@ module Avro
96
111
  end
97
112
 
98
113
  # Determine if a ruby datum is an instance of a schema
99
- def self.validate(expected_schema, logical_datum, options = { recursive: true, encoded: false })
114
+ def self.validate(expected_schema, logical_datum, options = DEFAULT_VALIDATE_OPTIONS)
100
115
  SchemaValidator.validate!(expected_schema, logical_datum, options)
101
116
  true
102
117
  rescue SchemaValidator::ValidationError
@@ -131,6 +146,49 @@ module Avro
131
146
  Digest::SHA256.hexdigest(parsing_form).to_i(16)
132
147
  end
133
148
 
149
+ CRC_EMPTY = 0xc15d213aa4d7a795
150
+
151
+ # The Java library caches this value after it is initialized, so this pattern
152
+ # mimics that.
153
+ @@fp_table = nil
154
+ def initFPTable
155
+ @@fp_table = Array.new(256)
156
+ 256.times do |i|
157
+ fp = i
158
+ 8.times do
159
+ fp = (fp >> 1) ^ ( CRC_EMPTY & -( fp & 1 ) )
160
+ end
161
+ @@fp_table[i] = fp
162
+ end
163
+ end
164
+
165
+ def crc_64_avro_fingerprint
166
+ parsing_form = Avro::SchemaNormalization.to_parsing_form(self)
167
+ data_bytes = parsing_form.unpack("C*")
168
+
169
+ initFPTable unless @@fp_table
170
+
171
+ fp = CRC_EMPTY
172
+ data_bytes.each do |b|
173
+ fp = (fp >> 8) ^ @@fp_table[ (fp ^ b) & 0xff ]
174
+ end
175
+ fp
176
+ end
177
+
178
+ SINGLE_OBJECT_MAGIC_NUMBER = [0xC3, 0x01]
179
+ def single_object_encoding_header
180
+ [SINGLE_OBJECT_MAGIC_NUMBER, single_object_schema_fingerprint].flatten
181
+ end
182
+ def single_object_schema_fingerprint
183
+ working = crc_64_avro_fingerprint
184
+ bytes = Array.new(8)
185
+ 8.times do |i|
186
+ bytes[i] = (working & 0xff)
187
+ working = working >> 8
188
+ end
189
+ bytes
190
+ end
191
+
134
192
  def read?(writers_schema)
135
193
  SchemaCompatibility.can_read?(writers_schema, self)
136
194
  end
@@ -143,11 +201,11 @@ module Avro
143
201
  SchemaCompatibility.mutual_read?(other_schema, self)
144
202
  end
145
203
 
146
- def ==(other, seen=nil)
204
+ def ==(other, _seen=nil)
147
205
  other.is_a?(Schema) && type_sym == other.type_sym
148
206
  end
149
207
 
150
- def hash(seen=nil)
208
+ def hash(_seen=nil)
151
209
  type_sym.hash
152
210
  end
153
211
 
@@ -165,7 +223,7 @@ module Avro
165
223
  end
166
224
  end
167
225
 
168
- def to_avro(names=nil)
226
+ def to_avro(_names=nil)
169
227
  props = {'type' => type}
170
228
  props['logicalType'] = logical_type if logical_type
171
229
  props
@@ -175,14 +233,26 @@ module Avro
175
233
  MultiJson.dump to_avro
176
234
  end
177
235
 
236
+ def validate_aliases!
237
+ unless aliases.nil? ||
238
+ (aliases.is_a?(Array) && aliases.all? { |a| a.is_a?(String) })
239
+
240
+ raise Avro::SchemaParseError,
241
+ "Invalid aliases value #{aliases.inspect} for #{type} #{name}. Must be an array of strings."
242
+ end
243
+ end
244
+ private :validate_aliases!
245
+
178
246
  class NamedSchema < Schema
179
- attr_reader :name, :namespace
247
+ attr_reader :name, :namespace, :aliases
180
248
 
181
- def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil)
249
+ def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil, aliases=nil)
182
250
  super(type, logical_type)
183
251
  @name, @namespace = Name.extract_namespace(name, namespace)
184
- @doc = doc
185
- names = Name.add_name(names, self)
252
+ @doc = doc
253
+ @aliases = aliases
254
+ validate_aliases! if aliases
255
+ Name.add_name(names, self)
186
256
  end
187
257
 
188
258
  def to_avro(names=Set.new)
@@ -192,33 +262,53 @@ module Avro
192
262
  end
193
263
  props = {'name' => @name}
194
264
  props.merge!('namespace' => @namespace) if @namespace
195
- props.merge!('doc' => @doc) if @doc
265
+ props['namespace'] = @namespace if @namespace
266
+ props['doc'] = @doc if @doc
267
+ props['aliases'] = aliases if aliases && aliases.any?
196
268
  super.merge props
197
269
  end
198
270
 
199
271
  def fullname
200
272
  @fullname ||= Name.make_fullname(@name, @namespace)
201
273
  end
274
+
275
+ def fullname_aliases
276
+ @fullname_aliases ||= if aliases
277
+ aliases.map { |a| Name.make_fullname(a, namespace) }
278
+ else
279
+ []
280
+ end
281
+ end
282
+
283
+ def match_fullname?(name)
284
+ name == fullname || fullname_aliases.include?(name)
285
+ end
202
286
  end
203
287
 
204
288
  class RecordSchema < NamedSchema
205
289
  attr_reader :fields, :doc
206
290
 
207
291
  def self.make_field_objects(field_data, names, namespace=nil)
208
- field_objects, field_names = [], Set.new
209
- field_data.each_with_index do |field, i|
292
+ field_objects, field_names, alias_names = [], Set.new, Set.new
293
+ field_data.each do |field|
210
294
  if field.respond_to?(:[]) # TODO(jmhodges) wtffffff
211
295
  type = field['type']
212
296
  name = field['name']
213
297
  default = field.key?('default') ? field['default'] : :no_default
214
298
  order = field['order']
215
299
  doc = field['doc']
216
- new_field = Field.new(type, name, default, order, names, namespace, doc)
300
+ aliases = field['aliases']
301
+ new_field = Field.new(type, name, default, order, names, namespace, doc, aliases)
217
302
  # make sure field name has not been used yet
218
303
  if field_names.include?(new_field.name)
219
304
  raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
220
305
  end
221
306
  field_names << new_field.name
307
+ # make sure alias has not been used yet
308
+ if new_field.aliases && alias_names.intersect?(new_field.aliases.to_set)
309
+ raise SchemaParseError, "Alias #{(alias_names & new_field.aliases).to_a} already in use"
310
+ end
311
+ alias_names.merge(new_field.aliases) if new_field.aliases
222
312
  else
223
313
  raise SchemaParseError, "Not a valid field: #{field}"
224
314
  end
@@ -227,14 +317,14 @@ module Avro
227
317
  field_objects
228
318
  end
229
319
 
230
- def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil)
320
+ def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil, aliases=nil)
231
321
  if schema_type == :request || schema_type == 'request'
232
322
  @type_sym = schema_type.to_sym
233
323
  @namespace = namespace
234
324
  @name = nil
235
325
  @doc = nil
236
326
  else
237
- super(schema_type, name, namespace, names, doc)
327
+ super(schema_type, name, namespace, names, doc, nil, aliases)
238
328
  end
239
329
  @fields = if fields
240
330
  RecordSchema.make_field_objects(fields, names, self.namespace)
@@ -247,6 +337,16 @@ module Avro
247
337
  @fields_hash ||= fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
248
338
  end
249
339
 
340
+ def fields_by_alias
341
+ @fields_by_alias ||= fields.each_with_object({}) do |field, hash|
342
+ if field.aliases
343
+ field.aliases.each do |a|
344
+ hash[a] = field
345
+ end
346
+ end
347
+ end
348
+ end
349
+
250
350
  def to_avro(names=Set.new)
251
351
  hsh = super
252
352
  return hsh unless hsh.is_a?(Hash)
@@ -313,20 +413,41 @@ module Avro
313
413
  end
314
414
 
315
415
  class EnumSchema < NamedSchema
316
- attr_reader :symbols, :doc
416
+ SYMBOL_REGEX = /^[A-Za-z_][A-Za-z0-9_]*$/
317
417
 
318
- def initialize(name, space, symbols, names=nil, doc=nil)
418
+ attr_reader :symbols, :doc, :default
419
+
420
+ def initialize(name, space, symbols, names=nil, doc=nil, default=nil, aliases=nil)
319
421
  if symbols.uniq.length < symbols.length
320
422
  fail_msg = "Duplicate symbol: #{symbols}"
321
423
  raise Avro::SchemaParseError, fail_msg
322
424
  end
323
- super(:enum, name, space, names, doc)
425
+
426
+ if !Avro.disable_enum_symbol_validation
427
+ invalid_symbols = symbols.select { |symbol| symbol !~ SYMBOL_REGEX }
428
+
429
+ if invalid_symbols.any?
430
+ raise SchemaParseError,
431
+ "Invalid symbols for #{name}: #{invalid_symbols.join(', ')} don't match #{SYMBOL_REGEX.inspect}"
432
+ end
433
+ end
434
+
435
+ if default && !symbols.include?(default)
436
+ raise Avro::SchemaParseError, "Default '#{default}' is not a valid symbol for enum #{name}"
437
+ end
438
+
439
+ super(:enum, name, space, names, doc, nil, aliases)
440
+ @default = default
324
441
  @symbols = symbols
325
442
  end
326
443
 
327
- def to_avro(names=Set.new)
444
+ def to_avro(_names=Set.new)
328
445
  avro = super
329
- avro.is_a?(Hash) ? avro.merge('symbols' => symbols) : avro
446
+ if avro.is_a?(Hash)
447
+ avro['symbols'] = symbols
448
+ avro['default'] = default if default
449
+ end
450
+ avro
330
451
  end
331
452
  end
332
453
 
@@ -348,14 +469,32 @@ module Avro
348
469
  end
349
470
  end
350
471
 
472
+ class BytesSchema < PrimitiveSchema
473
+ attr_reader :precision, :scale
474
+ def initialize(type, logical_type=nil, precision=nil, scale=nil)
475
+ super(type.to_sym, logical_type)
476
+ @precision = precision
477
+ @scale = scale
478
+ end
479
+
480
+ def to_avro(names=nil)
481
+ avro = super
482
+ return avro if avro.is_a?(String)
483
+
484
+ avro['precision'] = precision if precision
485
+ avro['scale'] = scale if scale
486
+ avro
487
+ end
488
+ end
489
+
351
490
  class FixedSchema < NamedSchema
352
491
  attr_reader :size
353
- def initialize(name, space, size, names=nil, logical_type=nil)
492
+ def initialize(name, space, size, names=nil, logical_type=nil, aliases=nil)
354
493
  # Ensure valid cto args
355
494
  unless size.is_a?(Integer)
356
495
  raise AvroError, 'Fixed Schema requires a valid integer for size property.'
357
496
  end
358
- super(:fixed, name, space, names, nil, logical_type)
497
+ super(:fixed, name, space, names, nil, logical_type, aliases)
359
498
  @size = size
360
499
  end
361
500
 
@@ -366,14 +505,16 @@ module Avro
366
505
  end
367
506
 
368
507
  class Field < Schema
369
- attr_reader :type, :name, :default, :order, :doc
508
+ attr_reader :type, :name, :default, :order, :doc, :aliases
370
509
 
371
- def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil)
510
+ def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil, aliases=nil)
372
511
  @type = subparse(type, names, namespace)
373
512
  @name = name
374
513
  @default = default
375
514
  @order = order
376
515
  @doc = doc
516
+ @aliases = aliases
517
+ validate_aliases! if aliases
377
518
  validate_default! if default? && !Avro.disable_field_default_validation
378
519
  end
379
520
 
@@ -389,6 +530,10 @@ module Avro
389
530
  end
390
531
  end
391
532
 
533
+ def alias_names
534
+ @alias_names ||= Array(aliases)
535
+ end
536
+
392
537
  private
393
538
 
394
539
  def validate_default!
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -15,6 +15,9 @@
15
15
  # limitations under the License.
16
16
  module Avro
17
17
  module SchemaCompatibility
18
+ INT_COERCIBLE_TYPES_SYM = [:long, :float, :double].freeze
19
+ LONG_COERCIBLE_TYPES_SYM = [:float, :double].freeze
20
+
18
21
  # Perform a full, recursive check that a datum written using the writers_schema
19
22
  # can be read using the readers_schema.
20
23
  def self.can_read?(writers_schema, readers_schema)
@@ -28,11 +31,12 @@ module Avro
28
31
  end
29
32
 
30
33
  # Perform a basic check that a datum written with the writers_schema could
31
- # be read using the readers_schema. This check only includes matching the types,
32
- # including schema promotion, and matching the full name for named types.
33
- # Aliases for named types are not supported here, and the ruby implementation
34
- # of Avro in general does not include support for aliases.
34
+ # be read using the readers_schema. This check includes matching the types,
35
+ # including schema promotion, and matching the full name (including aliases) for named types.
35
36
  def self.match_schemas(writers_schema, readers_schema)
37
+ # Bypass deeper checks if the schemas are the same Ruby object
38
+ return true if writers_schema.equal?(readers_schema)
39
+
36
40
  w_type = writers_schema.type_sym
37
41
  r_type = readers_schema.type_sym
38
42
 
@@ -46,16 +50,16 @@ module Avro
46
50
 
47
51
  case r_type
48
52
  when :record
49
- return writers_schema.fullname == readers_schema.fullname
53
+ return readers_schema.match_fullname?(writers_schema.fullname)
50
54
  when :error
51
- return writers_schema.fullname == readers_schema.fullname
55
+ return readers_schema.match_fullname?(writers_schema.fullname)
52
56
  when :request
53
57
  return true
54
58
  when :fixed
55
- return writers_schema.fullname == readers_schema.fullname &&
59
+ return readers_schema.match_fullname?(writers_schema.fullname) &&
56
60
  writers_schema.size == readers_schema.size
57
61
  when :enum
58
- return writers_schema.fullname == readers_schema.fullname
62
+ return readers_schema.match_fullname?(writers_schema.fullname)
59
63
  when :map
60
64
  return match_schemas(writers_schema.values, readers_schema.values)
61
65
  when :array
@@ -64,9 +68,9 @@ module Avro
64
68
  end
65
69
 
66
70
  # Handle schema promotion
67
- if w_type == :int && [:long, :float, :double].include?(r_type)
71
+ if w_type == :int && INT_COERCIBLE_TYPES_SYM.include?(r_type)
68
72
  return true
69
- elsif w_type == :long && [:float, :double].include?(r_type)
73
+ elsif w_type == :long && LONG_COERCIBLE_TYPES_SYM.include?(r_type)
70
74
  return true
71
75
  elsif w_type == :float && r_type == :double
72
76
  return true
@@ -118,8 +122,8 @@ module Avro
118
122
  when :union
119
123
  match_union_schemas(writers_schema, readers_schema)
120
124
  when :enum
121
- # reader's symbols must contain all writer's symbols
122
- (writers_schema.symbols - readers_schema.symbols).empty?
125
+ # reader's symbols must contain all writer's symbols, unless the reader declares a default
126
+ (writers_schema.symbols - readers_schema.symbols).empty? || !readers_schema.default.nil?
123
127
  else
124
128
  if writers_schema.type_sym == :union && writers_schema.schemas.size == 1
125
129
  full_match_schemas(writers_schema.schemas.first, readers_schema)
@@ -148,7 +152,14 @@ module Avro
148
152
  if writer_fields_hash.key?(field.name)
149
153
  return false unless full_match_schemas(writer_fields_hash[field.name].type, field.type)
150
154
  else
151
- return false unless field.default?
155
+ names = writer_fields_hash.keys & field.alias_names
156
+ if names.size > 1
157
+ return false
158
+ elsif names.size == 1
159
+ return false unless full_match_schemas(writer_fields_hash[names.first].type, field.type)
160
+ else
161
+ return false unless field.default?
162
+ end
152
163
  end
153
164
  end
154
165