avro 1.9.2 → 1.11.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  # Licensed to the Apache Software Foundation (ASF) under one
2
3
  # or more contributor license agreements. See the NOTICE file
3
4
  # distributed with this work for additional information
@@ -15,6 +16,9 @@
15
16
  # limitations under the License.
16
17
  module Avro
17
18
  module SchemaCompatibility
19
+ INT_COERCIBLE_TYPES_SYM = [:long, :float, :double].freeze
20
+ LONG_COERCIBLE_TYPES_SYM = [:float, :double].freeze
21
+
18
22
  # Perform a full, recursive check that a datum written using the writers_schema
19
23
  # can be read using the readers_schema.
20
24
  def self.can_read?(writers_schema, readers_schema)
@@ -28,11 +32,12 @@ module Avro
28
32
  end
29
33
 
30
34
  # Perform a basic check that a datum written with the writers_schema could
31
- # be read using the readers_schema. This check only includes matching the types,
32
- # including schema promotion, and matching the full name for named types.
33
- # Aliases for named types are not supported here, and the ruby implementation
34
- # of Avro in general does not include support for aliases.
35
+ # be read using the readers_schema. This check includes matching the types,
36
+ # including schema promotion, and matching the full name (including aliases) for named types.
35
37
  def self.match_schemas(writers_schema, readers_schema)
38
+ # Bypass deeper checks if the schemas are the same Ruby objects
39
+ return true if writers_schema.equal?(readers_schema)
40
+
36
41
  w_type = writers_schema.type_sym
37
42
  r_type = readers_schema.type_sym
38
43
 
@@ -42,31 +47,25 @@ module Avro
42
47
  end
43
48
 
44
49
  if w_type == r_type
45
- return true if Schema::PRIMITIVE_TYPES_SYM.include?(r_type)
50
+ return readers_schema.match_schema?(writers_schema) if Schema::PRIMITIVE_TYPES_SYM.include?(r_type)
46
51
 
47
52
  case r_type
48
- when :record
49
- return writers_schema.fullname == readers_schema.fullname
50
- when :error
51
- return writers_schema.fullname == readers_schema.fullname
52
53
  when :request
53
54
  return true
54
- when :fixed
55
- return writers_schema.fullname == readers_schema.fullname &&
56
- writers_schema.size == readers_schema.size
57
- when :enum
58
- return writers_schema.fullname == readers_schema.fullname
59
55
  when :map
60
56
  return match_schemas(writers_schema.values, readers_schema.values)
61
57
  when :array
62
58
  return match_schemas(writers_schema.items, readers_schema.items)
59
+ else
60
+ return readers_schema.match_schema?(writers_schema)
63
61
  end
64
62
  end
65
63
 
66
64
  # Handle schema promotion
67
- if w_type == :int && [:long, :float, :double].include?(r_type)
65
+ # rubocop:disable Lint/DuplicateBranch
66
+ if w_type == :int && INT_COERCIBLE_TYPES_SYM.include?(r_type)
68
67
  return true
69
- elsif w_type == :long && [:float, :double].include?(r_type)
68
+ elsif w_type == :long && LONG_COERCIBLE_TYPES_SYM.include?(r_type)
70
69
  return true
71
70
  elsif w_type == :float && r_type == :double
72
71
  return true
@@ -75,8 +74,13 @@ module Avro
75
74
  elsif w_type == :bytes && r_type == :string
76
75
  return true
77
76
  end
77
+ # rubocop:enable Lint/DuplicateBranch
78
78
 
79
- return false
79
+ if readers_schema.respond_to?(:match_schema?)
80
+ readers_schema.match_schema?(writers_schema)
81
+ else
82
+ false
83
+ end
80
84
  end
81
85
 
82
86
  class Checker
@@ -118,8 +122,8 @@ module Avro
118
122
  when :union
119
123
  match_union_schemas(writers_schema, readers_schema)
120
124
  when :enum
121
- # reader's symbols must contain all writer's symbols
122
- (writers_schema.symbols - readers_schema.symbols).empty?
125
+ # reader's symbols must contain all writer's symbols or reader has default
126
+ (writers_schema.symbols - readers_schema.symbols).empty? || !readers_schema.default.nil?
123
127
  else
124
128
  if writers_schema.type_sym == :union && writers_schema.schemas.size == 1
125
129
  full_match_schemas(writers_schema.schemas.first, readers_schema)
@@ -148,7 +152,14 @@ module Avro
148
152
  if writer_fields_hash.key?(field.name)
149
153
  return false unless full_match_schemas(writer_fields_hash[field.name].type, field.type)
150
154
  else
151
- return false unless field.default?
155
+ names = writer_fields_hash.keys & field.alias_names
156
+ if names.size > 1
157
+ return false
158
+ elsif names.size == 1
159
+ return false unless full_match_schemas(writer_fields_hash[names.first].type, field.type)
160
+ else
161
+ return false unless field.default?
162
+ end
152
163
  end
153
164
  end
154
165
 
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  # Licensed to the Apache Software Foundation (ASF) under one
2
3
  # or more contributor license agreements. See the NOTICE file
3
4
  # distributed with this work for additional information
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  # Licensed to the Apache Software Foundation (ASF) under one
2
3
  # or more contributor license agreements. See the NOTICE file
3
4
  # distributed with this work for additional information
@@ -16,22 +17,24 @@
16
17
 
17
18
  module Avro
18
19
  class SchemaValidator
19
- ROOT_IDENTIFIER = '.'.freeze
20
- PATH_SEPARATOR = '.'.freeze
21
- INT_RANGE = Schema::INT_MIN_VALUE..Schema::INT_MAX_VALUE
22
- LONG_RANGE = Schema::LONG_MIN_VALUE..Schema::LONG_MAX_VALUE
20
+ ROOT_IDENTIFIER = '.'
21
+ PATH_SEPARATOR = '.'
22
+ INT_RANGE = (Schema::INT_MIN_VALUE..Schema::INT_MAX_VALUE).freeze
23
+ LONG_RANGE = (Schema::LONG_MIN_VALUE..Schema::LONG_MAX_VALUE).freeze
23
24
  COMPLEX_TYPES = [:array, :error, :map, :record, :request].freeze
24
25
  BOOLEAN_VALUES = [true, false].freeze
26
+ DEFAULT_VALIDATION_OPTIONS = { recursive: true, encoded: false, fail_on_extra_fields: false }.freeze
27
+ RECURSIVE_SIMPLE_VALIDATION_OPTIONS = { encoded: true }.freeze
28
+ RUBY_CLASS_TO_AVRO_TYPE = {
29
+ NilClass => 'null',
30
+ String => 'string',
31
+ Float => 'float',
32
+ Hash => 'record'
33
+ }.freeze
25
34
 
26
35
  class Result
27
- attr_reader :errors
28
-
29
- def initialize
30
- @errors = []
31
- end
32
-
33
36
  def <<(error)
34
- @errors << error
37
+ errors << error
35
38
  end
36
39
 
37
40
  def add_error(path, message)
@@ -39,11 +42,16 @@ module Avro
39
42
  end
40
43
 
41
44
  def failure?
42
- @errors.any?
45
+ defined?(@errors) && errors.any?
43
46
  end
44
47
 
45
48
  def to_s
46
- errors.join("\n")
49
+ failure? ? errors.join("\n") : ''
50
+ end
51
+
52
+ def errors
53
+ # Use less memory for success results by lazily creating the errors array
54
+ @errors ||= []
47
55
  end
48
56
  end
49
57
 
@@ -63,12 +71,9 @@ module Avro
63
71
  TypeMismatchError = Class.new(ValidationError)
64
72
 
65
73
  class << self
66
- def validate!(expected_schema, logical_datum, options = { recursive: true, encoded: false, fail_on_extra_fields: false })
67
- options ||= {}
68
- options[:recursive] = true unless options.key?(:recursive)
69
-
74
+ def validate!(expected_schema, logical_datum, options = DEFAULT_VALIDATION_OPTIONS)
70
75
  result = Result.new
71
- if options[:recursive]
76
+ if options.fetch(:recursive, true)
72
77
  validate_recursive(expected_schema, logical_datum, ROOT_IDENTIFIER, result, options)
73
78
  else
74
79
  validate_simple(expected_schema, logical_datum, ROOT_IDENTIFIER, result, options)
@@ -79,10 +84,10 @@ module Avro
79
84
 
80
85
  private
81
86
 
82
- def validate_recursive(expected_schema, logical_datum, path, result, options = {})
87
+ def validate_recursive(expected_schema, logical_datum, path, result, options)
83
88
  datum = resolve_datum(expected_schema, logical_datum, options[:encoded])
84
89
 
85
- validate_simple(expected_schema, datum, path, result, encoded: true)
90
+ validate_simple(expected_schema, datum, path, result, RECURSIVE_SIMPLE_VALIDATION_OPTIONS)
86
91
 
87
92
  case expected_schema.type_sym
88
93
  when :array
@@ -95,7 +100,8 @@ module Avro
95
100
  fail TypeMismatchError unless datum.is_a?(Hash)
96
101
  expected_schema.fields.each do |field|
97
102
  deeper_path = deeper_path_for_hash(field.name, path)
98
- validate_recursive(field.type, datum[field.name], deeper_path, result, options)
103
+ nested_value = datum.key?(field.name) ? datum[field.name] : datum[field.name.to_sym]
104
+ validate_recursive(field.type, nested_value, deeper_path, result, options)
99
105
  end
100
106
  if options[:fail_on_extra_fields]
101
107
  datum_fields = datum.keys.map(&:to_s)
@@ -109,7 +115,7 @@ module Avro
109
115
  result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
110
116
  end
111
117
 
112
- def validate_simple(expected_schema, logical_datum, path, result, options = {})
118
+ def validate_simple(expected_schema, logical_datum, path, result, options)
113
119
  datum = resolve_datum(expected_schema, logical_datum, options[:encoded])
114
120
  validate_type(expected_schema)
115
121
 
@@ -127,7 +133,7 @@ module Avro
127
133
  fail TypeMismatchError unless datum.is_a?(Integer)
128
134
  result.add_error(path, "out of bound value #{datum}") unless LONG_RANGE.cover?(datum)
129
135
  when :float, :double
130
- fail TypeMismatchError unless datum.is_a?(Float) || datum.is_a?(Integer)
136
+ fail TypeMismatchError unless datum.is_a?(Float) || datum.is_a?(Integer) || datum.is_a?(BigDecimal)
131
137
  when :fixed
132
138
  if datum.is_a? String
133
139
  result.add_error(path, fixed_string_message(expected_schema.size, datum)) unless datum.bytesize == expected_schema.size
@@ -163,14 +169,14 @@ module Avro
163
169
  "expected enum with values #{symbols}, got #{actual_value_message(datum)}"
164
170
  end
165
171
 
166
- def validate_array(expected_schema, datum, path, result, options = {})
172
+ def validate_array(expected_schema, datum, path, result, options)
167
173
  fail TypeMismatchError unless datum.is_a?(Array)
168
174
  datum.each_with_index do |d, i|
169
- validate_recursive(expected_schema.items, d, path + "[#{i}]", result, options)
175
+ validate_recursive(expected_schema.items, d, "#{path}[#{i}]", result, options)
170
176
  end
171
177
  end
172
178
 
173
- def validate_map(expected_schema, datum, path, result, options = {})
179
+ def validate_map(expected_schema, datum, path, result, options)
174
180
  fail TypeMismatchError unless datum.is_a?(Hash)
175
181
  datum.keys.each do |k|
176
182
  result.add_error(path, "unexpected key type '#{ruby_to_avro_type(k.class)}' in map") unless k.is_a?(String)
@@ -181,7 +187,7 @@ module Avro
181
187
  end
182
188
  end
183
189
 
184
- def validate_union(expected_schema, datum, path, result, options = {})
190
+ def validate_union(expected_schema, datum, path, result, options)
185
191
  if expected_schema.schemas.size == 1
186
192
  validate_recursive(expected_schema.schemas.first, datum, path, result, options)
187
193
  return
@@ -201,6 +207,9 @@ module Avro
201
207
 
202
208
  def first_compatible_type(datum, expected_schema, path, failures, options = {})
203
209
  expected_schema.schemas.find do |schema|
210
+ # Avoid expensive validation if we're just validating a nil
211
+ next datum.nil? if schema.type_sym == :null
212
+
204
213
  result = Result.new
205
214
  validate_recursive(schema, datum, path, result, options)
206
215
  failures << { type: schema.type_sym, result: result } if result.failure?
@@ -209,7 +218,9 @@ module Avro
209
218
  end
210
219
 
211
220
  def deeper_path_for_hash(sub_key, path)
212
- "#{path}#{PATH_SEPARATOR}#{sub_key}".squeeze(PATH_SEPARATOR)
221
+ deeper_path = +"#{path}#{PATH_SEPARATOR}#{sub_key}"
222
+ deeper_path.squeeze!(PATH_SEPARATOR)
223
+ deeper_path.freeze
213
224
  end
214
225
 
215
226
  def actual_value_message(value)
@@ -226,12 +237,7 @@ module Avro
226
237
  end
227
238
 
228
239
  def ruby_to_avro_type(ruby_class)
229
- {
230
- NilClass => 'null',
231
- String => 'string',
232
- Float => 'float',
233
- Hash => 'record'
234
- }.fetch(ruby_class, ruby_class)
240
+ RUBY_CLASS_TO_AVRO_TYPE.fetch(ruby_class, ruby_class)
235
241
  end
236
242
 
237
243
  def ruby_integer_to_avro_type(value)
data/lib/avro.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  # Licensed to the Apache Software Foundation (ASF) under one
2
3
  # or more contributor license agreements. See the NOTICE file
3
4
  # distributed with this work for additional information
@@ -22,24 +23,36 @@ require 'stringio'
22
23
  require 'zlib'
23
24
 
24
25
  module Avro
25
- VERSION = "FIXME"
26
+ VERSION = File.read("#{__dir__}/avro/VERSION.txt").freeze
26
27
 
27
28
  class AvroError < StandardError; end
28
29
 
29
30
  class AvroTypeError < Avro::AvroError
30
31
  def initialize(schm=nil, datum=nil, msg=nil)
31
- msg ||= "Not a #{schm.to_s}: #{datum}"
32
+ msg ||= "Not a #{schm}: #{datum}"
32
33
  super(msg)
33
34
  end
34
35
  end
35
36
 
36
37
  class << self
38
+ attr_writer :disable_enum_symbol_validation
37
39
  attr_writer :disable_field_default_validation
40
+ attr_writer :disable_schema_name_validation
41
+
42
+ def disable_enum_symbol_validation
43
+ @disable_enum_symbol_validation ||=
44
+ ENV.fetch('AVRO_DISABLE_ENUM_SYMBOL_VALIDATION', '') != ''
45
+ end
38
46
 
39
47
  def disable_field_default_validation
40
48
  @disable_field_default_validation ||=
41
49
  ENV.fetch('AVRO_DISABLE_FIELD_DEFAULT_VALIDATION', '') != ''
42
50
  end
51
+
52
+ def disable_schema_name_validation
53
+ @disable_schema_name_validation ||=
54
+ ENV.fetch('AVRO_DISABLE_SCHEMA_NAME_VALIDATION', '') != ''
55
+ end
43
56
  end
44
57
  end
45
58
 
data/test/case_finder.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  #
2
3
  # Licensed to the Apache Software Foundation (ASF) under one
3
4
  # or more contributor license agreements. See the NOTICE file
@@ -44,13 +45,18 @@ class CaseFinder
44
45
  private
45
46
 
46
47
  def scan_case
47
- if id = @scanner.scan(/\/\/ \d+\n/)
48
+ if (id = @scanner.scan(/\/\/ \d+\n/))
48
49
  while @scanner.skip(/\/\/ .*\n/); end
49
50
 
50
51
  input = scan_input
51
52
  canonical = scan_canonical
52
53
  fingerprint = scan_fingerprint
53
-
54
+ if not fingerprint and @cases
55
+ fingerprint = @cases[-1].fingerprint
56
+ end
57
+ if fingerprint
58
+ fingerprint = fingerprint.to_i & 0xFFFF_FFFF_FFFF_FFFF
59
+ end
54
60
  Case.new(id, input, canonical, fingerprint)
55
61
  else
56
62
  @scanner.skip(/.*\n/)
@@ -61,7 +67,7 @@ class CaseFinder
61
67
  def scan_item(name)
62
68
  if @scanner.scan(/<<#{name}\n/)
63
69
  lines = []
64
- while line = @scanner.scan(/.+\n/)
70
+ while (line = @scanner.scan(/.+\n/))
65
71
  break if line.chomp == name
66
72
  lines << line
67
73
  end
data/test/random_data.rb CHANGED
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env ruby
1
+ # frozen_string_literal: true
2
2
  # Licensed to the Apache Software Foundation (ASF) under one
3
3
  # or more contributor license agreements. See the NOTICE file
4
4
  # distributed with this work for additional information
@@ -6,9 +6,9 @@
6
6
  # to you under the Apache License, Version 2.0 (the
7
7
  # "License"); you may not use this file except in compliance
8
8
  # with the License. You may obtain a copy of the License at
9
- #
9
+ #
10
10
  # https://www.apache.org/licenses/LICENSE-2.0
11
- #
11
+ #
12
12
  # Unless required by applicable law or agreed to in writing, software
13
13
  # distributed under the License is distributed on an "AS IS" BASIS,
14
14
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -75,7 +75,7 @@ class RandomData
75
75
  return nil if len == 0
76
76
  symbols[rand(len)]
77
77
  when :fixed
78
- f = ""
78
+ f = +""
79
79
  schm.size.times { f << BYTEPOOL[rand(BYTEPOOL.size), 1] }
80
80
  f
81
81
  end
@@ -85,8 +85,10 @@ class RandomData
85
85
  case schm.logical_type
86
86
  when 'date'
87
87
  Avro::LogicalTypes::IntDate.decode(rand_int)
88
- when 'timestamp-millis', 'timestamp-micros'
88
+ when 'timestamp-micros'
89
89
  Avro::LogicalTypes::TimestampMicros.decode(rand_long)
90
+ when 'timestamp-millis'
91
+ Avro::LogicalTypes::TimestampMillis.decode(rand_long)
90
92
  end
91
93
  end
92
94
 
@@ -94,7 +96,7 @@ class RandomData
94
96
  BYTEPOOL = '12345abcd'
95
97
 
96
98
  def randstr(chars=CHARPOOL, length=20)
97
- str = ''
99
+ str = +''
98
100
  rand(length+1).times { str << chars[rand(chars.size)] }
99
101
  str
100
102
  end
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
  # Licensed to the Apache Software Foundation (ASF) under one
3
4
  # or more contributor license agreements. See the NOTICE file
4
5
  # distributed with this work for additional information
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
  # Licensed to the Apache Software Foundation (ASF) under one
3
4
  # or more contributor license agreements. See the NOTICE file
4
5
  # distributed with this work for additional information
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  # Licensed to the Apache Software Foundation (ASF) under one
2
3
  # or more contributor license agreements. See the NOTICE file
3
4
  # distributed with this work for additional information
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
  # Licensed to the Apache Software Foundation (ASF) under one
3
4
  # or more contributor license agreements. See the NOTICE file
4
5
  # distributed with this work for additional information
@@ -1,4 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
+ # frozen_string_literal: true
2
3
  # Licensed to the Apache Software Foundation (ASF) under one
3
4
  # or more contributor license agreements. See the NOTICE file
4
5
  # distributed with this work for additional information
@@ -180,6 +181,19 @@ JSON
180
181
  assert_equal records, ['a' * 10_000]
181
182
  end
182
183
 
184
+ def test_zstandard
185
+ Avro::DataFile.open('data.avr', 'w', '"string"', :zstandard) do |writer|
186
+ writer << 'a' * 10_000
187
+ end
188
+ assert(File.size('data.avr') < 600)
189
+
190
+ records = []
191
+ Avro::DataFile.open('data.avr') do |reader|
192
+ reader.each {|record| records << record }
193
+ end
194
+ assert_equal records, ['a' * 10_000]
195
+ end
196
+
183
197
  def test_append_to_deflated_file
184
198
  schema = Avro::Schema.parse('"string"')
185
199
  writer = Avro::IO::DatumWriter.new(schema)
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  # Licensed to the Apache Software Foundation (ASF) under one
2
3
  # or more contributor license agreements. See the NOTICE file
3
4
  # distributed with this work for additional information
@@ -34,4 +35,23 @@ class TestFingerprints < Test::Unit::TestCase
34
35
  assert_equal 28572620203319713300323544804233350633246234624932075150020181448463213378117,
35
36
  schema.sha256_fingerprint
36
37
  end
38
+
39
+ def test_crc_64_avro_fingerprint
40
+ schema = Avro::Schema.parse <<-SCHEMA
41
+ { "type": "int" }
42
+ SCHEMA
43
+
44
+ assert_equal 8247732601305521295, # hex: 0x7275d51a3f395c8f
45
+ schema.crc_64_avro_fingerprint
46
+ end
47
+
48
+ # This definitely belongs somewhere else
49
+ def test_single_object_encoding_header
50
+ schema = Avro::Schema.parse <<-SCHEMA
51
+ { "type": "int" }
52
+ SCHEMA
53
+
54
+ assert_equal ["c3", "01", "8f", "5c", "39", "3f", "1a", "D5", "75", "72"].map{|e| e.to_i(16) },
55
+ schema.single_object_encoding_header
56
+ end
37
57
  end
data/test/test_help.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  # Licensed to the Apache Software Foundation (ASF) under one
2
3
  # or more contributor license agreements. See the NOTICE file
3
4
  # distributed with this work for additional information