avro 1.8.2 → 1.9.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,170 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ module Avro
17
+ module SchemaCompatibility
18
# Full, recursive compatibility check: answers whether a datum written with
# writers_schema can be read using readers_schema.
# The work is delegated to a fresh Checker instance.
def self.can_read?(writers_schema, readers_schema)
  checker = Checker.new
  checker.can_read?(writers_schema, readers_schema)
end
23
+
24
# Full, recursive compatibility check in both directions: a datum written
# with either schema must be readable using the other one.
# The work is delegated to a fresh Checker instance.
def self.mutual_read?(writers_schema, readers_schema)
  checker = Checker.new
  checker.mutual_read?(writers_schema, readers_schema)
end
29
+
30
# Shallow check that a datum written with writers_schema could be read using
# readers_schema. Only the types (including schema promotion) and, for named
# types, the full names are compared; record fields are not inspected.
# Aliases for named types are not supported here, and the ruby implementation
# of Avro in general does not include support for aliases.
def self.match_schemas(writers_schema, readers_schema)
  writer_type = writers_schema.type_sym
  reader_type = readers_schema.type_sym

  # Any pairing that involves a union is accepted at this level.
  return true if [writer_type, reader_type].include?(:union)

  if writer_type != reader_type
    # Schema promotion: writer type => reader types it may be read as.
    promotions = {
      int: [:long, :float, :double],
      long: [:float, :double],
      float: [:double],
      string: [:bytes],
      bytes: [:string]
    }
    return promotions.fetch(writer_type, []).include?(reader_type)
  end

  # From here on both sides have the same type.
  return true if Schema::PRIMITIVE_TYPES_SYM.include?(reader_type)

  same_fullname = -> { writers_schema.fullname == readers_schema.fullname }

  case reader_type
  when :record, :error, :enum
    same_fullname.call
  when :request
    true
  when :fixed
    same_fullname.call && writers_schema.size == readers_schema.size
  when :map
    match_schemas(writers_schema.values, readers_schema.values)
  when :array
    match_schemas(writers_schema.items, readers_schema.items)
  else
    false
  end
end
81
+
82
# Stateful helper behind SchemaCompatibility.can_read? and mutual_read?.
# One instance remembers every (writer, reader) schema pair it has already
# started comparing so that recursive schemas do not loop forever.
class Checker
  # Reader types that need no structural descent once
  # SchemaCompatibility.match_schemas has accepted the pair.
  SIMPLE_CHECKS = Schema::PRIMITIVE_TYPES_SYM.dup.add(:fixed).freeze

  attr_reader :recursion_set
  private :recursion_set

  def initialize
    # Holds [writer.object_id, reader.object_id] pairs already visited.
    @recursion_set = Set.new
  end

  # True when data written with writers_schema can be read with readers_schema.
  def can_read?(writers_schema, readers_schema)
    full_match_schemas(writers_schema, readers_schema)
  end

  # True when each schema can read data written with the other one.
  def mutual_read?(writers_schema, readers_schema)
    can_read?(writers_schema, readers_schema) && can_read?(readers_schema, writers_schema)
  end

  private

  # Recursive compatibility check for one (writer, reader) pair.
  # A pair that is already being compared is treated as compatible so that
  # self-referencing schemas terminate.
  def full_match_schemas(writers_schema, readers_schema)
    return true if recursion_in_progress?(writers_schema, readers_schema)

    return false unless Avro::SchemaCompatibility.match_schemas(writers_schema, readers_schema)

    reader_type = readers_schema.type_sym
    writer_is_union = writers_schema.type_sym == :union

    return true if !writer_is_union && SIMPLE_CHECKS.include?(reader_type)

    case reader_type
    when :record
      # match_record_schemas itself rejects union writers.
      match_record_schemas(writers_schema, readers_schema)
    when :union
      match_union_schemas(writers_schema, readers_schema)
    else
      # A union writer is only described by its branches (`schemas`), so it
      # must be unwrapped before `values`, `items` or `symbols` are read.
      # NOTE(review): assumes union schemas do not respond to those three
      # accessors - confirm against the union schema class.
      return match_single_branch_writer(writers_schema, readers_schema) if writer_is_union

      case reader_type
      when :map
        full_match_schemas(writers_schema.values, readers_schema.values)
      when :array
        full_match_schemas(writers_schema.items, readers_schema.items)
      when :enum
        # reader's symbols must contain all writer's symbols
        (writers_schema.symbols - readers_schema.symbols).empty?
      else
        false
      end
    end
  end

  # A union writer facing a non-union reader is accepted only when the union
  # has exactly one branch and that branch matches the reader.
  def match_single_branch_writer(writers_schema, readers_schema)
    return false unless writers_schema.schemas.size == 1

    full_match_schemas(writers_schema.schemas.first, readers_schema)
  end

  # Reader is a union: every branch of a union writer must be readable by the
  # reader union; a non-union writer needs at least one matching reader branch.
  # Raises RuntimeError when readers_schema is not a union.
  def match_union_schemas(writers_schema, readers_schema)
    raise 'readers_schema must be a union' unless readers_schema.type_sym == :union

    if writers_schema.type_sym == :union
      writers_schema.schemas.all? { |writer_branch| full_match_schemas(writer_branch, readers_schema) }
    else
      readers_schema.schemas.any? { |reader_branch| full_match_schemas(writers_schema, reader_branch) }
    end
  end

  # Every reader field must either match the writer field of the same name
  # or, when the writer has no such field, provide a default.
  def match_record_schemas(writers_schema, readers_schema)
    return false if writers_schema.type_sym == :union

    writer_fields = writers_schema.fields_hash
    readers_schema.fields.all? do |reader_field|
      if writer_fields.key?(reader_field.name)
        full_match_schemas(writer_fields[reader_field.name].type, reader_field.type)
      else
        reader_field.default?
      end
    end
  end

  # Records the pair and reports whether it had been recorded before.
  def recursion_in_progress?(writers_schema, readers_schema)
    key = [writers_schema.object_id, readers_schema.object_id]

    # Set#add? returns nil when the key was already present.
    recursion_set.add?(key).nil?
  end
end
169
+ end
170
+ end
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -0,0 +1,242 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ module Avro
18
+ class SchemaValidator
19
# Path given to the top-level datum when validation starts.
ROOT_IDENTIFIER = '.'.freeze
# Separator placed between path segments for nested record fields and map keys.
PATH_SEPARATOR = '.'.freeze
# Inclusive bounds accepted for :int and :long data.
INT_RANGE = (Schema::INT_MIN_VALUE..Schema::INT_MAX_VALUE)
LONG_RANGE = (Schema::LONG_MIN_VALUE..Schema::LONG_MAX_VALUE)
# Union branches of these types have their own errors reported in detail.
COMPLEX_TYPES = %i[array error map record request].freeze
BOOLEAN_VALUES = [true, false].freeze
25
+
26
# Collects the error messages produced while validating one datum.
class Result
  attr_reader :errors

  def initialize
    @errors = []
  end

  # Appends an already formatted error message.
  def <<(error)
    errors.push(error)
  end

  # Appends a message prefixed with the path at which it occurred.
  def add_error(path, message)
    located_message = "at #{path} #{message}"
    self << located_message
  end

  # True once at least one error has been recorded.
  def failure?
    errors.any?
  end

  # All messages, one per line.
  def to_s
    @errors.join("\n")
  end
end
49
+
50
# Raised by validate! when validation fails; carries the Result with all
# collected error messages.
class ValidationError < StandardError
  attr_reader :result

  def initialize(result = Result.new)
    @result = result
    super(result)
  end

  # The exception text is the textual form of the result.
  def to_s
    @result.to_s
  end
end

# Raised internally when a datum does not have the type the schema expects.
class TypeMismatchError < ValidationError; end
64
+
65
+ class << self
66
# Validates logical_datum against expected_schema.
#
# options (all optional, nil is treated as an empty hash):
#   :recursive            - descend into arrays, maps, unions and records
#                           (true unless the key is given)
#   :encoded              - passed on to the validators, which hand it to
#                           resolve_datum
#   :fail_on_extra_fields - passed on to the recursive validator
#
# Returns the Result when no error was recorded and raises ValidationError
# (carrying the Result) otherwise.
def validate!(expected_schema, logical_datum, options = { recursive: true, encoded: false, fail_on_extra_fields: false })
  # Merge into a fresh hash: the caller's hash is never written to, so it may
  # be frozen or reused between calls.
  options = { recursive: true }.merge(options || {})

  result = Result.new
  if options[:recursive]
    validate_recursive(expected_schema, logical_datum, ROOT_IDENTIFIER, result, options)
  else
    validate_simple(expected_schema, logical_datum, ROOT_IDENTIFIER, result, options)
  end
  fail ValidationError, result if result.failure?
  result
end
79
+
80
+ private
81
+
82
# Validates the datum itself via validate_simple and then descends into
# container types: arrays, maps, unions and record-like schemas.
# A TypeMismatchError raised on the way is turned into an error at `path`.
def validate_recursive(expected_schema, logical_datum, path, result, options = {})
  datum = resolve_datum(expected_schema, logical_datum, options[:encoded])

  # The datum has been resolved above, so the simple check must not resolve it again.
  validate_simple(expected_schema, datum, path, result, encoded: true)

  schema_type = expected_schema.type_sym
  if schema_type == :array
    validate_array(expected_schema, datum, path, result, options)
  elsif schema_type == :map
    validate_map(expected_schema, datum, path, result, options)
  elsif schema_type == :union
    validate_union(expected_schema, datum, path, result, options)
  elsif [:record, :error, :request].include?(schema_type)
    raise TypeMismatchError unless datum.is_a?(Hash)

    expected_schema.fields.each do |field|
      field_path = deeper_path_for_hash(field.name, path)
      validate_recursive(field.type, datum[field.name], field_path, result, options)
    end

    if options[:fail_on_extra_fields]
      declared_names = expected_schema.fields.map(&:name)
      unexpected_names = datum.keys.map(&:to_s) - declared_names
      unexpected_names.each do |extra_field|
        result.add_error(path, "extra field '#{extra_field}' - not in schema")
      end
    end
  end
rescue TypeMismatchError
  result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
end
111
+
112
# Checks only the datum itself against the schema type, without descending
# into containers. Problems are recorded on `result`; nothing is raised for
# invalid data because TypeMismatchError is converted into an error message.
def validate_simple(expected_schema, logical_datum, path, result, options = {})
  datum = resolve_datum(expected_schema, logical_datum, options[:encoded])
  validate_type(expected_schema)

  schema_type = expected_schema.type_sym
  case schema_type
  when :null
    raise TypeMismatchError unless datum.nil?
  when :boolean
    raise TypeMismatchError unless BOOLEAN_VALUES.include?(datum)
  when :string, :bytes
    raise TypeMismatchError unless datum.is_a?(String)
  when :int, :long
    raise TypeMismatchError unless datum.is_a?(Integer)

    bounds = schema_type == :int ? INT_RANGE : LONG_RANGE
    result.add_error(path, "out of bound value #{datum}") unless bounds.cover?(datum)
  when :float, :double
    # Integers are accepted where a floating point number is expected.
    raise TypeMismatchError unless datum.is_a?(Float) || datum.is_a?(Integer)
  when :fixed
    if !datum.is_a?(String)
      result.add_error(path, "expected fixed with size #{expected_schema.size}, got #{actual_value_message(datum)}")
    elsif datum.bytesize != expected_schema.size
      result.add_error(path, fixed_string_message(expected_schema.size, datum))
    end
  when :enum
    unless expected_schema.symbols.include?(datum)
      result.add_error(path, enum_message(expected_schema.symbols, datum))
    end
  end
rescue TypeMismatchError
  result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
end
143
+
144
# Returns logical_datum unchanged when `encoded` is truthy; otherwise passes
# it through the schema's type adapter. Any StandardError raised by the
# adapter yields nil instead of propagating.
def resolve_datum(expected_schema, logical_datum, encoded)
  return logical_datum if encoded

  begin
    expected_schema.type_adapter.encode(logical_datum)
  rescue StandardError
    nil
  end
end
151
+
152
# Guards against schemas whose type is not one of Avro::Schema::VALID_TYPES_SYM.
# Raises RuntimeError for such a schema and returns nil otherwise.
def validate_type(expected_schema)
  return if Avro::Schema::VALID_TYPES_SYM.include?(expected_schema.type_sym)

  raise "Unexpected schema type #{expected_schema.type_sym} #{expected_schema.inspect}"
end
157
+
158
# Error text for a string whose byte size differs from the fixed schema size.
def fixed_string_message(size, datum)
  actual_size = datum.bytesize
  "expected fixed with size #{size}, got \"#{datum}\" with size #{actual_size}"
end
161
+
162
# Error text for a datum that is not one of the enum's symbols.
def enum_message(symbols, datum)
  actual = actual_value_message(datum)
  "expected enum with values #{symbols}, got #{actual}"
end
165
+
166
# Validates each element of an array datum against the schema's item type.
# Raises TypeMismatchError when the datum is not an Array.
def validate_array(expected_schema, datum, path, result, options = {})
  raise TypeMismatchError unless datum.is_a?(Array)

  datum.each_with_index do |element, index|
    element_path = path + "[#{index}]"
    validate_recursive(expected_schema.items, element, element_path, result, options)
  end
end
172
+
173
# Validates a map datum: all keys must be strings and each value must match
# the schema's value type. Key errors for the whole map are recorded before
# any value is validated.
# Raises TypeMismatchError when the datum is not a Hash.
def validate_map(expected_schema, datum, path, result, options = {})
  raise TypeMismatchError unless datum.is_a?(Hash)

  datum.keys.each do |key|
    next if key.is_a?(String)

    result.add_error(path, "unexpected key type '#{ruby_to_avro_type(key.class)}' in map")
  end

  datum.each do |key, value|
    value_path = deeper_path_for_hash(key, path)
    validate_recursive(expected_schema.values, value, value_path, result, options)
  end
end
183
+
184
# Validates a datum against a union schema.
# A single-branch union is validated directly against that branch.
# Otherwise the datum is valid when any branch accepts it. When none does,
# the errors of the first failed complex branch are copied to `result`;
# if no complex branch failed, one summary error is recorded.
def validate_union(expected_schema, datum, path, result, options = {})
  branches = expected_schema.schemas
  if branches.size == 1
    validate_recursive(branches.first, datum, path, result, options)
    return
  end

  rejected = []
  accepted_branch = first_compatible_type(datum, expected_schema, path, rejected, options)
  return unless accepted_branch.nil?

  complex_failure = rejected.find { |failure| COMPLEX_TYPES.include?(failure[:type]) }
  if complex_failure.nil?
    types = branches.map { |branch| "'#{branch.type_sym}'" }.join(', ')
    result.add_error(path, "expected union of [#{types}], got #{actual_value_message(datum)}")
  else
    complex_failure[:result].errors.each { |error| result << error }
  end
end
201
+
202
# Returns the first branch of the union that validates the datum without
# errors, or nil when there is none. Every rejected branch tried before that
# is appended to `failures` as { type:, result: }.
def first_compatible_type(datum, expected_schema, path, failures, options = {})
  expected_schema.schemas.find do |branch|
    attempt = Result.new
    validate_recursive(branch, datum, path, attempt, options)
    rejected = attempt.failure?
    failures << { type: branch.type_sym, result: attempt } if rejected
    !rejected
  end
end
210
+
211
# Builds the path of a nested key by joining it to `path` with the separator.
# Every run of consecutive separators in the joined string is collapsed to
# one, so the root path followed by a key yields ".key".
def deeper_path_for_hash(sub_key, path)
  joined = path.to_s + PATH_SEPARATOR + sub_key.to_s
  joined.squeeze(PATH_SEPARATOR)
end
214
+
215
# Describes the value actually received, for use in error messages:
# just the type name for nil, "<type> with value <inspect>" otherwise.
def actual_value_message(value)
  return ruby_to_avro_type(value.class) if value.nil?

  type_name = value.is_a?(Integer) ? ruby_integer_to_avro_type(value) : ruby_to_avro_type(value.class)
  "#{type_name} with value #{value.inspect}"
end
227
+
228
# Maps a Ruby class to the Avro type name used in error messages.
# Classes without an entry are returned unchanged.
def ruby_to_avro_type(ruby_class)
  avro_names = {
    NilClass => 'null',
    String => 'string',
    Float => 'float',
    Hash => 'record'
  }
  avro_names.key?(ruby_class) ? avro_names[ruby_class] : ruby_class
end
236
+
237
# Names an integer 'int' when it lies within INT_RANGE and 'long' otherwise.
def ruby_integer_to_avro_type(value)
  if INT_RANGE.cover?(value)
    'int'
  else
    'long'
  end
end
240
+ end
241
+ end
242
+ end
@@ -7,7 +7,7 @@
7
7
  # "License"); you may not use this file except in compliance
8
8
  # with the License. You may obtain a copy of the License at
9
9
  #
10
- # http://www.apache.org/licenses/LICENSE-2.0
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
11
  #
12
12
  # Unless required by applicable law or agreed to in writing,
13
13
  # software distributed under the License is distributed on an
@@ -7,7 +7,7 @@
7
7
  # "License"); you may not use this file except in compliance
8
8
  # with the License. You may obtain a copy of the License at
9
9
  #
10
- # http://www.apache.org/licenses/LICENSE-2.0
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
11
  #
12
12
  # Unless required by applicable law or agreed to in writing, software
13
13
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -27,15 +27,17 @@ class RandomData
27
27
  end
28
28
 
29
29
  def nextdata(schm, d=0)
30
+ return logical_nextdata(schm, d=0) unless schm.type_adapter.eql?(Avro::LogicalTypes::Identity)
31
+
30
32
  case schm.type_sym
31
33
  when :boolean
32
34
  rand > 0.5
33
35
  when :string
34
36
  randstr()
35
37
  when :int
36
- rand(Avro::Schema::INT_MAX_VALUE - Avro::Schema::INT_MIN_VALUE) + Avro::Schema::INT_MIN_VALUE
38
+ rand_int
37
39
  when :long
38
- rand(Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) + Avro::Schema::LONG_MIN_VALUE
40
+ rand_long
39
41
  when :float
40
42
  (-1024 + 2048 * rand).round.to_f
41
43
  when :double
@@ -79,6 +81,15 @@ class RandomData
79
81
  end
80
82
  end
81
83
 
84
# Produces a random logical value for a schema that carries a logical type.
# 'date' values are decoded from a random int and timestamps from a random
# long, each with the adapter of its own precision; any other logical type
# yields nil. The depth argument is accepted for signature parity with
# nextdata and is not used.
# NOTE(review): assumes Avro::LogicalTypes::TimestampMillis is defined next
# to TimestampMicros - confirm.
def logical_nextdata(schm, _d=0)
  case schm.logical_type
  when 'date'
    Avro::LogicalTypes::IntDate.decode(rand_int)
  when 'timestamp-millis'
    # Millisecond timestamps must not be decoded as microseconds.
    Avro::LogicalTypes::TimestampMillis.decode(rand_long)
  when 'timestamp-micros'
    Avro::LogicalTypes::TimestampMicros.decode(rand_long)
  end
end
92
+
82
93
  CHARPOOL = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789'
83
94
  BYTEPOOL = '12345abcd'
84
95
 
@@ -87,4 +98,12 @@ class RandomData
87
98
  rand(length+1).times { str << chars[rand(chars.size)] }
88
99
  str
89
100
  end
101
+
102
# Random Avro int, drawn from the inclusive range
# INT_MIN_VALUE..INT_MAX_VALUE so that both bounds can be produced.
def rand_int
  rand(Avro::Schema::INT_MIN_VALUE..Avro::Schema::INT_MAX_VALUE)
end
105
+
106
# Random Avro long, drawn from the inclusive range
# LONG_MIN_VALUE..LONG_MAX_VALUE so that both bounds can be produced.
def rand_long
  rand(Avro::Schema::LONG_MIN_VALUE..Avro::Schema::LONG_MAX_VALUE)
end
90
109
  end