avro 1.8.2 → 1.10.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,175 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ module Avro
17
+ module SchemaCompatibility
18
# Perform a full, recursive check that a datum written using the writers_schema
# can be read using the readers_schema.
#
# A new Checker is created for every call, so no state is shared between calls.
def self.can_read?(writers_schema, readers_schema)
  checker = Checker.new
  checker.can_read?(writers_schema, readers_schema)
end
23
+
24
# Perform a full, recursive check that a datum written using either the
# writers_schema or the readers_schema can be read using the other schema.
#
# A new Checker is created for every call, so no state is shared between calls.
def self.mutual_read?(writers_schema, readers_schema)
  checker = Checker.new
  checker.mutual_read?(writers_schema, readers_schema)
end
29
+
30
# Perform a basic check that a datum written with the writers_schema could
# be read using the readers_schema. This check includes matching the types,
# including schema promotion, and matching the full name (including aliases) for named types.
#
# Unions are always accepted here; maps and arrays are compared through their
# value/item schemas using this same basic check.
def self.match_schemas(writers_schema, readers_schema)
  writer_sym = writers_schema.type_sym
  reader_sym = readers_schema.type_sym

  # Either side being a union is enough to pass the basic check.
  return true if writer_sym == :union || reader_sym == :union

  if writer_sym == reader_sym
    return true if Schema::PRIMITIVE_TYPES_SYM.include?(reader_sym)

    case reader_sym
    when :record, :error, :enum
      # Named types only need the reader to recognise the writer's full name.
      return readers_schema.match_fullname?(writers_schema.fullname)
    when :fixed
      # Fixed types additionally need the same size.
      return readers_schema.match_fullname?(writers_schema.fullname) &&
             writers_schema.size == readers_schema.size
    when :request
      return true
    when :map
      return match_schemas(writers_schema.values, readers_schema.values)
    when :array
      return match_schemas(writers_schema.items, readers_schema.items)
    end
  end

  # Handle schema promotion: writer type => reader types it may be read as.
  promotions = {
    int: [:long, :float, :double],
    long: [:float, :double],
    float: [:double],
    string: [:bytes],
    bytes: [:string]
  }
  promotions.fetch(writer_sym, []).include?(reader_sym)
end
79
+
80
# Performs the full, recursive compatibility check between a writer's and a
# reader's schema.
#
# Each instance remembers every (writer, reader) pair it has started checking
# (keyed by object_id) so that recursive schemas terminate. Pairs are never
# removed from that set, so a pair is evaluated at most once per instance and
# any later encounter of the same pair is answered with true.
class Checker
  # Reader types that need no further inspection once the basic
  # match_schemas check has passed (all primitives plus :fixed).
  SIMPLE_CHECKS = Schema::PRIMITIVE_TYPES_SYM.dup.add(:fixed).freeze

  attr_reader :recursion_set
  private :recursion_set

  def initialize
    @recursion_set = Set.new
  end

  # Returns whether data written with writers_schema can be read with
  # readers_schema.
  def can_read?(writers_schema, readers_schema)
    full_match_schemas(writers_schema, readers_schema)
  end

  # Returns whether each schema can read data written with the other one.
  def mutual_read?(writers_schema, readers_schema)
    can_read?(writers_schema, readers_schema) && can_read?(readers_schema, writers_schema)
  end

  private

  # Recursive worker behind can_read?.
  def full_match_schemas(writers_schema, readers_schema)
    return true if recursion_in_progress?(writers_schema, readers_schema)

    return false unless Avro::SchemaCompatibility.match_schemas(writers_schema, readers_schema)

    writer_type = writers_schema.type_sym
    reader_type = readers_schema.type_sym

    return true if writer_type != :union && SIMPLE_CHECKS.include?(reader_type)

    # A union writer read by a non-union reader is only accepted when the
    # union has exactly one branch and that branch is readable. Previously
    # this rule was applied only for reader types that fell through to the
    # `else` branch below; :map, :array and :enum readers instead called
    # values/items/symbols on the union itself.
    # NOTE(review): union schemas are only accessed through #schemas in this
    # file, so those calls presumably raised NoMethodError - confirm against
    # the union schema class.
    # :record readers keep their existing behaviour (match_record_schemas
    # rejects every union writer).
    if writer_type == :union && reader_type != :union && reader_type != :record
      return match_single_branch_union(writers_schema, readers_schema)
    end

    case reader_type
    when :record
      match_record_schemas(writers_schema, readers_schema)
    when :map
      full_match_schemas(writers_schema.values, readers_schema.values)
    when :array
      full_match_schemas(writers_schema.items, readers_schema.items)
    when :union
      match_union_schemas(writers_schema, readers_schema)
    when :enum
      # reader's symbols must contain all writer's symbols or reader has default
      (writers_schema.symbols - readers_schema.symbols).empty? || !readers_schema.default.nil?
    else
      false
    end
  end

  # Unwraps a one-branch writer union and checks its only branch against the
  # reader; unions with any other number of branches are rejected.
  def match_single_branch_union(writers_schema, readers_schema)
    return false unless writers_schema.schemas.size == 1

    full_match_schemas(writers_schema.schemas.first, readers_schema)
  end

  # Reader is a union: every writer branch must be readable (writer union),
  # or at least one reader branch must accept the writer (non-union writer).
  def match_union_schemas(writers_schema, readers_schema)
    raise 'readers_schema must be a union' unless readers_schema.type_sym == :union

    case writers_schema.type_sym
    when :union
      writers_schema.schemas.all? { |writer_type| full_match_schemas(writer_type, readers_schema) }
    else
      readers_schema.schemas.any? { |reader_type| full_match_schemas(writers_schema, reader_type) }
    end
  end

  # Checks every reader field against the writer's fields: by name first,
  # then by a single unambiguous alias, otherwise the reader field must
  # have a default.
  def match_record_schemas(writers_schema, readers_schema)
    return false if writers_schema.type_sym == :union

    writer_fields_hash = writers_schema.fields_hash
    readers_schema.fields.each do |field|
      if writer_fields_hash.key?(field.name)
        return false unless full_match_schemas(writer_fields_hash[field.name].type, field.type)
      else
        names = writer_fields_hash.keys & field.alias_names
        if names.size > 1
          # More than one writer field matches the reader's aliases: ambiguous.
          return false
        elsif names.size == 1
          return false unless full_match_schemas(writer_fields_hash[names.first].type, field.type)
        else
          return false unless field.default?
        end
      end
    end

    true
  end

  # Returns true when this (writer, reader) pair has already been seen by
  # this instance; otherwise records the pair and returns false.
  def recursion_in_progress?(writers_schema, readers_schema)
    key = [writers_schema.object_id, readers_schema.object_id]

    if recursion_set.include?(key)
      true
    else
      recursion_set.add(key)
      false
    end
  end
end
174
+ end
175
+ end
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -0,0 +1,242 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ module Avro
18
+ class SchemaValidator
19
+ ROOT_IDENTIFIER = '.'.freeze
20
+ PATH_SEPARATOR = '.'.freeze
21
+ INT_RANGE = Schema::INT_MIN_VALUE..Schema::INT_MAX_VALUE
22
+ LONG_RANGE = Schema::LONG_MIN_VALUE..Schema::LONG_MAX_VALUE
23
+ COMPLEX_TYPES = [:array, :error, :map, :record, :request].freeze
24
+ BOOLEAN_VALUES = [true, false].freeze
25
+
26
# Collects the error messages produced while validating a datum.
class Result
  attr_reader :errors

  def initialize
    @errors = []
  end

  # Appends a ready-made error message and returns the errors array.
  def <<(error)
    errors.push(error)
  end

  # Appends a message prefixed with the path it refers to.
  def add_error(path, message)
    formatted = "at #{path} #{message}"
    self << formatted
  end

  # True once at least one error has been recorded.
  def failure?
    errors.any?
  end

  # All recorded errors, one per line.
  def to_s
    @errors.join("\n")
  end
end
49
+
50
# Raised when validation fails; carries the Result holding the collected
# errors and renders them as its message.
class ValidationError < StandardError
  attr_reader :result

  def initialize(result = Result.new)
    @result = result
    super(result)
  end

  # The exception text is the textual form of the attached result.
  def to_s
    @result.to_s
  end
end
62
+
63
+ TypeMismatchError = Class.new(ValidationError)
64
+
65
+ class << self
66
# Validates logical_datum against expected_schema.
#
# Options (all optional):
#   :recursive            - when truthy (the default when the key is absent)
#                           validate_recursive is used, otherwise validate_simple
#   :encoded              - passed through to the validation helpers
#   :fail_on_extra_fields - passed through to the validation helpers
#
# Returns the (error-free) Result on success.
# Raises ValidationError carrying the Result when any error was recorded.
def validate!(expected_schema, logical_datum, options = { recursive: true, encoded: false, fail_on_extra_fields: false })
  # Build a fresh hash instead of writing the :recursive default into the
  # caller's hash: the caller's object is left untouched and frozen hashes
  # are accepted. An explicit :recursive entry from the caller still wins.
  options = { recursive: true }.merge(options || {})

  result = Result.new
  if options[:recursive]
    validate_recursive(expected_schema, logical_datum, ROOT_IDENTIFIER, result, options)
  else
    validate_simple(expected_schema, logical_datum, ROOT_IDENTIFIER, result, options)
  end
  fail ValidationError, result if result.failure?
  result
end
79
+
80
+ private
81
+
82
# Validates a datum and, for container types, everything nested inside it.
#
# The datum is resolved once (honouring options[:encoded]) and then handed to
# validate_simple as already encoded. Arrays, maps and unions are delegated
# to their dedicated helpers; record-like schemas are walked field by field.
# A TypeMismatchError raised along the way is recorded on result at path.
def validate_recursive(expected_schema, logical_datum, path, result, options = {})
  datum = resolve_datum(expected_schema, logical_datum, options[:encoded])

  validate_simple(expected_schema, datum, path, result, encoded: true)

  case expected_schema.type_sym
  when :array then validate_array(expected_schema, datum, path, result, options)
  when :map then validate_map(expected_schema, datum, path, result, options)
  when :union then validate_union(expected_schema, datum, path, result, options)
  when :record, :error, :request
    raise TypeMismatchError unless datum.is_a?(Hash)

    expected_schema.fields.each do |field|
      field_path = deeper_path_for_hash(field.name, path)
      validate_recursive(field.type, datum[field.name], field_path, result, options)
    end

    if options[:fail_on_extra_fields]
      # Datum keys are compared by their string form against the schema's field names.
      unknown_fields = datum.keys.map(&:to_s) - expected_schema.fields.map(&:name)
      unknown_fields.each do |extra_field|
        result.add_error(path, "extra field '#{extra_field}' - not in schema")
      end
    end
  end
rescue TypeMismatchError
  result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
end
111
+
112
# Validates a datum against the schema's own type without descending into
# nested values.
#
# Type mismatches are raised internally as TypeMismatchError and recorded on
# result; range, fixed-size and enum-symbol problems are recorded directly.
# validate_type raises for schema types that are not recognised.
def validate_simple(expected_schema, logical_datum, path, result, options = {})
  datum = resolve_datum(expected_schema, logical_datum, options[:encoded])
  validate_type(expected_schema)

  schema_type = expected_schema.type_sym
  case schema_type
  when :null
    raise TypeMismatchError unless datum.nil?
  when :boolean
    raise TypeMismatchError unless BOOLEAN_VALUES.include?(datum)
  when :string, :bytes
    raise TypeMismatchError unless datum.is_a?(String)
  when :int, :long
    raise TypeMismatchError unless datum.is_a?(Integer)

    bounds = schema_type == :int ? INT_RANGE : LONG_RANGE
    result.add_error(path, "out of bound value #{datum}") unless bounds.cover?(datum)
  when :float, :double
    # Integers are accepted wherever a floating point value is expected.
    raise TypeMismatchError unless datum.is_a?(Float) || datum.is_a?(Integer)
  when :fixed
    if !datum.is_a?(String)
      result.add_error(path, "expected fixed with size #{expected_schema.size}, got #{actual_value_message(datum)}")
    elsif datum.bytesize != expected_schema.size
      result.add_error(path, fixed_string_message(expected_schema.size, datum))
    end
  when :enum
    allowed = expected_schema.symbols
    result.add_error(path, enum_message(expected_schema.symbols, datum)) unless allowed.include?(datum)
  end
rescue TypeMismatchError
  result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
end
143
+
144
# Returns the datum in the form the validators inspect.
#
# An already encoded datum is returned untouched. Otherwise it is passed
# through the schema's type adapter; any StandardError raised while doing so
# is swallowed and nil is returned instead.
def resolve_datum(expected_schema, logical_datum, encoded)
  return logical_datum if encoded

  begin
    expected_schema.type_adapter.encode(logical_datum)
  rescue StandardError
    nil
  end
end
151
+
152
# Raises a RuntimeError when the schema's type symbol is not one of
# Avro::Schema::VALID_TYPES_SYM; returns nil otherwise.
def validate_type(expected_schema)
  return if Avro::Schema::VALID_TYPES_SYM.include?(expected_schema.type_sym)

  fail "Unexpected schema type #{expected_schema.type_sym} #{expected_schema.inspect}"
end
157
+
158
# Builds the error text for a fixed value whose byte size does not match
# the size the schema expects.
def fixed_string_message(size, datum)
  actual_size = datum.bytesize
  "expected fixed with size #{size}, got \"#{datum}\" with size #{actual_size}"
end
161
+
162
# Builds the error text for a datum that is not one of the enum's symbols.
def enum_message(symbols, datum)
  actual = actual_value_message(datum)
  "expected enum with values #{symbols}, got #{actual}"
end
165
+
166
# Validates every element of an array datum against the schema's item
# schema, extending the path with the element index.
# Raises TypeMismatchError when the datum is not an Array.
def validate_array(expected_schema, datum, path, result, options = {})
  raise TypeMismatchError unless datum.is_a?(Array)

  datum.each_with_index do |element, index|
    element_path = path + "[#{index}]"
    validate_recursive(expected_schema.items, element, element_path, result, options)
  end
end
172
+
173
# Validates a map datum: first reports every key that is not a String, then
# validates each value against the schema's value schema.
# Raises TypeMismatchError when the datum is not a Hash.
def validate_map(expected_schema, datum, path, result, options = {})
  raise TypeMismatchError unless datum.is_a?(Hash)

  # Key errors are all recorded before any value is inspected.
  datum.each_key do |key|
    next if key.is_a?(String)

    result.add_error(path, "unexpected key type '#{ruby_to_avro_type(key.class)}' in map")
  end

  datum.each do |key, value|
    entry_path = deeper_path_for_hash(key, path)
    validate_recursive(expected_schema.values, value, entry_path, result, options)
  end
end
183
+
184
# Validates a datum against a union schema.
#
# A one-branch union is validated as that branch. Otherwise the datum is
# accepted as soon as any branch validates cleanly. When none does, the
# errors of the first failed complex branch (see COMPLEX_TYPES) are copied
# to result; if no complex branch failed, a single generic union error is
# recorded.
def validate_union(expected_schema, datum, path, result, options = {})
  branches = expected_schema.schemas
  if branches.size == 1
    validate_recursive(branches.first, datum, path, result, options)
    return
  end

  failures = []
  matched = first_compatible_type(datum, expected_schema, path, failures, options)
  return unless matched.nil?

  complex_failure = failures.find { |failure| COMPLEX_TYPES.include?(failure[:type]) }
  if complex_failure
    complex_failure[:result].errors.each { |error| result << error }
  else
    type_names = branches.map { |branch| "'#{branch.type_sym}'" }
    result.add_error(path, "expected union of [#{type_names.join(', ')}], got #{actual_value_message(datum)}")
  end
end
201
+
202
# Returns the first branch of the union that validates the datum without
# errors, or nil when none does. Every branch tried before the match that
# failed is appended to failures as { type:, result: }.
def first_compatible_type(datum, expected_schema, path, failures, options = {})
  expected_schema.schemas.find do |candidate|
    # Each branch gets its own Result so failed attempts do not leak errors.
    attempt = Result.new
    validate_recursive(candidate, datum, path, attempt, options)
    next true unless attempt.failure?

    failures << { type: candidate.type_sym, result: attempt }
    false
  end
end
210
+
211
# Appends sub_key to path using PATH_SEPARATOR, then collapses every run of
# consecutive separators in the combined string into a single one.
def deeper_path_for_hash(sub_key, path)
  combined = "#{path}#{PATH_SEPARATOR}#{sub_key}"
  combined.squeeze(PATH_SEPARATOR)
end
214
+
215
# Describes a value for error messages: its Avro type name (or Ruby class
# when no name is mapped), followed by the inspected value unless it is nil.
def actual_value_message(value)
  avro_type = value.is_a?(Integer) ? ruby_integer_to_avro_type(value) : ruby_to_avro_type(value.class)
  return avro_type if value.nil?

  "#{avro_type} with value #{value.inspect}"
end
227
+
228
# Maps a Ruby class to the Avro type name used in error messages; classes
# without a mapping are returned unchanged.
def ruby_to_avro_type(ruby_class)
  avro_names = {
    NilClass => 'null',
    String => 'string',
    Float => 'float',
    Hash => 'record'
  }
  avro_names.key?(ruby_class) ? avro_names[ruby_class] : ruby_class
end
236
+
237
# Names the Avro integer type for a value: 'int' when it lies within
# INT_RANGE, 'long' otherwise.
def ruby_integer_to_avro_type(value)
  if INT_RANGE.cover?(value)
    'int'
  else
    'long'
  end
end
240
+ end
241
+ end
242
+ end
@@ -7,7 +7,7 @@
7
7
  # "License"); you may not use this file except in compliance
8
8
  # with the License. You may obtain a copy of the License at
9
9
  #
10
- # http://www.apache.org/licenses/LICENSE-2.0
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
11
  #
12
12
  # Unless required by applicable law or agreed to in writing,
13
13
  # software distributed under the License is distributed on an
@@ -44,13 +44,18 @@ class CaseFinder
44
44
  private
45
45
 
46
46
  def scan_case
47
- if id = @scanner.scan(/\/\/ \d+\n/)
47
+ if (id = @scanner.scan(/\/\/ \d+\n/))
48
48
  while @scanner.skip(/\/\/ .*\n/); end
49
49
 
50
50
  input = scan_input
51
51
  canonical = scan_canonical
52
52
  fingerprint = scan_fingerprint
53
-
53
+ if not fingerprint and @cases
54
+ fingerprint = @cases[-1].fingerprint
55
+ end
56
+ if fingerprint
57
+ fingerprint = fingerprint.to_i & 0xFFFF_FFFF_FFFF_FFFF
58
+ end
54
59
  Case.new(id, input, canonical, fingerprint)
55
60
  else
56
61
  @scanner.skip(/.*\n/)
@@ -61,7 +66,7 @@ class CaseFinder
61
66
  def scan_item(name)
62
67
  if @scanner.scan(/<<#{name}\n/)
63
68
  lines = []
64
- while line = @scanner.scan(/.+\n/)
69
+ while (line = @scanner.scan(/.+\n/))
65
70
  break if line.chomp == name
66
71
  lines << line
67
72
  end