avro 1.8.2 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest +6 -0
- data/avro.gemspec +21 -21
- data/lib/avro.rb +11 -0
- data/lib/avro/io.rb +44 -64
- data/lib/avro/ipc.rb +8 -8
- data/lib/avro/logical_types.rb +90 -0
- data/lib/avro/protocol.rb +11 -7
- data/lib/avro/schema.rb +89 -63
- data/lib/avro/schema_compatibility.rb +170 -0
- data/lib/avro/schema_validator.rb +242 -0
- data/test/random_data.rb +21 -2
- data/test/test_datafile.rb +3 -3
- data/test/test_io.rb +73 -6
- data/test/test_logical_types.rb +128 -0
- data/test/test_protocol.rb +36 -3
- data/test/test_schema.rb +323 -27
- data/test/test_schema_compatibility.rb +475 -0
- data/test/test_schema_validator.rb +554 -0
- data/test/tool.rb +0 -1
- metadata +20 -8
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
module Avro
|
|
18
|
+
class SchemaValidator
|
|
19
|
+
# Path used for the top-level datum when validation starts.
ROOT_IDENTIFIER = '.'.freeze
# Delimiter inserted between nested path segments.
PATH_SEPARATOR = '.'.freeze
# Inclusive value bounds checked for :int and :long schemas.
INT_RANGE = Range.new(Schema::INT_MIN_VALUE, Schema::INT_MAX_VALUE)
LONG_RANGE = Range.new(Schema::LONG_MIN_VALUE, Schema::LONG_MAX_VALUE)
# Union branches of these types have their detailed errors reported.
COMPLEX_TYPES = %i[array error map record request].freeze
# The only values accepted for a :boolean schema.
BOOLEAN_VALUES = [true, false].freeze
|
|
25
|
+
|
|
26
|
+
# Accumulates the error messages produced while validating one datum.
class Result
  attr_reader :errors

  def initialize
    @errors = []
  end

  # Appends a ready-made error message and returns the error list.
  def <<(error)
    errors.push(error)
  end

  # Records +message+ prefixed with the +path+ at which it occurred.
  def add_error(path, message)
    located_message = "at #{path} #{message}"
    self << located_message
  end

  # True once at least one (truthy) error has been recorded.
  def failure?
    errors.any?
  end

  # All recorded errors, one per line.
  def to_s
    @errors.join("\n")
  end
end
|
|
49
|
+
|
|
50
|
+
# Error raised when validation fails; it carries the Result holding the
# collected messages and uses that result's string form as its own.
class ValidationError < StandardError
  attr_reader :result

  # result: the validation outcome to expose; a fresh Result by default.
  def initialize(result = Result.new)
    @result = result
    super(result)
  end

  # Delegates to the wrapped result so the message lists its errors.
  def to_s
    @result.to_s
  end
end

# Signals that a datum's Ruby type does not match the schema type.
class TypeMismatchError < ValidationError; end
|
|
64
|
+
|
|
65
|
+
class << self
|
|
66
|
+
# Validates +logical_datum+ against +expected_schema+.
#
# options (Hash or nil):
#   :recursive            - when true (the default, also when the key is
#                           absent) nested values are validated too;
#                           otherwise only the top-level datum is checked.
#   :encoded              - passed on to the validators, which skip the
#                           schema's type adapter when it is truthy.
#   :fail_on_extra_fields - read by the recursive validator to report hash
#                           keys that are not fields of a record schema.
#
# Returns the (error-free) Result. Raises ValidationError carrying the
# Result when any error was recorded.
def validate!(expected_schema, logical_datum, options = { recursive: true, encoded: false, fail_on_extra_fields: false })
  # Merge into a new hash: the caller's hash may be frozen or reused and
  # must not be modified just to fill in the :recursive default.
  options = { recursive: true }.merge(options || {})

  result = Result.new
  if options[:recursive]
    validate_recursive(expected_schema, logical_datum, ROOT_IDENTIFIER, result, options)
  else
    validate_simple(expected_schema, logical_datum, ROOT_IDENTIFIER, result, options)
  end
  fail ValidationError, result if result.failure?
  result
end
|
|
79
|
+
|
|
80
|
+
private
|
|
81
|
+
|
|
82
|
+
# Validates the datum at +path+ and then descends into arrays, maps, unions
# and record-like schemas, adding every problem found to +result+.
# A TypeMismatchError raised while descending is turned into an
# "expected type ..." error at +path+.
def validate_recursive(expected_schema, logical_datum, path, result, options = {})
  datum = resolve_datum(expected_schema, logical_datum, options[:encoded])

  # The datum has been resolved above, so the simple check is told it is
  # already encoded.
  validate_simple(expected_schema, datum, path, result, encoded: true)

  case expected_schema.type_sym
  when :array then validate_array(expected_schema, datum, path, result, options)
  when :map then validate_map(expected_schema, datum, path, result, options)
  when :union then validate_union(expected_schema, datum, path, result, options)
  when :record, :error, :request
    raise TypeMismatchError unless datum.is_a?(Hash)

    record_fields = expected_schema.fields
    record_fields.each do |field|
      validate_recursive(field.type, datum[field.name], deeper_path_for_hash(field.name, path), result, options)
    end

    if options[:fail_on_extra_fields]
      unknown_names = datum.keys.map(&:to_s) - record_fields.map(&:name)
      unknown_names.each do |name|
        result.add_error(path, "extra field '#{name}' - not in schema")
      end
    end
  end
rescue TypeMismatchError
  result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
end
|
|
111
|
+
|
|
112
|
+
# Checks a single datum against a schema without descending into nested
# values. Wrong Ruby types are reported as "expected type ..." errors;
# out-of-range integers, wrongly sized fixed values and unknown enum
# symbols get their own messages.
def validate_simple(expected_schema, logical_datum, path, result, options = {})
  datum = resolve_datum(expected_schema, logical_datum, options[:encoded])
  validate_type(expected_schema)

  type = expected_schema.type_sym
  case type
  when :null
    raise TypeMismatchError unless datum.nil?
  when :boolean
    raise TypeMismatchError unless BOOLEAN_VALUES.include?(datum)
  when :string, :bytes
    raise TypeMismatchError unless datum.is_a?(String)
  when :int, :long
    raise TypeMismatchError unless datum.is_a?(Integer)

    bounds = type == :int ? INT_RANGE : LONG_RANGE
    result.add_error(path, "out of bound value #{datum}") unless bounds.cover?(datum)
  when :float, :double
    # Integers are accepted wherever a floating point number is expected.
    raise TypeMismatchError unless datum.is_a?(Float) || datum.is_a?(Integer)
  when :fixed
    expected_size = expected_schema.size
    if !datum.is_a?(String)
      result.add_error(path, "expected fixed with size #{expected_size}, got #{actual_value_message(datum)}")
    elsif datum.bytesize != expected_size
      result.add_error(path, fixed_string_message(expected_size, datum))
    end
  when :enum
    allowed = expected_schema.symbols
    result.add_error(path, enum_message(allowed, datum)) unless allowed.include?(datum)
  end
rescue TypeMismatchError
  result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
end
|
|
143
|
+
|
|
144
|
+
# Returns the datum in the form the validators inspect. An already encoded
# datum is returned untouched; otherwise it is passed through the schema's
# type adapter, and any StandardError raised there yields nil.
def resolve_datum(expected_schema, logical_datum, encoded)
  return logical_datum if encoded

  begin
    expected_schema.type_adapter.encode(logical_datum)
  rescue StandardError
    nil
  end
end
|
|
151
|
+
|
|
152
|
+
# Raises a RuntimeError when the schema's type symbol is not listed in
# Avro::Schema::VALID_TYPES_SYM; returns nil otherwise.
def validate_type(expected_schema)
  return if Avro::Schema::VALID_TYPES_SYM.include?(expected_schema.type_sym)

  raise "Unexpected schema type #{expected_schema.type_sym} #{expected_schema.inspect}"
end
|
|
157
|
+
|
|
158
|
+
# Error text for a fixed value whose byte length differs from the size
# declared by the schema.
def fixed_string_message(size, datum)
  actual_size = datum.bytesize
  "expected fixed with size #{size}, got \"#{datum}\" with size #{actual_size}"
end
|
|
161
|
+
|
|
162
|
+
# Error text for a datum that is not one of the enum's symbols.
def enum_message(symbols, datum)
  actual = actual_value_message(datum)
  "expected enum with values #{symbols}, got #{actual}"
end
|
|
165
|
+
|
|
166
|
+
# Validates every element of an array datum against the schema's item
# type, extending the path with the element index ("path[i]").
# Raises TypeMismatchError when the datum is not an Array.
def validate_array(expected_schema, datum, path, result, options = {})
  raise TypeMismatchError unless datum.is_a?(Array)

  datum.each_with_index do |element, index|
    element_path = path + "[#{index}]"
    validate_recursive(expected_schema.items, element, element_path, result, options)
  end
end
|
|
172
|
+
|
|
173
|
+
# Validates a map datum: first reports every key that is not a String,
# then validates each value against the schema's value type.
# Raises TypeMismatchError when the datum is not a Hash.
def validate_map(expected_schema, datum, path, result, options = {})
  raise TypeMismatchError unless datum.is_a?(Hash)

  # Key errors are collected in a first pass so they precede value errors.
  datum.each_key do |key|
    next if key.is_a?(String)

    result.add_error(path, "unexpected key type '#{ruby_to_avro_type(key.class)}' in map")
  end

  datum.each_pair do |key, value|
    validate_recursive(expected_schema.values, value, deeper_path_for_hash(key, path), result, options)
  end
end
|
|
183
|
+
|
|
184
|
+
# Validates a datum against a union schema. A one-branch union is validated
# directly against that branch. Otherwise the datum is accepted if any
# branch validates it; if none does, the errors of the first failing
# complex-typed branch are reported, or a generic "expected union of ..."
# error when no complex branch failed.
def validate_union(expected_schema, datum, path, result, options = {})
  branches = expected_schema.schemas
  if branches.size == 1
    validate_recursive(branches.first, datum, path, result, options)
    return
  end

  failures = []
  matching_branch = first_compatible_type(datum, expected_schema, path, failures, options)
  return unless matching_branch.nil?

  complex_failure = failures.find { |failure| COMPLEX_TYPES.include?(failure[:type]) }
  if complex_failure
    complex_failure[:result].errors.each { |error| result << error }
  else
    type_list = branches.map { |branch| "'#{branch.type_sym}'" }.join(', ')
    result.add_error(path, "expected union of [#{type_list}], got #{actual_value_message(datum)}")
  end
end
|
|
201
|
+
|
|
202
|
+
# Returns the first union branch that validates the datum without errors,
# or nil when none does. Each failing branch tried on the way is appended
# to +failures+ as { type:, result: }.
def first_compatible_type(datum, expected_schema, path, failures, options = {})
  expected_schema.schemas.find do |branch|
    branch_result = Result.new
    validate_recursive(branch, datum, path, branch_result, options)

    failed = branch_result.failure?
    failures << { type: branch.type_sym, result: branch_result } if failed
    !failed
  end
end
|
|
210
|
+
|
|
211
|
+
# Builds the path of a hash entry by appending +sub_key+ to +path+.
def deeper_path_for_hash(sub_key, path)
  joined = "#{path}#{PATH_SEPARATOR}#{sub_key}"
  # Runs of separators are collapsed, so appending to a path that already
  # ends with one does not double it.
  joined.squeeze(PATH_SEPARATOR)
end
|
|
214
|
+
|
|
215
|
+
# Describes the value actually received: its Avro type name followed by
# the inspected value, or just the type name when the value is nil.
def actual_value_message(value)
  avro_type = value.is_a?(Integer) ? ruby_integer_to_avro_type(value) : ruby_to_avro_type(value.class)
  return avro_type if value.nil?

  "#{avro_type} with value #{value.inspect}"
end
|
|
227
|
+
|
|
228
|
+
# Maps a Ruby class to the Avro type name used in error messages. Classes
# without a mapping are returned unchanged.
def ruby_to_avro_type(ruby_class)
  avro_names = {
    NilClass => 'null',
    String => 'string',
    Float => 'float',
    Hash => 'record'
  }
  avro_names.key?(ruby_class) ? avro_names[ruby_class] : ruby_class
end
|
|
236
|
+
|
|
237
|
+
# Names an integer 'int' when it lies within INT_RANGE and 'long' otherwise.
def ruby_integer_to_avro_type(value)
  if INT_RANGE.cover?(value)
    'int'
  else
    'long'
  end
end
|
|
240
|
+
end
|
|
241
|
+
end
|
|
242
|
+
end
|
data/test/random_data.rb
CHANGED
|
@@ -27,15 +27,17 @@ class RandomData
|
|
|
27
27
|
end
|
|
28
28
|
|
|
29
29
|
def nextdata(schm, d=0)
|
|
30
|
+
return logical_nextdata(schm, d=0) unless schm.type_adapter.eql?(Avro::LogicalTypes::Identity)
|
|
31
|
+
|
|
30
32
|
case schm.type_sym
|
|
31
33
|
when :boolean
|
|
32
34
|
rand > 0.5
|
|
33
35
|
when :string
|
|
34
36
|
randstr()
|
|
35
37
|
when :int
|
|
36
|
-
|
|
38
|
+
rand_int
|
|
37
39
|
when :long
|
|
38
|
-
|
|
40
|
+
rand_long
|
|
39
41
|
when :float
|
|
40
42
|
(-1024 + 2048 * rand).round.to_f
|
|
41
43
|
when :double
|
|
@@ -79,6 +81,15 @@ class RandomData
|
|
|
79
81
|
end
|
|
80
82
|
end
|
|
81
83
|
|
|
84
|
+
# Generates a random datum for a schema that carries a logical type.
# Returns nil for logical types not handled here. The second parameter is
# accepted for signature parity with nextdata and is not used.
def logical_nextdata(schm, _d=0)
  case schm.logical_type
  when 'date'
    Avro::LogicalTypes::IntDate.decode(rand_int)
  when 'timestamp-micros'
    Avro::LogicalTypes::TimestampMicros.decode(rand_long)
  when 'timestamp-millis'
    # Decode with the millisecond adapter so the generated value carries
    # only the precision a timestamp-millis schema can round-trip.
    Avro::LogicalTypes::TimestampMillis.decode(rand_long)
  end
end
|
|
92
|
+
|
|
82
93
|
CHARPOOL = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789'
|
|
83
94
|
BYTEPOOL = '12345abcd'
|
|
84
95
|
|
|
@@ -87,4 +98,12 @@ class RandomData
|
|
|
87
98
|
rand(length+1).times { str << chars[rand(chars.size)] }
|
|
88
99
|
str
|
|
89
100
|
end
|
|
101
|
+
|
|
102
|
+
# Random integer drawn from the full Avro int range, both bounds included.
# (rand(max - min) + min excluded INT_MAX_VALUE because rand(n) never
# returns n.)
def rand_int
  rand(Avro::Schema::INT_MIN_VALUE..Avro::Schema::INT_MAX_VALUE)
end
|
|
105
|
+
|
|
106
|
+
# Random integer drawn from the full Avro long range, both bounds included.
# (rand(max - min) + min excluded LONG_MAX_VALUE because rand(n) never
# returns n.)
def rand_long
  rand(Avro::Schema::LONG_MIN_VALUE..Avro::Schema::LONG_MAX_VALUE)
end
|
|
90
109
|
end
|
data/test/test_datafile.rb
CHANGED
|
@@ -20,13 +20,13 @@ require 'test_help'
|
|
|
20
20
|
class TestDataFile < Test::Unit::TestCase
|
|
21
21
|
HERE = File.expand_path File.dirname(__FILE__)
|
|
22
22
|
# Deletes data.avr in this test's directory, if it exists, before a test.
def setup
  data_file = HERE + '/data.avr'
  File.unlink(data_file) if File.exist?(data_file)
end
|
|
27
27
|
|
|
28
28
|
# Deletes data.avr in this test's directory, if it exists, after a test.
def teardown
  data_file = HERE + '/data.avr'
  File.unlink(data_file) if File.exist?(data_file)
end
|
|
@@ -38,7 +38,7 @@ class TestDataFile < Test::Unit::TestCase
|
|
|
38
38
|
"fields" : [
|
|
39
39
|
{"name": "username", "type": "string"},
|
|
40
40
|
{"name": "age", "type": "int"},
|
|
41
|
-
{"name": "verified", "type": "boolean", "default":
|
|
41
|
+
{"name": "verified", "type": "boolean", "default": false}
|
|
42
42
|
]}
|
|
43
43
|
JSON
|
|
44
44
|
|
data/test/test_io.rb
CHANGED
|
@@ -84,6 +84,17 @@ EOS
|
|
|
84
84
|
check_default(record_schema, '{"f": 11}', {"f" => 11})
|
|
85
85
|
end
|
|
86
86
|
|
|
87
|
+
# Runs the shared check helper on a record whose only field is a long
# carrying the timestamp-micros logical type.
def test_record_with_logical_type
  schema_json = <<EOS
      {"type": "record",
       "name": "Test",
       "fields": [{"name": "ts",
                   "type": {"type": "long",
                            "logicalType": "timestamp-micros"}}]}
EOS
  check(schema_json)
end
|
|
97
|
+
|
|
87
98
|
def test_error
|
|
88
99
|
error_schema = <<EOS
|
|
89
100
|
{"type": "error",
|
|
@@ -115,6 +126,7 @@ EOS
|
|
|
115
126
|
def test_union
|
|
116
127
|
union_schema = <<EOS
|
|
117
128
|
["string",
|
|
129
|
+
{"type": "int", "logicalType": "date"},
|
|
118
130
|
"null",
|
|
119
131
|
"long",
|
|
120
132
|
{"type": "record",
|
|
@@ -146,10 +158,31 @@ EOS
|
|
|
146
158
|
check_default(fixed_schema, '"a"', "a")
|
|
147
159
|
end
|
|
148
160
|
|
|
161
|
+
# Writing nil for a record schema must raise Avro::IO::AvroTypeError.
def test_record_with_nil
  record_schema = Avro::Schema.parse('{"type":"record", "name":"rec", "fields":[{"type":"int", "name":"i"}]}')
  assert_raise(Avro::IO::AvroTypeError) { write_datum(nil, record_schema) }
end
|
|
167
|
+
|
|
168
|
+
# Writing nil for an array schema must raise Avro::IO::AvroTypeError.
def test_array_with_nil
  array_schema = Avro::Schema.parse('{"type":"array", "items":"int"}')
  assert_raise(Avro::IO::AvroTypeError) { write_datum(nil, array_schema) }
end
|
|
174
|
+
|
|
175
|
+
# Writing nil for a map schema must raise Avro::IO::AvroTypeError.
def test_map_with_nil
  map_schema = Avro::Schema.parse('{"type":"map", "values":"long"}')
  assert_raise(Avro::IO::AvroTypeError) { write_datum(nil, map_schema) }
end
|
|
181
|
+
|
|
149
182
|
# Parsing an enum that repeats a symbol must raise SchemaParseError with
# the duplicate-symbol message.
def test_enum_with_duplicate
  schema_json = '{"type": "enum", "name": "Test","symbols" : ["AA", "AA"]}'
  expected_error = Avro::SchemaParseError.new('Duplicate symbol: ["AA", "AA"]')
  assert_raises(expected_error) { Avro::Schema.parse schema_json }
end
|
|
155
188
|
|
|
@@ -256,7 +289,7 @@ EOS
|
|
|
256
289
|
end
|
|
257
290
|
|
|
258
291
|
def test_skip_long
|
|
259
|
-
for value_to_skip,
|
|
292
|
+
for value_to_skip, _hex_encoding in BINARY_INT_ENCODINGS
|
|
260
293
|
value_to_read = 6253
|
|
261
294
|
|
|
262
295
|
# write some data in binary to string buffer
|
|
@@ -281,7 +314,7 @@ EOS
|
|
|
281
314
|
end
|
|
282
315
|
|
|
283
316
|
def test_skip_int
|
|
284
|
-
for value_to_skip,
|
|
317
|
+
for value_to_skip, _hex_encoding in BINARY_INT_ENCODINGS
|
|
285
318
|
value_to_read = 6253
|
|
286
319
|
|
|
287
320
|
writer = StringIO.new
|
|
@@ -331,7 +364,7 @@ EOS
|
|
|
331
364
|
datum_to_write = 219
|
|
332
365
|
for rs in promotable_schemas[(i + 1)..-1]
|
|
333
366
|
readers_schema = Avro::Schema.parse(rs)
|
|
334
|
-
writer,
|
|
367
|
+
writer, _enc, _dw = write_datum(datum_to_write, writers_schema)
|
|
335
368
|
datum_read = read_datum(writer, writers_schema, readers_schema)
|
|
336
369
|
if datum_read != datum_to_write
|
|
337
370
|
incorrect += 1
|
|
@@ -341,6 +374,40 @@ EOS
|
|
|
341
374
|
end
|
|
342
375
|
end
|
|
343
376
|
|
|
377
|
+
# A string written with a "string" schema must read back unchanged through
# a "bytes" reader schema, and vice versa.
def test_interchangeable_schemas
  interchangeable_schemas = ['"string"', '"bytes"']
  datum_to_write = 'foo'
  incorrect = 0
  interchangeable_schemas.each_with_index do |ws, i|
    writers_schema = Avro::Schema.parse(ws)
    # Read with the other schema of the pair.
    readers_schema = Avro::Schema.parse(interchangeable_schemas[i == 0 ? 1 : 0])
    writer, * = write_datum(datum_to_write, writers_schema)
    datum_read = read_datum(writer, writers_schema, readers_schema)
    incorrect += 1 if datum_read != datum_to_write
  end
  # assert_equal takes (expected, actual); the expected count goes first so
  # a failure message reads the right way round.
  assert_equal(0, incorrect)
end
|
|
392
|
+
|
|
393
|
+
# An array of ints must read back unchanged through a reader schema whose
# items are longs.
def test_array_schema_promotion
  writers_schema = Avro::Schema.parse('{"type":"array", "items":"int"}')
  readers_schema = Avro::Schema.parse('{"type":"array", "items":"long"}')
  datum_to_write = [1, 2]
  writer, * = write_datum(datum_to_write, writers_schema)
  datum_read = read_datum(writer, writers_schema, readers_schema)
  # assert_equal takes (expected, actual): the written datum is the expectation.
  assert_equal(datum_to_write, datum_read)
end
|
|
401
|
+
|
|
402
|
+
# A map of ints must read back unchanged through a reader schema whose
# values are longs.
def test_map_schema_promotion
  writers_schema = Avro::Schema.parse('{"type":"map", "values":"int"}')
  readers_schema = Avro::Schema.parse('{"type":"map", "values":"long"}')
  datum_to_write = { 'foo' => 1, 'bar' => 2 }
  writer, * = write_datum(datum_to_write, writers_schema)
  datum_read = read_datum(writer, writers_schema, readers_schema)
  # assert_equal takes (expected, actual): the written datum is the expectation.
  assert_equal(datum_to_write, datum_read)
end
|
|
410
|
+
|
|
344
411
|
def test_snappy_backward_compat
|
|
345
412
|
# a snappy-compressed block payload without the checksum
|
|
346
413
|
# this has no back-references, just one literal so the last 9
|
|
@@ -417,7 +484,7 @@ EOS
|
|
|
417
484
|
|
|
418
485
|
def checkser(schm, randomdata)
|
|
419
486
|
datum = randomdata.next
|
|
420
|
-
assert validate(schm, datum)
|
|
487
|
+
assert validate(schm, datum), 'datum is not valid for schema'
|
|
421
488
|
w = Avro::IO::DatumWriter.new(schm)
|
|
422
489
|
writer = StringIO.new "", "w"
|
|
423
490
|
w.write(datum, Avro::IO::BinaryEncoder.new(writer))
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
# or more contributor license agreements. See the NOTICE file
|
|
4
|
+
# distributed with this work for additional information
|
|
5
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
# to you under the Apache License, Version 2.0 (the
|
|
7
|
+
# "License"); you may not use this file except in compliance
|
|
8
|
+
# with the License. You may obtain a copy of the License at
|
|
9
|
+
#
|
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
#
|
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
# See the License for the specific language governing permissions and
|
|
16
|
+
# limitations under the License.
|
|
17
|
+
|
|
18
|
+
require 'test_help'
|
|
19
|
+
|
|
20
|
+
# Tests for the date and timestamp logical types: schema parsing, binary
# round trips through DatumWriter/DatumReader, and the adapters'
# encode/decode conversions. Also checks that a fixed "duration" schema
# reports its logical type.
class TestLogicalTypes < Test::Unit::TestCase
  def test_int_date
    date_schema = Avro::Schema.parse <<-SCHEMA
      { "type": "int", "logicalType": "date" }
    SCHEMA

    assert_equal('date', date_schema.logical_type)

    today = Date.today
    assert_encode_and_decode(today, date_schema)
    assert_preencoded(Avro::LogicalTypes::IntDate.encode(today), date_schema, today)
  end

  def test_int_date_conversion
    adapter = Avro::LogicalTypes::IntDate

    # Dates are encoded as days relative to 1970-01-01.
    assert_equal(5, adapter.encode(Date.new(1970, 1, 6)))
    assert_equal(0, adapter.encode(Date.new(1970, 1, 1)))
    assert_equal(-5, adapter.encode(Date.new(1969, 12, 27)))

    assert_equal(Date.new(1970, 1, 6), adapter.decode(5))
    assert_equal(Date.new(1970, 1, 1), adapter.decode(0))
    assert_equal(Date.new(1969, 12, 27), adapter.decode(-5))
  end

  def test_timestamp_millis_long
    millis_schema = Avro::Schema.parse <<-SCHEMA
      { "type": "long", "logicalType": "timestamp-millis" }
    SCHEMA

    # The Time.at format is (seconds, microseconds) since Epoch.
    timestamp = Time.at(628232400, 12000)

    assert_equal('timestamp-millis', millis_schema.logical_type)
    assert_encode_and_decode(timestamp, millis_schema)
    assert_preencoded(Avro::LogicalTypes::TimestampMillis.encode(timestamp), millis_schema, timestamp.utc)
  end

  def test_timestamp_millis_long_conversion
    adapter = Avro::LogicalTypes::TimestampMillis

    current = Time.now.utc
    # Drop the sub-millisecond part so the round trip can be exact.
    whole_millis_usec = current.usec / 1000 * 1000
    now_millis = Time.utc(current.year, current.month, current.day, current.hour, current.min, current.sec, whole_millis_usec)

    assert_equal(now_millis, adapter.decode(adapter.encode(now_millis)))
    assert_equal(1432849613221, adapter.encode(Time.utc(2015, 5, 28, 21, 46, 53, 221000)))
    assert_equal(1432849613221, adapter.encode(DateTime.new(2015, 5, 28, 21, 46, 53.221)))
    assert_equal(Time.utc(2015, 5, 28, 21, 46, 53, 221000), adapter.decode(1432849613221))
  end

  def test_timestamp_micros_long
    micros_schema = Avro::Schema.parse <<-SCHEMA
      { "type": "long", "logicalType": "timestamp-micros" }
    SCHEMA

    # The Time.at format is (seconds, microseconds) since Epoch.
    timestamp = Time.at(628232400, 12345)

    assert_equal('timestamp-micros', micros_schema.logical_type)
    assert_encode_and_decode(timestamp, micros_schema)
    assert_preencoded(Avro::LogicalTypes::TimestampMicros.encode(timestamp), micros_schema, timestamp.utc)
  end

  def test_timestamp_micros_long_conversion
    adapter = Avro::LogicalTypes::TimestampMicros

    current = Time.now.utc
    expected_now = Time.utc(current.year, current.month, current.day, current.hour, current.min, current.sec, current.usec)

    assert_equal(expected_now, adapter.decode(adapter.encode(current)))
    assert_equal(1432849613221843, adapter.encode(Time.utc(2015, 5, 28, 21, 46, 53, 221843)))
    assert_equal(1432849613221843, adapter.encode(DateTime.new(2015, 5, 28, 21, 46, 53.221843)))
    assert_equal(Time.utc(2015, 5, 28, 21, 46, 53, 221843), adapter.decode(1432849613221843))
  end

  def test_parse_fixed_duration
    duration_schema = Avro::Schema.parse <<-SCHEMA
      { "type": "fixed", "size": 12, "name": "fixed_dur", "logicalType": "duration" }
    SCHEMA

    assert_equal('duration', duration_schema.logical_type)
  end

  # Serializes +datum+ with +schema+ and returns the binary string.
  def encode(datum, schema)
    io = StringIO.new("")
    binary_encoder = Avro::IO::BinaryEncoder.new(io)
    Avro::IO::DatumWriter.new(schema).write(datum, binary_encoder)
    io.string
  end

  # Deserializes +encoded+ using +schema+ as both writer and reader schema.
  def decode(encoded, schema)
    binary_decoder = Avro::IO::BinaryDecoder.new(StringIO.new(encoded))
    Avro::IO::DatumReader.new(schema, schema).read(binary_decoder)
  end

  # Asserts that +datum+ survives an encode/decode round trip.
  def assert_encode_and_decode(datum, schema)
    round_tripped = decode(encode(datum, schema), schema)
    assert_equal(datum, round_tripped)
  end

  # Asserts that writing the already encoded +datum+ reads back as +decoded+.
  def assert_preencoded(datum, schema, decoded)
    round_tripped = decode(encode(datum, schema), schema)
    assert_equal(decoded, round_tripped)
  end
end
|