avro 1.8.2 → 1.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest +6 -0
- data/avro.gemspec +21 -21
- data/lib/avro.rb +11 -0
- data/lib/avro/io.rb +44 -64
- data/lib/avro/ipc.rb +8 -8
- data/lib/avro/logical_types.rb +90 -0
- data/lib/avro/protocol.rb +11 -7
- data/lib/avro/schema.rb +89 -63
- data/lib/avro/schema_compatibility.rb +170 -0
- data/lib/avro/schema_validator.rb +242 -0
- data/test/random_data.rb +21 -2
- data/test/test_datafile.rb +3 -3
- data/test/test_io.rb +73 -6
- data/test/test_logical_types.rb +128 -0
- data/test/test_protocol.rb +36 -3
- data/test/test_schema.rb +323 -27
- data/test/test_schema_compatibility.rb +475 -0
- data/test/test_schema_validator.rb +554 -0
- data/test/tool.rb +0 -1
- metadata +20 -8
@@ -0,0 +1,242 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
|
17
|
+
module Avro
|
18
|
+
class SchemaValidator
|
19
|
+
ROOT_IDENTIFIER = '.'.freeze
|
20
|
+
PATH_SEPARATOR = '.'.freeze
|
21
|
+
INT_RANGE = Schema::INT_MIN_VALUE..Schema::INT_MAX_VALUE
|
22
|
+
LONG_RANGE = Schema::LONG_MIN_VALUE..Schema::LONG_MAX_VALUE
|
23
|
+
COMPLEX_TYPES = [:array, :error, :map, :record, :request].freeze
|
24
|
+
BOOLEAN_VALUES = [true, false].freeze
|
25
|
+
|
26
|
+
class Result
|
27
|
+
attr_reader :errors
|
28
|
+
|
29
|
+
def initialize
|
30
|
+
@errors = []
|
31
|
+
end
|
32
|
+
|
33
|
+
def <<(error)
|
34
|
+
@errors << error
|
35
|
+
end
|
36
|
+
|
37
|
+
def add_error(path, message)
|
38
|
+
self << "at #{path} #{message}"
|
39
|
+
end
|
40
|
+
|
41
|
+
def failure?
|
42
|
+
@errors.any?
|
43
|
+
end
|
44
|
+
|
45
|
+
def to_s
|
46
|
+
errors.join("\n")
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
class ValidationError < StandardError
|
51
|
+
attr_reader :result
|
52
|
+
|
53
|
+
def initialize(result = Result.new)
|
54
|
+
@result = result
|
55
|
+
super
|
56
|
+
end
|
57
|
+
|
58
|
+
def to_s
|
59
|
+
result.to_s
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
TypeMismatchError = Class.new(ValidationError)
|
64
|
+
|
65
|
+
class << self
|
66
|
+
# Validates +logical_datum+ against +expected_schema+ and raises when any
# error is recorded.
#
# expected_schema - schema object the datum is checked against.
# logical_datum   - the value to validate.
# options         - Hash of flags; nil is treated as an empty Hash.
#                   :recursive - when the key is absent it defaults to true.
#                                A truthy value dispatches to
#                                validate_recursive, a falsy one to
#                                validate_simple.
#                   Every other key (e.g. :encoded, :fail_on_extra_fields)
#                   is handed to the chosen validator unchanged.
#
# Returns the Result when no error was recorded.
# Raises ValidationError (built from the Result) when the Result reports a
# failure.
def validate!(expected_schema, logical_datum, options = { recursive: true, encoded: false, fail_on_extra_fields: false })
  # Work on a private copy: the caller's hash may be frozen or shared, and
  # filling in the :recursive default must not leak back into it.
  options = options ? options.dup : {}
  options[:recursive] = true unless options.key?(:recursive)

  result = Result.new
  if options[:recursive]
    validate_recursive(expected_schema, logical_datum, ROOT_IDENTIFIER, result, options)
  else
    validate_simple(expected_schema, logical_datum, ROOT_IDENTIFIER, result, options)
  end
  fail ValidationError, result if result.failure?
  result
end
|
79
|
+
|
80
|
+
private
|
81
|
+
|
82
|
+
def validate_recursive(expected_schema, logical_datum, path, result, options = {})
|
83
|
+
datum = resolve_datum(expected_schema, logical_datum, options[:encoded])
|
84
|
+
|
85
|
+
validate_simple(expected_schema, datum, path, result, encoded: true)
|
86
|
+
|
87
|
+
case expected_schema.type_sym
|
88
|
+
when :array
|
89
|
+
validate_array(expected_schema, datum, path, result, options)
|
90
|
+
when :map
|
91
|
+
validate_map(expected_schema, datum, path, result, options)
|
92
|
+
when :union
|
93
|
+
validate_union(expected_schema, datum, path, result, options)
|
94
|
+
when :record, :error, :request
|
95
|
+
fail TypeMismatchError unless datum.is_a?(Hash)
|
96
|
+
expected_schema.fields.each do |field|
|
97
|
+
deeper_path = deeper_path_for_hash(field.name, path)
|
98
|
+
validate_recursive(field.type, datum[field.name], deeper_path, result, options)
|
99
|
+
end
|
100
|
+
if options[:fail_on_extra_fields]
|
101
|
+
datum_fields = datum.keys.map(&:to_s)
|
102
|
+
schema_fields = expected_schema.fields.map(&:name)
|
103
|
+
(datum_fields - schema_fields).each do |extra_field|
|
104
|
+
result.add_error(path, "extra field '#{extra_field}' - not in schema")
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
rescue TypeMismatchError
|
109
|
+
result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
|
110
|
+
end
|
111
|
+
|
112
|
+
def validate_simple(expected_schema, logical_datum, path, result, options = {})
|
113
|
+
datum = resolve_datum(expected_schema, logical_datum, options[:encoded])
|
114
|
+
validate_type(expected_schema)
|
115
|
+
|
116
|
+
case expected_schema.type_sym
|
117
|
+
when :null
|
118
|
+
fail TypeMismatchError unless datum.nil?
|
119
|
+
when :boolean
|
120
|
+
fail TypeMismatchError unless BOOLEAN_VALUES.include?(datum)
|
121
|
+
when :string, :bytes
|
122
|
+
fail TypeMismatchError unless datum.is_a?(String)
|
123
|
+
when :int
|
124
|
+
fail TypeMismatchError unless datum.is_a?(Integer)
|
125
|
+
result.add_error(path, "out of bound value #{datum}") unless INT_RANGE.cover?(datum)
|
126
|
+
when :long
|
127
|
+
fail TypeMismatchError unless datum.is_a?(Integer)
|
128
|
+
result.add_error(path, "out of bound value #{datum}") unless LONG_RANGE.cover?(datum)
|
129
|
+
when :float, :double
|
130
|
+
fail TypeMismatchError unless datum.is_a?(Float) || datum.is_a?(Integer)
|
131
|
+
when :fixed
|
132
|
+
if datum.is_a? String
|
133
|
+
result.add_error(path, fixed_string_message(expected_schema.size, datum)) unless datum.bytesize == expected_schema.size
|
134
|
+
else
|
135
|
+
result.add_error(path, "expected fixed with size #{expected_schema.size}, got #{actual_value_message(datum)}")
|
136
|
+
end
|
137
|
+
when :enum
|
138
|
+
result.add_error(path, enum_message(expected_schema.symbols, datum)) unless expected_schema.symbols.include?(datum)
|
139
|
+
end
|
140
|
+
rescue TypeMismatchError
|
141
|
+
result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
|
142
|
+
end
|
143
|
+
|
144
|
+
def resolve_datum(expected_schema, logical_datum, encoded)
|
145
|
+
if encoded
|
146
|
+
logical_datum
|
147
|
+
else
|
148
|
+
expected_schema.type_adapter.encode(logical_datum) rescue nil
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
def validate_type(expected_schema)
|
153
|
+
unless Avro::Schema::VALID_TYPES_SYM.include?(expected_schema.type_sym)
|
154
|
+
fail "Unexpected schema type #{expected_schema.type_sym} #{expected_schema.inspect}"
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
def fixed_string_message(size, datum)
|
159
|
+
"expected fixed with size #{size}, got \"#{datum}\" with size #{datum.bytesize}"
|
160
|
+
end
|
161
|
+
|
162
|
+
def enum_message(symbols, datum)
|
163
|
+
"expected enum with values #{symbols}, got #{actual_value_message(datum)}"
|
164
|
+
end
|
165
|
+
|
166
|
+
def validate_array(expected_schema, datum, path, result, options = {})
|
167
|
+
fail TypeMismatchError unless datum.is_a?(Array)
|
168
|
+
datum.each_with_index do |d, i|
|
169
|
+
validate_recursive(expected_schema.items, d, path + "[#{i}]", result, options)
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
def validate_map(expected_schema, datum, path, result, options = {})
|
174
|
+
fail TypeMismatchError unless datum.is_a?(Hash)
|
175
|
+
datum.keys.each do |k|
|
176
|
+
result.add_error(path, "unexpected key type '#{ruby_to_avro_type(k.class)}' in map") unless k.is_a?(String)
|
177
|
+
end
|
178
|
+
datum.each do |k, v|
|
179
|
+
deeper_path = deeper_path_for_hash(k, path)
|
180
|
+
validate_recursive(expected_schema.values, v, deeper_path, result, options)
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
def validate_union(expected_schema, datum, path, result, options = {})
|
185
|
+
if expected_schema.schemas.size == 1
|
186
|
+
validate_recursive(expected_schema.schemas.first, datum, path, result, options)
|
187
|
+
return
|
188
|
+
end
|
189
|
+
failures = []
|
190
|
+
compatible_type = first_compatible_type(datum, expected_schema, path, failures, options)
|
191
|
+
return unless compatible_type.nil?
|
192
|
+
|
193
|
+
complex_type_failed = failures.detect { |r| COMPLEX_TYPES.include?(r[:type]) }
|
194
|
+
if complex_type_failed
|
195
|
+
complex_type_failed[:result].errors.each { |error| result << error }
|
196
|
+
else
|
197
|
+
types = expected_schema.schemas.map { |s| "'#{s.type_sym}'" }.join(', ')
|
198
|
+
result.add_error(path, "expected union of [#{types}], got #{actual_value_message(datum)}")
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
202
|
+
def first_compatible_type(datum, expected_schema, path, failures, options = {})
|
203
|
+
expected_schema.schemas.find do |schema|
|
204
|
+
result = Result.new
|
205
|
+
validate_recursive(schema, datum, path, result, options)
|
206
|
+
failures << { type: schema.type_sym, result: result } if result.failure?
|
207
|
+
!result.failure?
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
# Builds the error-reporting path for +sub_key+ nested under +path+.
#
# sub_key - record field name or map key; interpolated as-is.
# path    - path of the enclosing datum.
#
# Returns a String: +path+, PATH_SEPARATOR, +sub_key+. The separator is left
# out when +path+ already ends with it (as the root path does), which avoids
# a doubled separator without rewriting dots that belong to keys.
def deeper_path_for_hash(sub_key, path)
  return "#{path}#{sub_key}" if path.end_with?(PATH_SEPARATOR)

  "#{path}#{PATH_SEPARATOR}#{sub_key}"
end
|
214
|
+
|
215
|
+
def actual_value_message(value)
|
216
|
+
avro_type = if value.is_a?(Integer)
|
217
|
+
ruby_integer_to_avro_type(value)
|
218
|
+
else
|
219
|
+
ruby_to_avro_type(value.class)
|
220
|
+
end
|
221
|
+
if value.nil?
|
222
|
+
avro_type
|
223
|
+
else
|
224
|
+
"#{avro_type} with value #{value.inspect}"
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
228
|
+
def ruby_to_avro_type(ruby_class)
|
229
|
+
{
|
230
|
+
NilClass => 'null',
|
231
|
+
String => 'string',
|
232
|
+
Float => 'float',
|
233
|
+
Hash => 'record'
|
234
|
+
}.fetch(ruby_class, ruby_class)
|
235
|
+
end
|
236
|
+
|
237
|
+
def ruby_integer_to_avro_type(value)
|
238
|
+
INT_RANGE.cover?(value) ? 'int' : 'long'
|
239
|
+
end
|
240
|
+
end
|
241
|
+
end
|
242
|
+
end
|
data/test/random_data.rb
CHANGED
@@ -27,15 +27,17 @@ class RandomData
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def nextdata(schm, d=0)
|
30
|
+
return logical_nextdata(schm, d=0) unless schm.type_adapter.eql?(Avro::LogicalTypes::Identity)
|
31
|
+
|
30
32
|
case schm.type_sym
|
31
33
|
when :boolean
|
32
34
|
rand > 0.5
|
33
35
|
when :string
|
34
36
|
randstr()
|
35
37
|
when :int
|
36
|
-
|
38
|
+
rand_int
|
37
39
|
when :long
|
38
|
-
|
40
|
+
rand_long
|
39
41
|
when :float
|
40
42
|
(-1024 + 2048 * rand).round.to_f
|
41
43
|
when :double
|
@@ -79,6 +81,15 @@ class RandomData
|
|
79
81
|
end
|
80
82
|
end
|
81
83
|
|
84
|
+
# Produces a random value for a schema that carries a logical type.
#
# schm - schema object; only its +logical_type+ is read here.
# _d   - unused.
#
# Returns whatever the matching Avro::LogicalTypes adapter decodes from a
# random int ('date') or random long (timestamps); nil for any other
# logical type.
def logical_nextdata(schm, _d=0)
  case schm.logical_type
  when 'date'
    Avro::LogicalTypes::IntDate.decode(rand_int)
  when 'timestamp-millis'
    # Each timestamp flavour goes through its own adapter so the raw long is
    # interpreted in the unit the schema declares.
    Avro::LogicalTypes::TimestampMillis.decode(rand_long)
  when 'timestamp-micros'
    Avro::LogicalTypes::TimestampMicros.decode(rand_long)
  end
end
|
92
|
+
|
82
93
|
CHARPOOL = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789'
|
83
94
|
BYTEPOOL = '12345abcd'
|
84
95
|
|
@@ -87,4 +98,12 @@ class RandomData
|
|
87
98
|
rand(length+1).times { str << chars[rand(chars.size)] }
|
88
99
|
str
|
89
100
|
end
|
101
|
+
|
102
|
+
# Returns a random Integer between Avro::Schema::INT_MIN_VALUE and
# Avro::Schema::INT_MAX_VALUE, both endpoints included.
def rand_int
  # rand(Range) can return either endpoint; rand(max - min) + min tops out
  # one short of INT_MAX_VALUE.
  rand(Avro::Schema::INT_MIN_VALUE..Avro::Schema::INT_MAX_VALUE)
end
|
105
|
+
|
106
|
+
# Returns a random Integer between Avro::Schema::LONG_MIN_VALUE and
# Avro::Schema::LONG_MAX_VALUE, both endpoints included.
def rand_long
  # rand(Range) can return either endpoint; rand(max - min) + min tops out
  # one short of LONG_MAX_VALUE.
  rand(Avro::Schema::LONG_MIN_VALUE..Avro::Schema::LONG_MAX_VALUE)
end
|
90
109
|
end
|
data/test/test_datafile.rb
CHANGED
@@ -20,13 +20,13 @@ require 'test_help'
|
|
20
20
|
class TestDataFile < Test::Unit::TestCase
|
21
21
|
HERE = File.expand_path File.dirname(__FILE__)
|
22
22
|
def setup
|
23
|
-
if File.
|
23
|
+
if File.exist?(HERE + '/data.avr')
|
24
24
|
File.unlink(HERE + '/data.avr')
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
28
|
def teardown
|
29
|
-
if File.
|
29
|
+
if File.exist?(HERE + '/data.avr')
|
30
30
|
File.unlink(HERE + '/data.avr')
|
31
31
|
end
|
32
32
|
end
|
@@ -38,7 +38,7 @@ class TestDataFile < Test::Unit::TestCase
|
|
38
38
|
"fields" : [
|
39
39
|
{"name": "username", "type": "string"},
|
40
40
|
{"name": "age", "type": "int"},
|
41
|
-
{"name": "verified", "type": "boolean", "default":
|
41
|
+
{"name": "verified", "type": "boolean", "default": false}
|
42
42
|
]}
|
43
43
|
JSON
|
44
44
|
|
data/test/test_io.rb
CHANGED
@@ -84,6 +84,17 @@ EOS
|
|
84
84
|
check_default(record_schema, '{"f": 11}', {"f" => 11})
|
85
85
|
end
|
86
86
|
|
87
|
+
def test_record_with_logical_type
|
88
|
+
record_schema = <<EOS
|
89
|
+
{"type": "record",
|
90
|
+
"name": "Test",
|
91
|
+
"fields": [{"name": "ts",
|
92
|
+
"type": {"type": "long",
|
93
|
+
"logicalType": "timestamp-micros"}}]}
|
94
|
+
EOS
|
95
|
+
check(record_schema)
|
96
|
+
end
|
97
|
+
|
87
98
|
def test_error
|
88
99
|
error_schema = <<EOS
|
89
100
|
{"type": "error",
|
@@ -115,6 +126,7 @@ EOS
|
|
115
126
|
def test_union
|
116
127
|
union_schema = <<EOS
|
117
128
|
["string",
|
129
|
+
{"type": "int", "logicalType": "date"},
|
118
130
|
"null",
|
119
131
|
"long",
|
120
132
|
{"type": "record",
|
@@ -146,10 +158,31 @@ EOS
|
|
146
158
|
check_default(fixed_schema, '"a"', "a")
|
147
159
|
end
|
148
160
|
|
161
|
+
def test_record_with_nil
|
162
|
+
schema = Avro::Schema.parse('{"type":"record", "name":"rec", "fields":[{"type":"int", "name":"i"}]}')
|
163
|
+
assert_raise(Avro::IO::AvroTypeError) do
|
164
|
+
write_datum(nil, schema)
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
def test_array_with_nil
|
169
|
+
schema = Avro::Schema.parse('{"type":"array", "items":"int"}')
|
170
|
+
assert_raise(Avro::IO::AvroTypeError) do
|
171
|
+
write_datum(nil, schema)
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
def test_map_with_nil
|
176
|
+
schema = Avro::Schema.parse('{"type":"map", "values":"long"}')
|
177
|
+
assert_raise(Avro::IO::AvroTypeError) do
|
178
|
+
write_datum(nil, schema)
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
149
182
|
def test_enum_with_duplicate
|
150
183
|
str = '{"type": "enum", "name": "Test","symbols" : ["AA", "AA"]}'
|
151
|
-
assert_raises(Avro::SchemaParseError) do
|
152
|
-
|
184
|
+
assert_raises(Avro::SchemaParseError.new('Duplicate symbol: ["AA", "AA"]')) do
|
185
|
+
Avro::Schema.parse str
|
153
186
|
end
|
154
187
|
end
|
155
188
|
|
@@ -256,7 +289,7 @@ EOS
|
|
256
289
|
end
|
257
290
|
|
258
291
|
def test_skip_long
|
259
|
-
for value_to_skip,
|
292
|
+
for value_to_skip, _hex_encoding in BINARY_INT_ENCODINGS
|
260
293
|
value_to_read = 6253
|
261
294
|
|
262
295
|
# write some data in binary to string buffer
|
@@ -281,7 +314,7 @@ EOS
|
|
281
314
|
end
|
282
315
|
|
283
316
|
def test_skip_int
|
284
|
-
for value_to_skip,
|
317
|
+
for value_to_skip, _hex_encoding in BINARY_INT_ENCODINGS
|
285
318
|
value_to_read = 6253
|
286
319
|
|
287
320
|
writer = StringIO.new
|
@@ -331,7 +364,7 @@ EOS
|
|
331
364
|
datum_to_write = 219
|
332
365
|
for rs in promotable_schemas[(i + 1)..-1]
|
333
366
|
readers_schema = Avro::Schema.parse(rs)
|
334
|
-
writer,
|
367
|
+
writer, _enc, _dw = write_datum(datum_to_write, writers_schema)
|
335
368
|
datum_read = read_datum(writer, writers_schema, readers_schema)
|
336
369
|
if datum_read != datum_to_write
|
337
370
|
incorrect += 1
|
@@ -341,6 +374,40 @@ EOS
|
|
341
374
|
end
|
342
375
|
end
|
343
376
|
|
377
|
+
# Writes 'foo' with each of the "string" and "bytes" schemas, reads it back
# with the other one as reader schema, and expects every round trip to
# return the datum unchanged.
def test_interchangeable_schemas
  interchangeable_schemas = ['"string"', '"bytes"']
  incorrect = 0
  interchangeable_schemas.each_with_index do |ws, i|
    writers_schema = Avro::Schema.parse(ws)
    datum_to_write = 'foo'
    # Pair each writer schema with the other entry of the two-element list.
    readers_schema = Avro::Schema.parse(interchangeable_schemas[i == 0 ? 1 : 0])
    writer, * = write_datum(datum_to_write, writers_schema)
    datum_read = read_datum(writer, writers_schema, readers_schema)
    if datum_read != datum_to_write
      incorrect += 1
    end
  end
  # Expected value first, so a failure message labels the two sides correctly.
  assert_equal(0, incorrect)
end
|
392
|
+
|
393
|
+
# Writes [1, 2] with an array-of-int schema and expects the same array back
# when reading with an array-of-long reader schema.
def test_array_schema_promotion
  writers_schema = Avro::Schema.parse('{"type":"array", "items":"int"}')
  readers_schema = Avro::Schema.parse('{"type":"array", "items":"long"}')
  datum_to_write = [1, 2]
  writer, * = write_datum(datum_to_write, writers_schema)
  datum_read = read_datum(writer, writers_schema, readers_schema)
  # Expected value first, so a failure message labels the two sides correctly.
  assert_equal(datum_to_write, datum_read)
end
|
401
|
+
|
402
|
+
# Writes a two-entry map with a map-of-int schema and expects the same map
# back when reading with a map-of-long reader schema.
def test_map_schema_promotion
  writers_schema = Avro::Schema.parse('{"type":"map", "values":"int"}')
  readers_schema = Avro::Schema.parse('{"type":"map", "values":"long"}')
  datum_to_write = { 'foo' => 1, 'bar' => 2 }
  writer, * = write_datum(datum_to_write, writers_schema)
  datum_read = read_datum(writer, writers_schema, readers_schema)
  # Expected value first, so a failure message labels the two sides correctly.
  assert_equal(datum_to_write, datum_read)
end
|
410
|
+
|
344
411
|
def test_snappy_backward_compat
|
345
412
|
# a snappy-compressed block payload without the checksum
|
346
413
|
# this has no back-references, just one literal so the last 9
|
@@ -417,7 +484,7 @@ EOS
|
|
417
484
|
|
418
485
|
def checkser(schm, randomdata)
|
419
486
|
datum = randomdata.next
|
420
|
-
assert validate(schm, datum)
|
487
|
+
assert validate(schm, datum), 'datum is not valid for schema'
|
421
488
|
w = Avro::IO::DatumWriter.new(schm)
|
422
489
|
writer = StringIO.new "", "w"
|
423
490
|
w.write(datum, Avro::IO::BinaryEncoder.new(writer))
|
@@ -0,0 +1,128 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
# or more contributor license agreements. See the NOTICE file
|
4
|
+
# distributed with this work for additional information
|
5
|
+
# regarding copyright ownership. The ASF licenses this file
|
6
|
+
# to you under the Apache License, Version 2.0 (the
|
7
|
+
# "License"); you may not use this file except in compliance
|
8
|
+
# with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
# See the License for the specific language governing permissions and
|
16
|
+
# limitations under the License.
|
17
|
+
|
18
|
+
require 'test_help'
|
19
|
+
|
20
|
+
class TestLogicalTypes < Test::Unit::TestCase
|
21
|
+
def test_int_date
|
22
|
+
schema = Avro::Schema.parse <<-SCHEMA
|
23
|
+
{ "type": "int", "logicalType": "date" }
|
24
|
+
SCHEMA
|
25
|
+
|
26
|
+
assert_equal 'date', schema.logical_type
|
27
|
+
today = Date.today
|
28
|
+
assert_encode_and_decode today, schema
|
29
|
+
assert_preencoded Avro::LogicalTypes::IntDate.encode(today), schema, today
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_int_date_conversion
|
33
|
+
type = Avro::LogicalTypes::IntDate
|
34
|
+
|
35
|
+
assert_equal 5, type.encode(Date.new(1970, 1, 6))
|
36
|
+
assert_equal 0, type.encode(Date.new(1970, 1, 1))
|
37
|
+
assert_equal(-5, type.encode(Date.new(1969, 12, 27)))
|
38
|
+
|
39
|
+
assert_equal Date.new(1970, 1, 6), type.decode(5)
|
40
|
+
assert_equal Date.new(1970, 1, 1), type.decode(0)
|
41
|
+
assert_equal Date.new(1969, 12, 27), type.decode(-5)
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_timestamp_millis_long
|
45
|
+
schema = Avro::Schema.parse <<-SCHEMA
|
46
|
+
{ "type": "long", "logicalType": "timestamp-millis" }
|
47
|
+
SCHEMA
|
48
|
+
|
49
|
+
# The Time.at format is (seconds, microseconds) since Epoch.
|
50
|
+
time = Time.at(628232400, 12000)
|
51
|
+
|
52
|
+
assert_equal 'timestamp-millis', schema.logical_type
|
53
|
+
assert_encode_and_decode time, schema
|
54
|
+
assert_preencoded Avro::LogicalTypes::TimestampMillis.encode(time), schema, time.utc
|
55
|
+
end
|
56
|
+
|
57
|
+
def test_timestamp_millis_long_conversion
|
58
|
+
type = Avro::LogicalTypes::TimestampMillis
|
59
|
+
|
60
|
+
now = Time.now.utc
|
61
|
+
now_millis = Time.utc(now.year, now.month, now.day, now.hour, now.min, now.sec, now.usec / 1000 * 1000)
|
62
|
+
|
63
|
+
assert_equal now_millis, type.decode(type.encode(now_millis))
|
64
|
+
assert_equal 1432849613221, type.encode(Time.utc(2015, 5, 28, 21, 46, 53, 221000))
|
65
|
+
assert_equal 1432849613221, type.encode(DateTime.new(2015, 5, 28, 21, 46, 53.221))
|
66
|
+
assert_equal Time.utc(2015, 5, 28, 21, 46, 53, 221000), type.decode(1432849613221)
|
67
|
+
end
|
68
|
+
|
69
|
+
def test_timestamp_micros_long
|
70
|
+
schema = Avro::Schema.parse <<-SCHEMA
|
71
|
+
{ "type": "long", "logicalType": "timestamp-micros" }
|
72
|
+
SCHEMA
|
73
|
+
|
74
|
+
# The Time.at format is (seconds, microseconds) since Epoch.
|
75
|
+
time = Time.at(628232400, 12345)
|
76
|
+
|
77
|
+
assert_equal 'timestamp-micros', schema.logical_type
|
78
|
+
assert_encode_and_decode time, schema
|
79
|
+
assert_preencoded Avro::LogicalTypes::TimestampMicros.encode(time), schema, time.utc
|
80
|
+
end
|
81
|
+
|
82
|
+
def test_timestamp_micros_long_conversion
|
83
|
+
type = Avro::LogicalTypes::TimestampMicros
|
84
|
+
|
85
|
+
now = Time.now.utc
|
86
|
+
|
87
|
+
assert_equal Time.utc(now.year, now.month, now.day, now.hour, now.min, now.sec, now.usec), type.decode(type.encode(now))
|
88
|
+
assert_equal 1432849613221843, type.encode(Time.utc(2015, 5, 28, 21, 46, 53, 221843))
|
89
|
+
assert_equal 1432849613221843, type.encode(DateTime.new(2015, 5, 28, 21, 46, 53.221843))
|
90
|
+
assert_equal Time.utc(2015, 5, 28, 21, 46, 53, 221843), type.decode(1432849613221843)
|
91
|
+
end
|
92
|
+
|
93
|
+
def test_parse_fixed_duration
|
94
|
+
schema = Avro::Schema.parse <<-SCHEMA
|
95
|
+
{ "type": "fixed", "size": 12, "name": "fixed_dur", "logicalType": "duration" }
|
96
|
+
SCHEMA
|
97
|
+
|
98
|
+
assert_equal 'duration', schema.logical_type
|
99
|
+
end
|
100
|
+
|
101
|
+
def encode(datum, schema)
|
102
|
+
buffer = StringIO.new("")
|
103
|
+
encoder = Avro::IO::BinaryEncoder.new(buffer)
|
104
|
+
|
105
|
+
datum_writer = Avro::IO::DatumWriter.new(schema)
|
106
|
+
datum_writer.write(datum, encoder)
|
107
|
+
|
108
|
+
buffer.string
|
109
|
+
end
|
110
|
+
|
111
|
+
def decode(encoded, schema)
|
112
|
+
buffer = StringIO.new(encoded)
|
113
|
+
decoder = Avro::IO::BinaryDecoder.new(buffer)
|
114
|
+
|
115
|
+
datum_reader = Avro::IO::DatumReader.new(schema, schema)
|
116
|
+
datum_reader.read(decoder)
|
117
|
+
end
|
118
|
+
|
119
|
+
def assert_encode_and_decode(datum, schema)
|
120
|
+
encoded = encode(datum, schema)
|
121
|
+
assert_equal datum, decode(encoded, schema)
|
122
|
+
end
|
123
|
+
|
124
|
+
def assert_preencoded(datum, schema, decoded)
|
125
|
+
encoded = encode(datum, schema)
|
126
|
+
assert_equal decoded, decode(encoded, schema)
|
127
|
+
end
|
128
|
+
end
|