avro 1.9.0 → 1.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/LICENSE +2 -2
- data/Manifest +1 -1
- data/NOTICE +2 -2
- data/Rakefile +21 -30
- data/avro.gemspec +36 -29
- data/interop/test_interop.rb +14 -3
- data/lib/avro/VERSION.txt +1 -0
- data/lib/avro/data_file.rb +30 -6
- data/lib/avro/io.rb +42 -36
- data/lib/avro/ipc.rb +12 -8
- data/lib/avro/logical_types.rb +187 -3
- data/lib/avro/protocol.rb +2 -1
- data/lib/avro/schema.rb +234 -32
- data/lib/avro/schema_compatibility.rb +32 -21
- data/lib/avro/schema_normalization.rb +2 -1
- data/lib/avro/schema_validator.rb +41 -35
- data/lib/avro.rb +16 -3
- data/test/case_finder.rb +10 -4
- data/test/random_data.rb +9 -7
- data/test/sample_ipc_client.rb +2 -1
- data/test/sample_ipc_http_client.rb +2 -1
- data/test/sample_ipc_http_server.rb +2 -1
- data/test/sample_ipc_server.rb +2 -1
- data/test/test_datafile.rb +15 -1
- data/test/test_fingerprints.rb +21 -1
- data/test/test_help.rb +2 -1
- data/test/test_io.rb +114 -18
- data/test/test_logical_types.rb +139 -2
- data/test/test_protocol.rb +3 -2
- data/test/test_schema.rb +399 -1
- data/test/test_schema_compatibility.rb +177 -1
- data/test/test_schema_normalization.rb +3 -1
- data/test/test_schema_validator.rb +27 -6
- data/test/test_socket_transport.rb +2 -1
- data/test/tool.rb +10 -9
- metadata +32 -34
- data/CHANGELOG +0 -1
data/test/test_io.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
# Licensed to the Apache Software Foundation (ASF) under one
|
2
3
|
# or more contributor license agreements. See the NOTICE file
|
3
4
|
# distributed with this work for additional information
|
@@ -6,7 +7,7 @@
|
|
6
7
|
# "License"); you may not use this file except in compliance
|
7
8
|
# with the License. You may obtain a copy of the License at
|
8
9
|
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
10
11
|
#
|
11
12
|
# Unless required by applicable law or agreed to in writing, software
|
12
13
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
@@ -90,7 +91,10 @@ EOS
|
|
90
91
|
"name": "Test",
|
91
92
|
"fields": [{"name": "ts",
|
92
93
|
"type": {"type": "long",
|
93
|
-
"logicalType": "timestamp-micros"}}
|
94
|
+
"logicalType": "timestamp-micros"}},
|
95
|
+
{"name": "ts2",
|
96
|
+
"type": {"type": "long",
|
97
|
+
"logicalType": "timestamp-millis"}}]}
|
94
98
|
EOS
|
95
99
|
check(record_schema)
|
96
100
|
end
|
@@ -112,6 +116,13 @@ EOS
|
|
112
116
|
check_default(enum_schema, '"B"', "B")
|
113
117
|
end
|
114
118
|
|
119
|
+
def test_enum_with_default
|
120
|
+
enum_schema = '{"type": "enum", "name": "Test", "symbols": ["A", "B"], "default": "A"}'
|
121
|
+
check(enum_schema)
|
122
|
+
# Field default is used for missing field.
|
123
|
+
check_default(enum_schema, '"B"', "B")
|
124
|
+
end
|
125
|
+
|
115
126
|
def test_recursive
|
116
127
|
recursive_schema = <<EOS
|
117
128
|
{"type": "record",
|
@@ -158,6 +169,17 @@ EOS
|
|
158
169
|
check_default(fixed_schema, '"a"', "a")
|
159
170
|
end
|
160
171
|
|
172
|
+
def test_record_variable_key_types
|
173
|
+
datum = { sym: "foo", "str"=>"bar"}
|
174
|
+
ret_val = { "sym"=> "foo", "str"=>"bar"}
|
175
|
+
schema = Schema.parse('{"type":"record", "name":"rec", "fields":[{"name":"sym", "type":"string"}, {"name":"str", "type":"string"}]}')
|
176
|
+
|
177
|
+
writer, _encoder, _datum_writer = write_datum(datum, schema)
|
178
|
+
|
179
|
+
ret_datum = read_datum(writer, schema)
|
180
|
+
assert_equal ret_datum, ret_val
|
181
|
+
end
|
182
|
+
|
161
183
|
def test_record_with_nil
|
162
184
|
schema = Avro::Schema.parse('{"type":"record", "name":"rec", "fields":[{"type":"int", "name":"i"}]}')
|
163
185
|
assert_raise(Avro::IO::AvroTypeError) do
|
@@ -196,7 +218,7 @@ EOS
|
|
196
218
|
[64, '80 01'],
|
197
219
|
[8192, '80 80 01'],
|
198
220
|
[-8193, '81 80 01'],
|
199
|
-
]
|
221
|
+
].freeze
|
200
222
|
|
201
223
|
def avro_hexlify(reader)
|
202
224
|
bytes = []
|
@@ -245,46 +267,46 @@ EOS
|
|
245
267
|
|
246
268
|
def test_utf8_string_encoding
|
247
269
|
[
|
248
|
-
"\xC3".force_encoding('ISO-8859-1'),
|
249
|
-
"\xC3\x83".force_encoding('UTF-8')
|
270
|
+
String.new("\xC3", encoding: 'ISO-8859-1'),
|
271
|
+
String.new("\xC3\x83", encoding: 'UTF-8')
|
250
272
|
].each do |value|
|
251
|
-
output = ''.force_encoding('BINARY')
|
273
|
+
output = String.new('', encoding: 'BINARY')
|
252
274
|
encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output))
|
253
275
|
datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"string"'))
|
254
276
|
datum_writer.write(value, encoder)
|
255
277
|
|
256
|
-
assert_equal "\x04\xc3\x83".force_encoding('BINARY'), output
|
278
|
+
assert_equal String.new("\x04\xc3\x83", encoding: 'BINARY'), output
|
257
279
|
end
|
258
280
|
end
|
259
281
|
|
260
282
|
def test_bytes_encoding
|
261
283
|
[
|
262
|
-
"\xC3\x83".force_encoding('BINARY'),
|
263
|
-
"\xC3\x83".force_encoding('ISO-8859-1'),
|
264
|
-
"\xC3\x83".force_encoding('UTF-8')
|
284
|
+
String.new("\xC3\x83", encoding: 'BINARY'),
|
285
|
+
String.new("\xC3\x83", encoding: 'ISO-8859-1'),
|
286
|
+
String.new("\xC3\x83", encoding: 'UTF-8')
|
265
287
|
].each do |value|
|
266
|
-
output = ''.force_encoding('BINARY')
|
288
|
+
output = String.new('', encoding: 'BINARY')
|
267
289
|
encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output))
|
268
290
|
datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"bytes"'))
|
269
291
|
datum_writer.write(value, encoder)
|
270
292
|
|
271
|
-
assert_equal "\x04\xc3\x83".force_encoding('BINARY'), output
|
293
|
+
assert_equal String.new("\x04\xc3\x83", encoding: 'BINARY'), output
|
272
294
|
end
|
273
295
|
end
|
274
296
|
|
275
297
|
def test_fixed_encoding
|
276
298
|
[
|
277
|
-
"\xC3\x83".force_encoding('BINARY'),
|
278
|
-
"\xC3\x83".force_encoding('ISO-8859-1'),
|
279
|
-
"\xC3\x83".force_encoding('UTF-8')
|
299
|
+
String.new("\xC3\x83", encoding: 'BINARY'),
|
300
|
+
String.new("\xC3\x83", encoding: 'ISO-8859-1'),
|
301
|
+
String.new("\xC3\x83", encoding: 'UTF-8')
|
280
302
|
].each do |value|
|
281
|
-
output = ''.force_encoding('BINARY')
|
303
|
+
output = String.new('', encoding: 'BINARY')
|
282
304
|
encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output))
|
283
305
|
schema = '{"type": "fixed", "name": "TwoBytes", "size": 2}'
|
284
306
|
datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse(schema))
|
285
307
|
datum_writer.write(value, encoder)
|
286
308
|
|
287
|
-
assert_equal "\xc3\x83".force_encoding('BINARY'), output
|
309
|
+
assert_equal String.new("\xC3\x83", encoding: 'BINARY'), output
|
288
310
|
end
|
289
311
|
end
|
290
312
|
|
@@ -390,6 +412,50 @@ EOS
|
|
390
412
|
assert_equal(incorrect, 0)
|
391
413
|
end
|
392
414
|
|
415
|
+
def test_unknown_enum_symbol
|
416
|
+
writers_schema = Avro::Schema.parse(<<-SCHEMA)
|
417
|
+
{
|
418
|
+
"type": "enum",
|
419
|
+
"name": "test",
|
420
|
+
"symbols": ["B", "C"]
|
421
|
+
}
|
422
|
+
SCHEMA
|
423
|
+
readers_schema = Avro::Schema.parse(<<-SCHEMA)
|
424
|
+
{
|
425
|
+
"type": "enum",
|
426
|
+
"name": "test",
|
427
|
+
"symbols": ["A", "B"]
|
428
|
+
}
|
429
|
+
SCHEMA
|
430
|
+
datum_to_write = "C"
|
431
|
+
writer, * = write_datum(datum_to_write, writers_schema)
|
432
|
+
datum_read = read_datum(writer, writers_schema, readers_schema)
|
433
|
+
# Ruby implementation did not follow the spec and returns the writer's symbol here
|
434
|
+
assert_equal(datum_read, datum_to_write)
|
435
|
+
end
|
436
|
+
|
437
|
+
def test_unknown_enum_symbol_with_enum_default
|
438
|
+
writers_schema = Avro::Schema.parse(<<-SCHEMA)
|
439
|
+
{
|
440
|
+
"type": "enum",
|
441
|
+
"name": "test",
|
442
|
+
"symbols": ["B", "C"]
|
443
|
+
}
|
444
|
+
SCHEMA
|
445
|
+
readers_schema = Avro::Schema.parse(<<-SCHEMA)
|
446
|
+
{
|
447
|
+
"type": "enum",
|
448
|
+
"name": "test",
|
449
|
+
"symbols": ["A", "B", "UNKNOWN"],
|
450
|
+
"default": "UNKNOWN"
|
451
|
+
}
|
452
|
+
SCHEMA
|
453
|
+
datum_to_write = "C"
|
454
|
+
writer, * = write_datum(datum_to_write, writers_schema)
|
455
|
+
datum_read = read_datum(writer, writers_schema, readers_schema)
|
456
|
+
assert_equal(datum_read, "UNKNOWN")
|
457
|
+
end
|
458
|
+
|
393
459
|
def test_array_schema_promotion
|
394
460
|
writers_schema = Avro::Schema.parse('{"type":"array", "items":"int"}')
|
395
461
|
readers_schema = Avro::Schema.parse('{"type":"array", "items":"long"}')
|
@@ -408,6 +474,36 @@ EOS
|
|
408
474
|
assert_equal(datum_read, datum_to_write)
|
409
475
|
end
|
410
476
|
|
477
|
+
def test_aliased
|
478
|
+
writers_schema = Avro::Schema.parse(<<-SCHEMA)
|
479
|
+
{"type":"record", "name":"Rec1", "fields":[
|
480
|
+
{"name":"field1", "type":"int"}
|
481
|
+
]}
|
482
|
+
SCHEMA
|
483
|
+
readers_schema = Avro::Schema.parse(<<-SCHEMA)
|
484
|
+
{"type":"record", "name":"Rec2", "aliases":["Rec1"], "fields":[
|
485
|
+
{"name":"field2", "aliases":["field1"], "type":"int"}
|
486
|
+
]}
|
487
|
+
SCHEMA
|
488
|
+
writer, * = write_datum({ 'field1' => 1 }, writers_schema)
|
489
|
+
datum_read = read_datum(writer, writers_schema, readers_schema)
|
490
|
+
assert_equal(datum_read, { 'field2' => 1 })
|
491
|
+
end
|
492
|
+
|
493
|
+
def test_big_decimal_datum_for_float
|
494
|
+
writers_schema = Avro::Schema.parse('"float"')
|
495
|
+
writer, * = write_datum(BigDecimal('1.2'), writers_schema)
|
496
|
+
datum_read = read_datum(writer, writers_schema)
|
497
|
+
assert_in_delta(1.2, datum_read)
|
498
|
+
end
|
499
|
+
|
500
|
+
def test_big_decimal_datum_for_double
|
501
|
+
writers_schema = Avro::Schema.parse('"double"')
|
502
|
+
writer, * = write_datum(BigDecimal("1.2"), writers_schema)
|
503
|
+
datum_read = read_datum(writer, writers_schema)
|
504
|
+
assert_in_delta(1.2, datum_read)
|
505
|
+
end
|
506
|
+
|
411
507
|
def test_snappy_backward_compat
|
412
508
|
# a snappy-compressed block payload without the checksum
|
413
509
|
# this has no back-references, just one literal so the last 9
|
@@ -486,7 +582,7 @@ EOS
|
|
486
582
|
datum = randomdata.next
|
487
583
|
assert validate(schm, datum), 'datum is not valid for schema'
|
488
584
|
w = Avro::IO::DatumWriter.new(schm)
|
489
|
-
writer = StringIO.new
|
585
|
+
writer = StringIO.new(+"", "w")
|
490
586
|
w.write(datum, Avro::IO::BinaryEncoder.new(writer))
|
491
587
|
r = datum_reader(schm)
|
492
588
|
reader = StringIO.new(writer.string)
|
data/test/test_logical_types.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
|
+
# frozen_string_literal: true
|
2
3
|
# Licensed to the Apache Software Foundation (ASF) under one
|
3
4
|
# or more contributor license agreements. See the NOTICE file
|
4
5
|
# distributed with this work for additional information
|
@@ -7,7 +8,7 @@
|
|
7
8
|
# "License"); you may not use this file except in compliance
|
8
9
|
# with the License. You may obtain a copy of the License at
|
9
10
|
#
|
10
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
11
12
|
#
|
12
13
|
# Unless required by applicable law or agreed to in writing, software
|
13
14
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
@@ -16,6 +17,7 @@
|
|
16
17
|
# limitations under the License.
|
17
18
|
|
18
19
|
require 'test_help'
|
20
|
+
require 'memory_profiler'
|
19
21
|
|
20
22
|
class TestLogicalTypes < Test::Unit::TestCase
|
21
23
|
def test_int_date
|
@@ -98,8 +100,143 @@ class TestLogicalTypes < Test::Unit::TestCase
|
|
98
100
|
assert_equal 'duration', schema.logical_type
|
99
101
|
end
|
100
102
|
|
103
|
+
def test_bytes_decimal
|
104
|
+
schema = Avro::Schema.parse <<-SCHEMA
|
105
|
+
{ "type": "bytes", "logicalType": "decimal", "precision": 9, "scale": 6 }
|
106
|
+
SCHEMA
|
107
|
+
|
108
|
+
assert_equal 'decimal', schema.logical_type
|
109
|
+
assert_equal 9, schema.precision
|
110
|
+
assert_equal 6, schema.scale
|
111
|
+
|
112
|
+
assert_encode_and_decode BigDecimal('-3.4562'), schema
|
113
|
+
assert_encode_and_decode BigDecimal('3.4562'), schema
|
114
|
+
assert_encode_and_decode 15.123, schema
|
115
|
+
assert_encode_and_decode 15, schema
|
116
|
+
assert_encode_and_decode BigDecimal('0.123456'), schema
|
117
|
+
assert_encode_and_decode BigDecimal('0'), schema
|
118
|
+
assert_encode_and_decode BigDecimal('1'), schema
|
119
|
+
assert_encode_and_decode BigDecimal('-1'), schema
|
120
|
+
|
121
|
+
assert_raise ArgumentError do
|
122
|
+
type = Avro::LogicalTypes::BytesDecimal.new(schema)
|
123
|
+
type.encode('1.23')
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
def test_bytes_decimal_range_errors
|
128
|
+
schema = Avro::Schema.parse <<-SCHEMA
|
129
|
+
{ "type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2 }
|
130
|
+
SCHEMA
|
131
|
+
|
132
|
+
type = Avro::LogicalTypes::BytesDecimal.new(schema)
|
133
|
+
|
134
|
+
assert_raises RangeError do
|
135
|
+
type.encode(BigDecimal('345'))
|
136
|
+
end
|
137
|
+
|
138
|
+
assert_raises RangeError do
|
139
|
+
type.encode(BigDecimal('1.5342'))
|
140
|
+
end
|
141
|
+
|
142
|
+
assert_raises RangeError do
|
143
|
+
type.encode(BigDecimal('-1.5342'))
|
144
|
+
end
|
145
|
+
|
146
|
+
assert_raises RangeError do
|
147
|
+
type.encode(BigDecimal('-100.2'))
|
148
|
+
end
|
149
|
+
|
150
|
+
assert_raises RangeError do
|
151
|
+
type.encode(BigDecimal('-99.991'))
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
def test_bytes_decimal_conversion
|
156
|
+
schema = Avro::Schema.parse <<-SCHEMA
|
157
|
+
{ "type": "bytes", "logicalType": "decimal", "precision": 12, "scale": 6 }
|
158
|
+
SCHEMA
|
159
|
+
|
160
|
+
type = Avro::LogicalTypes::BytesDecimal.new(schema)
|
161
|
+
|
162
|
+
enc = "\xcb\x43\x38".dup.force_encoding('BINARY')
|
163
|
+
assert_equal enc, type.encode(BigDecimal('-3.4562'))
|
164
|
+
assert_equal BigDecimal('-3.4562'), type.decode(enc)
|
165
|
+
|
166
|
+
assert_equal "\x34\xbc\xc8".dup.force_encoding('BINARY'), type.encode(BigDecimal('3.4562'))
|
167
|
+
assert_equal BigDecimal('3.4562'), type.decode("\x34\xbc\xc8".dup.force_encoding('BINARY'))
|
168
|
+
|
169
|
+
assert_equal "\x6a\x33\x0e\x87\x00".dup.force_encoding('BINARY'), type.encode(BigDecimal('456123.123456'))
|
170
|
+
assert_equal BigDecimal('456123.123456'), type.decode("\x6a\x33\x0e\x87\x00".dup.force_encoding('BINARY'))
|
171
|
+
end
|
172
|
+
|
173
|
+
def test_logical_type_with_schema
|
174
|
+
exception = assert_raises(ArgumentError) do
|
175
|
+
Avro::LogicalTypes::LogicalTypeWithSchema.new(nil)
|
176
|
+
end
|
177
|
+
assert_equal exception.to_s, 'schema is required'
|
178
|
+
|
179
|
+
schema = Avro::Schema.parse <<-SCHEMA
|
180
|
+
{ "type": "bytes", "logicalType": "decimal", "precision": 12, "scale": 6 }
|
181
|
+
SCHEMA
|
182
|
+
|
183
|
+
assert_nothing_raised do
|
184
|
+
Avro::LogicalTypes::LogicalTypeWithSchema.new(schema)
|
185
|
+
end
|
186
|
+
|
187
|
+
assert_raises NotImplementedError do
|
188
|
+
Avro::LogicalTypes::LogicalTypeWithSchema.new(schema).encode(BigDecimal('2'))
|
189
|
+
end
|
190
|
+
|
191
|
+
assert_raises NotImplementedError do
|
192
|
+
Avro::LogicalTypes::LogicalTypeWithSchema.new(schema).decode('foo')
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
def test_bytes_decimal_object_allocations_encode
|
197
|
+
schema = Avro::Schema.parse <<-SCHEMA
|
198
|
+
{ "type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2 }
|
199
|
+
SCHEMA
|
200
|
+
|
201
|
+
type = Avro::LogicalTypes::BytesDecimal.new(schema)
|
202
|
+
|
203
|
+
positive_value = BigDecimal('5.2')
|
204
|
+
negative_value = BigDecimal('-5.2')
|
205
|
+
|
206
|
+
[positive_value, negative_value].each do |value|
|
207
|
+
report = MemoryProfiler.report do
|
208
|
+
type.encode(value)
|
209
|
+
end
|
210
|
+
|
211
|
+
assert_equal 5, report.total_allocated
|
212
|
+
# Ruby 2.7 does not retain anything. Ruby 2.6 retains 1
|
213
|
+
assert_operator 1, :>=, report.total_retained
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
217
|
+
def test_bytes_decimal_object_allocations_decode
|
218
|
+
schema = Avro::Schema.parse <<-SCHEMA
|
219
|
+
{ "type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2 }
|
220
|
+
SCHEMA
|
221
|
+
|
222
|
+
type = Avro::LogicalTypes::BytesDecimal.new(schema)
|
223
|
+
|
224
|
+
positive_enc = "\x02\b".dup.force_encoding('BINARY')
|
225
|
+
negative_enc = "\xFD\xF8".dup.force_encoding('BINARY')
|
226
|
+
|
227
|
+
[positive_enc, negative_enc].each do |encoded|
|
228
|
+
report = MemoryProfiler.report do
|
229
|
+
type.decode(encoded)
|
230
|
+
end
|
231
|
+
|
232
|
+
assert_equal 5, report.total_allocated
|
233
|
+
# Ruby 2.7 does not retain anything. Ruby 2.6 retains 1
|
234
|
+
assert_operator 1, :>=, report.total_retained
|
235
|
+
end
|
236
|
+
end
|
237
|
+
|
101
238
|
def encode(datum, schema)
|
102
|
-
buffer = StringIO.new
|
239
|
+
buffer = StringIO.new
|
103
240
|
encoder = Avro::IO::BinaryEncoder.new(buffer)
|
104
241
|
|
105
242
|
datum_writer = Avro::IO::DatumWriter.new(schema)
|
data/test/test_protocol.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
# Licensed to the Apache Software Foundation (ASF) under one
|
2
3
|
# or more contributor license agreements. See the NOTICE file
|
3
4
|
# distributed with this work for additional information
|
@@ -6,7 +7,7 @@
|
|
6
7
|
# "License"); you may not use this file except in compliance
|
7
8
|
# with the License. You may obtain a copy of the License at
|
8
9
|
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
10
11
|
#
|
11
12
|
# Unless required by applicable law or agreed to in writing, software
|
12
13
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
@@ -184,7 +185,7 @@ EOS
|
|
184
185
|
}
|
185
186
|
}
|
186
187
|
EOS
|
187
|
-
]
|
188
|
+
].freeze
|
188
189
|
|
189
190
|
Protocol = Avro::Protocol
|
190
191
|
def test_parse
|