avro-salsify-fork 1.9.0.2 → 1.9.0.3.pre1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +1 -1
- data/avro-salsify-fork.gemspec +5 -5
- data/avro.gemspec +1 -1
- data/lib/avro.rb +1 -0
- data/lib/avro/data_file.rb +18 -1
- data/lib/avro/io.rb +7 -42
- data/lib/avro/schema.rb +25 -7
- data/test/test_io.rb +66 -0
- data/test/test_schema.rb +51 -0
- data/test/test_schema_compatibility.rb +463 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f26367215fdb88d9e6a8acebb159d7f30f226d4e
|
4
|
+
data.tar.gz: cef6d73aed57d1731a58ccfb5793759150b81fc1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8b2419d791ef4e26b27aa61017c0f71867af272480d612d8da0e244a28767af71e55d1a4a5746af982645c79825d31731ac98ad9819f8cbde594b6c20e456502
|
7
|
+
data.tar.gz: 104f099f07aea4dc5aa19d36e14f536ea6620896d5039ba4f8bffa1bdc872a4b48853a951a1f59af492eefa46f96a5089e2649a966bbca0737c9e6b83e141ca0
|
data/Rakefile
CHANGED
data/avro-salsify-fork.gemspec
CHANGED
@@ -1,25 +1,25 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
-
# stub: avro-salsify-fork 1.9.0.2 ruby lib
|
2
|
+
# stub: avro-salsify-fork 1.9.0.3.pre1 ruby lib
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "avro-salsify-fork".freeze
|
6
|
-
s.version = "1.9.0.2"
|
6
|
+
s.version = "1.9.0.3.pre1"
|
7
7
|
|
8
8
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2".freeze) if s.respond_to? :required_rubygems_version=
|
9
9
|
s.require_paths = ["lib".freeze]
|
10
10
|
s.authors = ["Apache Software Foundation / Salsify Engineering".freeze]
|
11
|
-
s.date = "2016-
|
11
|
+
s.date = "2016-12-15"
|
12
12
|
s.description = "Avro is a data serialization and RPC format.\nThis release contains the changes submitted in https://github.com/apache/avro/pull/116\nto support logical types in the Ruby gem.".freeze
|
13
13
|
s.email = "engineering@salsify.com".freeze
|
14
14
|
s.extra_rdoc_files = ["CHANGELOG".freeze, "LICENSE".freeze, "lib/avro.rb".freeze, "lib/avro/data_file.rb".freeze, "lib/avro/io.rb".freeze, "lib/avro/ipc.rb".freeze, "lib/avro/logical_types.rb".freeze, "lib/avro/protocol.rb".freeze, "lib/avro/schema.rb".freeze, "lib/avro/schema_normalization.rb".freeze]
|
15
|
-
s.files = ["CHANGELOG".freeze, "LICENSE".freeze, "Manifest".freeze, "NOTICE".freeze, "Rakefile".freeze, "avro-salsify-fork.gemspec".freeze, "avro.gemspec".freeze, "interop/test_interop.rb".freeze, "lib/avro.rb".freeze, "lib/avro/data_file.rb".freeze, "lib/avro/io.rb".freeze, "lib/avro/ipc.rb".freeze, "lib/avro/logical_types.rb".freeze, "lib/avro/protocol.rb".freeze, "lib/avro/schema.rb".freeze, "lib/avro/schema_normalization.rb".freeze, "test/case_finder.rb".freeze, "test/random_data.rb".freeze, "test/sample_ipc_client.rb".freeze, "test/sample_ipc_http_client.rb".freeze, "test/sample_ipc_http_server.rb".freeze, "test/sample_ipc_server.rb".freeze, "test/test_datafile.rb".freeze, "test/test_fingerprints.rb".freeze, "test/test_help.rb".freeze, "test/test_io.rb".freeze, "test/test_logical_types.rb".freeze, "test/test_protocol.rb".freeze, "test/test_schema.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_socket_transport.rb".freeze, "test/tool.rb".freeze]
|
15
|
+
s.files = ["CHANGELOG".freeze, "LICENSE".freeze, "Manifest".freeze, "NOTICE".freeze, "Rakefile".freeze, "avro-salsify-fork.gemspec".freeze, "avro.gemspec".freeze, "interop/test_interop.rb".freeze, "lib/avro.rb".freeze, "lib/avro/data_file.rb".freeze, "lib/avro/io.rb".freeze, "lib/avro/ipc.rb".freeze, "lib/avro/logical_types.rb".freeze, "lib/avro/protocol.rb".freeze, "lib/avro/schema.rb".freeze, "lib/avro/schema_normalization.rb".freeze, "test/case_finder.rb".freeze, "test/random_data.rb".freeze, "test/sample_ipc_client.rb".freeze, "test/sample_ipc_http_client.rb".freeze, "test/sample_ipc_http_server.rb".freeze, "test/sample_ipc_server.rb".freeze, "test/test_datafile.rb".freeze, "test/test_fingerprints.rb".freeze, "test/test_help.rb".freeze, "test/test_io.rb".freeze, "test/test_logical_types.rb".freeze, "test/test_protocol.rb".freeze, "test/test_schema.rb".freeze, "test/test_schema_compatibility.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_socket_transport.rb".freeze, "test/tool.rb".freeze]
|
16
16
|
s.homepage = "https://github.com/salsify/avro".freeze
|
17
17
|
s.licenses = ["Apache License 2.0 (Apache-2.0)".freeze]
|
18
18
|
s.rdoc_options = ["--line-numbers".freeze, "--title".freeze, "Avro-salsify-fork".freeze]
|
19
19
|
s.rubyforge_project = "avro-salsify-fork".freeze
|
20
20
|
s.rubygems_version = "2.6.6".freeze
|
21
21
|
s.summary = "Apache Avro for Ruby with logical types patch".freeze
|
22
|
-
s.test_files = ["test/test_datafile.rb".freeze, "test/test_fingerprints.rb".freeze, "test/test_help.rb".freeze, "test/test_io.rb".freeze, "test/test_logical_types.rb".freeze, "test/test_protocol.rb".freeze, "test/test_schema.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_socket_transport.rb".freeze]
|
22
|
+
s.test_files = ["test/test_datafile.rb".freeze, "test/test_fingerprints.rb".freeze, "test/test_help.rb".freeze, "test/test_io.rb".freeze, "test/test_logical_types.rb".freeze, "test/test_protocol.rb".freeze, "test/test_schema.rb".freeze, "test/test_schema_compatibility.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_socket_transport.rb".freeze]
|
23
23
|
|
24
24
|
if s.respond_to? :specification_version then
|
25
25
|
s.specification_version = 4
|
data/avro.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.rubyforge_project = "avro".freeze
|
20
20
|
s.rubygems_version = "2.6.6".freeze
|
21
21
|
s.summary = "Apache Avro for Ruby".freeze
|
22
|
-
s.test_files = ["test/test_datafile.rb".freeze, "test/test_fingerprints.rb".freeze, "test/test_help.rb".freeze, "test/test_io.rb".freeze, "test/test_logical_types.rb".freeze, "test/test_protocol.rb".freeze, "test/test_schema.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_socket_transport.rb".freeze]
|
22
|
+
s.test_files = ["test/test_datafile.rb".freeze, "test/test_fingerprints.rb".freeze, "test/test_help.rb".freeze, "test/test_io.rb".freeze, "test/test_logical_types.rb".freeze, "test/test_protocol.rb".freeze, "test/test_schema.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_schema_compatibility.rb".freeze, "test/test_socket_transport.rb".freeze]
|
23
23
|
|
24
24
|
if s.respond_to? :specification_version then
|
25
25
|
s.specification_version = 4
|
data/lib/avro.rb
CHANGED
data/lib/avro/data_file.rb
CHANGED
@@ -338,12 +338,29 @@ module Avro
|
|
338
338
|
|
339
339
|
def decompress(data)
|
340
340
|
load_snappy!
|
341
|
+
crc32 = data.slice(-4..-1).unpack('N').first
|
342
|
+
uncompressed = Snappy.inflate(data.slice(0..-5))
|
343
|
+
|
344
|
+
if crc32 == Zlib.crc32(uncompressed)
|
345
|
+
uncompressed
|
346
|
+
else
|
347
|
+
# older versions of avro-ruby didn't write the checksum, so if it
|
348
|
+
# doesn't match this must assume that it wasn't there and return
|
349
|
+
# the entire payload uncompressed.
|
350
|
+
Snappy.inflate(data)
|
351
|
+
end
|
352
|
+
rescue Snappy::Error
|
353
|
+
# older versions of avro-ruby didn't write the checksum, so removing
|
354
|
+
# the last 4 bytes may cause Snappy to fail. recover by assuming the
|
355
|
+
# payload is from an older file and uncompress the entire buffer.
|
341
356
|
Snappy.inflate(data)
|
342
357
|
end
|
343
358
|
|
344
359
|
def compress(data)
|
345
360
|
load_snappy!
|
346
|
-
Snappy.deflate(data)
|
361
|
+
crc32 = Zlib.crc32(data)
|
362
|
+
compressed = Snappy.deflate(data)
|
363
|
+
[compressed, crc32].pack('a*N')
|
347
364
|
end
|
348
365
|
|
349
366
|
private
|
data/lib/avro/io.rb
CHANGED
@@ -221,46 +221,7 @@ module Avro
|
|
221
221
|
|
222
222
|
class DatumReader
|
223
223
|
def self.match_schemas(writers_schema, readers_schema)
|
224
|
-
w_type = writers_schema.type_sym
|
225
|
-
r_type = readers_schema.type_sym
|
226
|
-
|
227
|
-
# This conditional is begging for some OO love.
|
228
|
-
if w_type == :union || r_type == :union
|
229
|
-
return true
|
230
|
-
end
|
231
|
-
|
232
|
-
if w_type == r_type
|
233
|
-
return true if Schema::PRIMITIVE_TYPES_SYM.include?(r_type)
|
234
|
-
|
235
|
-
case r_type
|
236
|
-
when :record
|
237
|
-
return writers_schema.fullname == readers_schema.fullname
|
238
|
-
when :error
|
239
|
-
return writers_schema.fullname == readers_schema.fullname
|
240
|
-
when :request
|
241
|
-
return true
|
242
|
-
when :fixed
|
243
|
-
return writers_schema.fullname == readers_schema.fullname &&
|
244
|
-
writers_schema.size == readers_schema.size
|
245
|
-
when :enum
|
246
|
-
return writers_schema.fullname == readers_schema.fullname
|
247
|
-
when :map
|
248
|
-
return writers_schema.values.type == readers_schema.values.type
|
249
|
-
when :array
|
250
|
-
return writers_schema.items.type == readers_schema.items.type
|
251
|
-
end
|
252
|
-
end
|
253
|
-
|
254
|
-
# Handle schema promotion
|
255
|
-
if w_type == :int && [:long, :float, :double].include?(r_type)
|
256
|
-
return true
|
257
|
-
elsif w_type == :long && [:float, :double].include?(r_type)
|
258
|
-
return true
|
259
|
-
elsif w_type == :float && r_type == :double
|
260
|
-
return true
|
261
|
-
end
|
262
|
-
|
263
|
-
return false
|
224
|
+
Avro::SchemaCompatibility.match_schemas(writers_schema, readers_schema)
|
264
225
|
end
|
265
226
|
|
266
227
|
attr_accessor :writers_schema, :readers_schema
|
@@ -395,11 +356,11 @@ module Avro
|
|
395
356
|
writers_fields_hash = writers_schema.fields_hash
|
396
357
|
readers_fields_hash.each do |field_name, field|
|
397
358
|
unless writers_fields_hash.has_key? field_name
|
398
|
-
if
|
359
|
+
if field.default?
|
399
360
|
field_val = read_default_value(field.type, field.default)
|
400
361
|
read_record[field.name] = field_val
|
401
362
|
else
|
402
|
-
#
|
363
|
+
raise AvroError, "Missing data for #{field.type} with no default"
|
403
364
|
end
|
404
365
|
end
|
405
366
|
end
|
@@ -409,6 +370,10 @@ module Avro
|
|
409
370
|
end
|
410
371
|
|
411
372
|
def read_default_value(field_schema, default_value)
|
373
|
+
if default_value == :no_default
|
374
|
+
raise AvroError, "Missing data for #{field_schema} with no default"
|
375
|
+
end
|
376
|
+
|
412
377
|
# Basically a JSON Decoder?
|
413
378
|
case field_schema.type_sym
|
414
379
|
when :null
|
data/lib/avro/schema.rb
CHANGED
@@ -164,6 +164,18 @@ module Avro
|
|
164
164
|
Digest::SHA256.hexdigest(parsing_form).to_i(16)
|
165
165
|
end
|
166
166
|
|
167
|
+
def read?(writers_schema)
|
168
|
+
SchemaCompatibility.can_read?(writers_schema, self)
|
169
|
+
end
|
170
|
+
|
171
|
+
def be_read?(other_schema)
|
172
|
+
other_schema.read?(self)
|
173
|
+
end
|
174
|
+
|
175
|
+
def mutual_read?(other_schema)
|
176
|
+
SchemaCompatibility.mutual_read?(other_schema, self)
|
177
|
+
end
|
178
|
+
|
167
179
|
def ==(other, seen=nil)
|
168
180
|
other.is_a?(Schema) && type_sym == other.type_sym
|
169
181
|
end
|
@@ -228,7 +240,7 @@ module Avro
|
|
228
240
|
if field.respond_to?(:[]) # TODO(jmhodges) wtffffff
|
229
241
|
type = field['type']
|
230
242
|
name = field['name']
|
231
|
-
default = field['default']
|
243
|
+
default = field.key?('default') ? field['default'] : :no_default
|
232
244
|
order = field['order']
|
233
245
|
new_field = Field.new(type, name, default, order, names, namespace)
|
234
246
|
# make sure field name has not been used yet
|
@@ -251,7 +263,11 @@ module Avro
|
|
251
263
|
else
|
252
264
|
super(schema_type, name, namespace, names)
|
253
265
|
end
|
254
|
-
@fields =
|
266
|
+
@fields = if fields
|
267
|
+
RecordSchema.make_field_objects(fields, names, self.namespace)
|
268
|
+
else
|
269
|
+
{}
|
270
|
+
end
|
255
271
|
end
|
256
272
|
|
257
273
|
def fields_hash
|
@@ -302,8 +318,7 @@ module Avro
|
|
302
318
|
def initialize(schemas, names=nil, default_namespace=nil)
|
303
319
|
super(:union)
|
304
320
|
|
305
|
-
schema_objects = []
|
306
|
-
schemas.each_with_index do |schema, i|
|
321
|
+
@schemas = schemas.each_with_object([]) do |schema, schema_objects|
|
307
322
|
new_schema = subparse(schema, names, default_namespace)
|
308
323
|
ns_type = new_schema.type_sym
|
309
324
|
|
@@ -316,7 +331,6 @@ module Avro
|
|
316
331
|
else
|
317
332
|
schema_objects << new_schema
|
318
333
|
end
|
319
|
-
@schemas = schema_objects
|
320
334
|
end
|
321
335
|
end
|
322
336
|
|
@@ -380,16 +394,20 @@ module Avro
|
|
380
394
|
class Field < Schema
|
381
395
|
attr_reader :type, :name, :default, :order
|
382
396
|
|
383
|
-
def initialize(type, name, default=nil, order=nil, names=nil, namespace=nil)
|
397
|
+
def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil)
|
384
398
|
@type = subparse(type, names, namespace)
|
385
399
|
@name = name
|
386
400
|
@default = default
|
387
401
|
@order = order
|
388
402
|
end
|
389
403
|
|
404
|
+
def default?
|
405
|
+
@default != :no_default
|
406
|
+
end
|
407
|
+
|
390
408
|
def to_avro(names=Set.new)
|
391
409
|
{'name' => name, 'type' => type.to_avro(names)}.tap do |avro|
|
392
|
-
avro['default'] = default if default
|
410
|
+
avro['default'] = default if default?
|
393
411
|
avro['order'] = order if order
|
394
412
|
end
|
395
413
|
end
|
data/test/test_io.rb
CHANGED
@@ -352,8 +352,71 @@ EOS
|
|
352
352
|
assert_equal(incorrect, 0)
|
353
353
|
end
|
354
354
|
end
|
355
|
+
|
356
|
+
def test_interchangeable_schemas
|
357
|
+
interchangeable_schemas = ['"string"', '"bytes"']
|
358
|
+
incorrect = 0
|
359
|
+
interchangeable_schemas.each_with_index do |ws, i|
|
360
|
+
writers_schema = Avro::Schema.parse(ws)
|
361
|
+
datum_to_write = 'foo'
|
362
|
+
readers_schema = Avro::Schema.parse(interchangeable_schemas[i == 0 ? 1 : 0])
|
363
|
+
writer, * = write_datum(datum_to_write, writers_schema)
|
364
|
+
datum_read = read_datum(writer, writers_schema, readers_schema)
|
365
|
+
if datum_read != datum_to_write
|
366
|
+
incorrect += 1
|
367
|
+
end
|
368
|
+
end
|
369
|
+
assert_equal(incorrect, 0)
|
370
|
+
end
|
371
|
+
|
372
|
+
def test_array_schema_promotion
|
373
|
+
writers_schema = Avro::Schema.parse('{"type":"array", "items":"int"}')
|
374
|
+
readers_schema = Avro::Schema.parse('{"type":"array", "items":"long"}')
|
375
|
+
datum_to_write = [1, 2]
|
376
|
+
writer, * = write_datum(datum_to_write, writers_schema)
|
377
|
+
datum_read = read_datum(writer, writers_schema, readers_schema)
|
378
|
+
assert_equal(datum_read, datum_to_write)
|
379
|
+
end
|
380
|
+
|
381
|
+
def test_map_schema_promotion
|
382
|
+
writers_schema = Avro::Schema.parse('{"type":"map", "values":"int"}')
|
383
|
+
readers_schema = Avro::Schema.parse('{"type":"map", "values":"long"}')
|
384
|
+
datum_to_write = { 'foo' => 1, 'bar' => 2 }
|
385
|
+
writer, * = write_datum(datum_to_write, writers_schema)
|
386
|
+
datum_read = read_datum(writer, writers_schema, readers_schema)
|
387
|
+
assert_equal(datum_read, datum_to_write)
|
388
|
+
end
|
389
|
+
|
390
|
+
def test_snappy_backward_compat
|
391
|
+
# a snappy-compressed block payload without the checksum
|
392
|
+
# this has no back-references, just one literal so the last 9
|
393
|
+
# bytes are the uncompressed payload.
|
394
|
+
old_snappy_bytes = "\x09\x20\x02\x06\x02\x0a\x67\x72\x65\x65\x6e"
|
395
|
+
uncompressed_bytes = "\x02\x06\x02\x0a\x67\x72\x65\x65\x6e"
|
396
|
+
snappy = Avro::DataFile::SnappyCodec.new
|
397
|
+
assert_equal(uncompressed_bytes, snappy.decompress(old_snappy_bytes))
|
398
|
+
end
|
399
|
+
|
355
400
|
private
|
356
401
|
|
402
|
+
def check_no_default(schema_json)
|
403
|
+
actual_schema = '{"type": "record", "name": "Foo", "fields": []}'
|
404
|
+
actual = Avro::Schema.parse(actual_schema)
|
405
|
+
|
406
|
+
expected_schema = <<EOS
|
407
|
+
{"type": "record",
|
408
|
+
"name": "Foo",
|
409
|
+
"fields": [{"name": "f", "type": #{schema_json}}]}
|
410
|
+
EOS
|
411
|
+
expected = Avro::Schema.parse(expected_schema)
|
412
|
+
|
413
|
+
reader = Avro::IO::DatumReader.new(actual, expected)
|
414
|
+
assert_raise Avro::AvroError do
|
415
|
+
value = reader.read(Avro::IO::BinaryDecoder.new(StringIO.new))
|
416
|
+
assert_not_equal(value, :no_default) # should never return this
|
417
|
+
end
|
418
|
+
end
|
419
|
+
|
357
420
|
def check_default(schema_json, default_json, default_value)
|
358
421
|
actual_schema = '{"type": "record", "name": "Foo", "fields": []}'
|
359
422
|
actual = Avro::Schema.parse(actual_schema)
|
@@ -393,6 +456,9 @@ EOS
|
|
393
456
|
|
394
457
|
# test writing of data to file
|
395
458
|
check_datafile(schema)
|
459
|
+
|
460
|
+
# check that AvroError is raised when there is no default
|
461
|
+
check_no_default(str)
|
396
462
|
end
|
397
463
|
|
398
464
|
def checkser(schm, randomdata)
|
data/test/test_schema.rb
CHANGED
@@ -158,4 +158,55 @@ class TestSchema < Test::Unit::TestCase
|
|
158
158
|
|
159
159
|
assert_equal '"MissingType" is not a schema we know about.', error.message
|
160
160
|
end
|
161
|
+
|
162
|
+
def test_to_avro_handles_falsey_defaults
|
163
|
+
schema = Avro::Schema.parse <<-SCHEMA
|
164
|
+
{"type": "record", "name": "Record", "namespace": "my.name.space",
|
165
|
+
"fields": [
|
166
|
+
{"name": "is_usable", "type": "boolean", "default": false}
|
167
|
+
]
|
168
|
+
}
|
169
|
+
SCHEMA
|
170
|
+
|
171
|
+
assert_equal schema.to_avro, {
|
172
|
+
'type' => 'record', 'name' => 'Record', 'namespace' => 'my.name.space',
|
173
|
+
'fields' => [
|
174
|
+
{'name' => 'is_usable', 'type' => 'boolean', 'default' => false}
|
175
|
+
]
|
176
|
+
}
|
177
|
+
end
|
178
|
+
|
179
|
+
def test_empty_record
|
180
|
+
schema = Avro::Schema.parse('{"type":"record", "name":"Empty"}')
|
181
|
+
assert_empty(schema.fields)
|
182
|
+
end
|
183
|
+
|
184
|
+
def test_empty_union
|
185
|
+
schema = Avro::Schema.parse('[]')
|
186
|
+
assert_equal(schema.to_s, '[]')
|
187
|
+
end
|
188
|
+
|
189
|
+
def test_read
|
190
|
+
schema = Avro::Schema.parse('"string"')
|
191
|
+
writer_schema = Avro::Schema.parse('"int"')
|
192
|
+
assert_false(schema.read?(writer_schema))
|
193
|
+
assert_true(schema.read?(schema))
|
194
|
+
end
|
195
|
+
|
196
|
+
def test_be_read
|
197
|
+
schema = Avro::Schema.parse('"string"')
|
198
|
+
writer_schema = Avro::Schema.parse('"int"')
|
199
|
+
assert_false(schema.be_read?(writer_schema))
|
200
|
+
assert_true(schema.be_read?(schema))
|
201
|
+
end
|
202
|
+
|
203
|
+
def test_mutual_read
|
204
|
+
schema = Avro::Schema.parse('"string"')
|
205
|
+
writer_schema = Avro::Schema.parse('"int"')
|
206
|
+
default1 = Avro::Schema.parse('{"type":"record", "name":"Default", "fields":[{"name":"i", "type":"int", "default": 1}]}')
|
207
|
+
default2 = Avro::Schema.parse('{"type":"record", "name":"Default", "fields":[{"name:":"s", "type":"string", "default": ""}]}')
|
208
|
+
assert_false(schema.mutual_read?(writer_schema))
|
209
|
+
assert_true(schema.mutual_read?(schema))
|
210
|
+
assert_true(default1.mutual_read?(default2))
|
211
|
+
end
|
161
212
|
end
|
data/test/test_schema_compatibility.rb
ADDED
@@ -0,0 +1,463 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
|
17
|
+
require 'test_help'
|
18
|
+
|
19
|
+
class TestSchemaCompatibility < Test::Unit::TestCase
|
20
|
+
|
21
|
+
def test_primitive_schema_compatibility
|
22
|
+
Avro::Schema::PRIMITIVE_TYPES.each do |schema_type|
|
23
|
+
assert_true(can_read?(send("#{schema_type}_schema"), send("#{schema_type}_schema")))
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_compatible_reader_writer_pairs
|
28
|
+
[
|
29
|
+
long_schema, int_schema,
|
30
|
+
float_schema, int_schema,
|
31
|
+
float_schema, long_schema,
|
32
|
+
double_schema, long_schema,
|
33
|
+
double_schema, int_schema,
|
34
|
+
double_schema, float_schema,
|
35
|
+
|
36
|
+
int_array_schema, int_array_schema,
|
37
|
+
long_array_schema, int_array_schema,
|
38
|
+
int_map_schema, int_map_schema,
|
39
|
+
long_map_schema, int_map_schema,
|
40
|
+
|
41
|
+
enum1_ab_schema, enum1_ab_schema,
|
42
|
+
enum1_abc_schema, enum1_ab_schema,
|
43
|
+
|
44
|
+
string_schema, bytes_schema,
|
45
|
+
bytes_schema, string_schema,
|
46
|
+
|
47
|
+
empty_union_schema, empty_union_schema,
|
48
|
+
int_union_schema, int_union_schema,
|
49
|
+
int_string_union_schema, string_int_union_schema,
|
50
|
+
int_union_schema, empty_union_schema,
|
51
|
+
long_union_schema, int_union_schema,
|
52
|
+
|
53
|
+
int_union_schema, int_schema,
|
54
|
+
int_schema, int_union_schema,
|
55
|
+
|
56
|
+
empty_record1_schema, empty_record1_schema,
|
57
|
+
empty_record1_schema, a_int_record1_schema,
|
58
|
+
|
59
|
+
a_int_record1_schema, a_int_record1_schema,
|
60
|
+
a_dint_record1_schema, a_int_record1_schema,
|
61
|
+
a_dint_record1_schema, a_dint_record1_schema,
|
62
|
+
a_int_record1_schema, a_dint_record1_schema,
|
63
|
+
|
64
|
+
a_long_record1_schema, a_int_record1_schema,
|
65
|
+
|
66
|
+
a_int_record1_schema, a_int_b_int_record1_schema,
|
67
|
+
a_dint_record1_schema, a_int_b_int_record1_schema,
|
68
|
+
|
69
|
+
a_int_b_dint_record1_schema, a_int_record1_schema,
|
70
|
+
a_dint_b_dint_record1_schema, empty_record1_schema,
|
71
|
+
a_dint_b_dint_record1_schema, a_int_record1_schema,
|
72
|
+
a_int_b_int_record1_schema, a_dint_b_dint_record1_schema,
|
73
|
+
|
74
|
+
int_list_record_schema, int_list_record_schema,
|
75
|
+
long_list_record_schema, long_list_record_schema,
|
76
|
+
long_list_record_schema, int_list_record_schema,
|
77
|
+
|
78
|
+
null_schema, null_schema
|
79
|
+
].each_slice(2) do |(reader, writer)|
|
80
|
+
assert_true(can_read?(writer, reader), "expecting #{reader} to read #{writer}")
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def test_broken
|
85
|
+
assert_false(can_read?(int_string_union_schema, int_union_schema))
|
86
|
+
end
|
87
|
+
|
88
|
+
def test_incompatible_reader_writer_pairs
|
89
|
+
[
|
90
|
+
null_schema, int_schema,
|
91
|
+
null_schema, long_schema,
|
92
|
+
|
93
|
+
boolean_schema, int_schema,
|
94
|
+
|
95
|
+
int_schema, null_schema,
|
96
|
+
int_schema, boolean_schema,
|
97
|
+
int_schema, long_schema,
|
98
|
+
int_schema, float_schema,
|
99
|
+
int_schema, double_schema,
|
100
|
+
|
101
|
+
long_schema, float_schema,
|
102
|
+
long_schema, double_schema,
|
103
|
+
|
104
|
+
float_schema, double_schema,
|
105
|
+
|
106
|
+
string_schema, boolean_schema,
|
107
|
+
string_schema, int_schema,
|
108
|
+
|
109
|
+
bytes_schema, null_schema,
|
110
|
+
bytes_schema, int_schema,
|
111
|
+
|
112
|
+
int_array_schema, long_array_schema,
|
113
|
+
int_map_schema, int_array_schema,
|
114
|
+
int_array_schema, int_map_schema,
|
115
|
+
int_map_schema, long_map_schema,
|
116
|
+
|
117
|
+
enum1_ab_schema, enum1_abc_schema,
|
118
|
+
enum1_bc_schema, enum1_abc_schema,
|
119
|
+
|
120
|
+
enum1_ab_schema, enum2_ab_schema,
|
121
|
+
int_schema, enum2_ab_schema,
|
122
|
+
enum2_ab_schema, int_schema,
|
123
|
+
|
124
|
+
int_union_schema, int_string_union_schema,
|
125
|
+
string_union_schema, int_string_union_schema,
|
126
|
+
|
127
|
+
empty_record2_schema, empty_record1_schema,
|
128
|
+
a_int_record1_schema, empty_record1_schema,
|
129
|
+
a_int_b_dint_record1_schema, empty_record1_schema,
|
130
|
+
|
131
|
+
int_list_record_schema, long_list_record_schema,
|
132
|
+
|
133
|
+
null_schema, int_schema
|
134
|
+
].each_slice(2) do |(reader, writer)|
|
135
|
+
assert_false(can_read?(writer, reader), "expecting #{reader} not to read #{writer}")
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
def writer_schema
|
140
|
+
Avro::Schema.parse <<-SCHEMA
|
141
|
+
{"type":"record", "name":"Record", "fields":[
|
142
|
+
{"name":"oldfield1", "type":"int"},
|
143
|
+
{"name":"oldfield2", "type":"string"}
|
144
|
+
]}
|
145
|
+
SCHEMA
|
146
|
+
end
|
147
|
+
|
148
|
+
def test_missing_field
|
149
|
+
reader_schema = Avro::Schema.parse <<-SCHEMA
|
150
|
+
{"type":"record", "name":"Record", "fields":[
|
151
|
+
{"name":"oldfield1", "type":"int"}
|
152
|
+
]}
|
153
|
+
SCHEMA
|
154
|
+
assert_true(can_read?(writer_schema, reader_schema))
|
155
|
+
assert_false(can_read?(reader_schema, writer_schema))
|
156
|
+
end
|
157
|
+
|
158
|
+
def test_missing_second_field
|
159
|
+
reader_schema = Avro::Schema.parse <<-SCHEMA
|
160
|
+
{"type":"record", "name":"Record", "fields":[
|
161
|
+
{"name":"oldfield2", "type":"string"}
|
162
|
+
]}
|
163
|
+
SCHEMA
|
164
|
+
assert_true(can_read?(writer_schema, reader_schema))
|
165
|
+
assert_false(can_read?(reader_schema, writer_schema))
|
166
|
+
end
|
167
|
+
|
168
|
+
def test_all_fields
|
169
|
+
reader_schema = Avro::Schema.parse <<-SCHEMA
|
170
|
+
{"type":"record", "name":"Record", "fields":[
|
171
|
+
{"name":"oldfield1", "type":"int"},
|
172
|
+
{"name":"oldfield2", "type":"string"}
|
173
|
+
]}
|
174
|
+
SCHEMA
|
175
|
+
assert_true(can_read?(writer_schema, reader_schema))
|
176
|
+
assert_true(can_read?(reader_schema, writer_schema))
|
177
|
+
end
|
178
|
+
|
179
|
+
def test_new_field_with_default
|
180
|
+
reader_schema = Avro::Schema.parse <<-SCHEMA
|
181
|
+
{"type":"record", "name":"Record", "fields":[
|
182
|
+
{"name":"oldfield1", "type":"int"},
|
183
|
+
{"name":"newfield1", "type":"int", "default":42}
|
184
|
+
]}
|
185
|
+
SCHEMA
|
186
|
+
assert_true(can_read?(writer_schema, reader_schema))
|
187
|
+
assert_false(can_read?(reader_schema, writer_schema))
|
188
|
+
end
|
189
|
+
|
190
|
+
def test_new_field
|
191
|
+
reader_schema = Avro::Schema.parse <<-SCHEMA
|
192
|
+
{"type":"record", "name":"Record", "fields":[
|
193
|
+
{"name":"oldfield1", "type":"int"},
|
194
|
+
{"name":"newfield1", "type":"int"}
|
195
|
+
]}
|
196
|
+
SCHEMA
|
197
|
+
assert_false(can_read?(writer_schema, reader_schema))
|
198
|
+
assert_false(can_read?(reader_schema, writer_schema))
|
199
|
+
end
|
200
|
+
|
201
|
+
def test_array_writer_schema
|
202
|
+
valid_reader = string_array_schema
|
203
|
+
invalid_reader = string_map_schema
|
204
|
+
|
205
|
+
assert_true(can_read?(string_array_schema, valid_reader))
|
206
|
+
assert_false(can_read?(string_array_schema, invalid_reader))
|
207
|
+
end
|
208
|
+
|
209
|
+
def test_primitive_writer_schema
|
210
|
+
valid_reader = string_schema
|
211
|
+
assert_true(can_read?(string_schema, valid_reader))
|
212
|
+
assert_false(can_read?(int_schema, string_schema))
|
213
|
+
end
|
214
|
+
|
215
|
+
def test_union_reader_writer_subset_incompatiblity
|
216
|
+
# reader union schema must contain all writer union branches
|
217
|
+
union_writer = union_schema(int_schema, string_schema)
|
218
|
+
union_reader = union_schema(string_schema)
|
219
|
+
|
220
|
+
assert_false(can_read?(union_writer, union_reader))
|
221
|
+
assert_true(can_read?(union_reader, union_writer))
|
222
|
+
end
|
223
|
+
|
224
|
+
def test_incompatible_record_field
|
225
|
+
string_schema = Avro::Schema.parse <<-SCHEMA
|
226
|
+
{"type":"record", "name":"MyRecord", "namespace":"ns", "fields": [
|
227
|
+
{"name":"field1", "type":"string"}
|
228
|
+
]}
|
229
|
+
SCHEMA
|
230
|
+
int_schema = Avro::Schema.parse <<-SCHEMA2
|
231
|
+
{"type":"record", "name":"MyRecord", "namespace":"ns", "fields": [
|
232
|
+
{"name":"field1", "type":"int"}
|
233
|
+
]}
|
234
|
+
SCHEMA2
|
235
|
+
assert_false(can_read?(string_schema, int_schema))
|
236
|
+
end
|
237
|
+
|
238
|
+
def test_enum_symbols
|
239
|
+
enum_schema1 = Avro::Schema.parse <<-SCHEMA
|
240
|
+
{"type":"enum", "name":"MyEnum", "symbols":["A","B"]}
|
241
|
+
SCHEMA
|
242
|
+
enum_schema2 = Avro::Schema.parse <<-SCHEMA
|
243
|
+
{"type":"enum", "name":"MyEnum", "symbols":["A","B","C"]}
|
244
|
+
SCHEMA
|
245
|
+
assert_false(can_read?(enum_schema2, enum_schema1))
|
246
|
+
assert_true(can_read?(enum_schema1, enum_schema2))
|
247
|
+
end
|
248
|
+
|
249
|
+
# Tests from lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator2.java
|
250
|
+
|
251
|
+
def point_2d_schema
|
252
|
+
Avro::Schema.parse <<-SCHEMA
|
253
|
+
{"type":"record", "name":"Point2D", "fields":[
|
254
|
+
{"name":"x", "type":"double"},
|
255
|
+
{"name":"y", "type":"double"}
|
256
|
+
]}
|
257
|
+
SCHEMA
|
258
|
+
end
|
259
|
+
|
260
|
+
def point_2d_fullname_schema
|
261
|
+
Avro::Schema.parse <<-SCHEMA
|
262
|
+
{"type":"record", "name":"Point", "namespace":"written", "fields":[
|
263
|
+
{"name":"x", "type":"double"},
|
264
|
+
{"name":"y", "type":"double"}
|
265
|
+
]}
|
266
|
+
SCHEMA
|
267
|
+
end
|
268
|
+
|
269
|
+
def point_3d_no_default_schema
|
270
|
+
Avro::Schema.parse <<-SCHEMA
|
271
|
+
{"type":"record", "name":"Point", "fields":[
|
272
|
+
{"name":"x", "type":"double"},
|
273
|
+
{"name":"y", "type":"double"},
|
274
|
+
{"name":"z", "type":"double"}
|
275
|
+
]}
|
276
|
+
SCHEMA
|
277
|
+
end
|
278
|
+
|
279
|
+
def point_3d_schema
|
280
|
+
Avro::Schema.parse <<-SCHEMA
|
281
|
+
{"type":"record", "name":"Point3D", "fields":[
|
282
|
+
{"name":"x", "type":"double"},
|
283
|
+
{"name":"y", "type":"double"},
|
284
|
+
{"name":"z", "type":"double", "default": 0.0}
|
285
|
+
]}
|
286
|
+
SCHEMA
|
287
|
+
end
|
288
|
+
|
289
|
+
def point_3d_match_name_schema
|
290
|
+
Avro::Schema.parse <<-SCHEMA
|
291
|
+
{"type":"record", "name":"Point", "fields":[
|
292
|
+
{"name":"x", "type":"double"},
|
293
|
+
{"name":"y", "type":"double"},
|
294
|
+
{"name":"z", "type":"double", "default": 0.0}
|
295
|
+
]}
|
296
|
+
SCHEMA
|
297
|
+
end
|
298
|
+
|
299
|
+
def test_union_resolution_no_structure_match
|
300
|
+
# short name match, but no structure match
|
301
|
+
read_schema = union_schema(null_schema, point_3d_no_default_schema)
|
302
|
+
assert_false(can_read?(point_2d_fullname_schema, read_schema))
|
303
|
+
end
|
304
|
+
|
305
|
+
def test_union_resolution_first_structure_match_2d
|
306
|
+
# multiple structure matches with no name matches
|
307
|
+
read_schema = union_schema(null_schema, point_3d_no_default_schema, point_2d_schema, point_3d_schema)
|
308
|
+
assert_false(can_read?(point_2d_fullname_schema, read_schema))
|
309
|
+
end
|
310
|
+
|
311
|
+
def test_union_resolution_first_structure_match_3d
|
312
|
+
# multiple structure matches with no name matches
|
313
|
+
read_schema = union_schema(null_schema, point_3d_no_default_schema, point_3d_schema, point_2d_schema)
|
314
|
+
assert_false(can_read?(point_2d_fullname_schema, read_schema))
|
315
|
+
end
|
316
|
+
|
317
|
+
def test_union_resolution_named_structure_match
|
318
|
+
# multiple structure matches with a short name match
|
319
|
+
read_schema = union_schema(null_schema, point_2d_schema, point_3d_match_name_schema, point_3d_schema)
|
320
|
+
assert_false(can_read?(point_2d_fullname_schema, read_schema))
|
321
|
+
end
|
322
|
+
|
323
|
+
def test_union_resolution_full_name_match
|
324
|
+
# there is a full name match that should be chosen
|
325
|
+
read_schema = union_schema(null_schema, point_2d_schema, point_3d_match_name_schema, point_3d_schema, point_2d_fullname_schema)
|
326
|
+
assert_true(can_read?(point_2d_fullname_schema, read_schema))
|
327
|
+
end
|
328
|
+
|
329
|
+
def can_read?(writer, reader)
|
330
|
+
Avro::SchemaCompatibility.can_read?(writer, reader)
|
331
|
+
end
|
332
|
+
|
333
|
+
def union_schema(*schemas)
|
334
|
+
schemas ||= []
|
335
|
+
Avro::Schema.parse("[#{schemas.map(&:to_s).join(',')}]")
|
336
|
+
end
|
337
|
+
|
338
|
+
Avro::Schema::PRIMITIVE_TYPES.each do |schema_type|
|
339
|
+
define_method("#{schema_type}_schema") do
|
340
|
+
Avro::Schema.parse("\"#{schema_type}\"")
|
341
|
+
end
|
342
|
+
end
|
343
|
+
|
344
|
+
def int_array_schema
|
345
|
+
Avro::Schema.parse('{"type":"array", "items":"int"}')
|
346
|
+
end
|
347
|
+
|
348
|
+
def long_array_schema
|
349
|
+
Avro::Schema.parse('{"type":"array", "items":"long"}')
|
350
|
+
end
|
351
|
+
|
352
|
+
def string_array_schema
|
353
|
+
Avro::Schema.parse('{"type":"array", "items":"string"}')
|
354
|
+
end
|
355
|
+
|
356
|
+
def int_map_schema
|
357
|
+
Avro::Schema.parse('{"type":"map", "values":"int"}')
|
358
|
+
end
|
359
|
+
|
360
|
+
def long_map_schema
|
361
|
+
Avro::Schema.parse('{"type":"map", "values":"long"}')
|
362
|
+
end
|
363
|
+
|
364
|
+
def string_map_schema
|
365
|
+
Avro::Schema.parse('{"type":"map", "values":"string"}')
|
366
|
+
end
|
367
|
+
|
368
|
+
def enum1_ab_schema
|
369
|
+
Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B"]}')
|
370
|
+
end
|
371
|
+
|
372
|
+
def enum1_abc_schema
|
373
|
+
Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B","C"]}')
|
374
|
+
end
|
375
|
+
|
376
|
+
def enum1_bc_schema
|
377
|
+
Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["B","C"]}')
|
378
|
+
end
|
379
|
+
|
380
|
+
def enum2_ab_schema
|
381
|
+
Avro::Schema.parse('{"type":"enum", "name":"Enum2", "symbols":["A","B"]}')
|
382
|
+
end
|
383
|
+
|
384
|
+
def empty_record1_schema
|
385
|
+
Avro::Schema.parse('{"type":"record", "name":"Record1"}')
|
386
|
+
end
|
387
|
+
|
388
|
+
def empty_record2_schema
|
389
|
+
Avro::Schema.parse('{"type":"record", "name":"Record2"}')
|
390
|
+
end
|
391
|
+
|
392
|
+
def a_int_record1_schema
|
393
|
+
Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}]}')
|
394
|
+
end
|
395
|
+
|
396
|
+
def a_long_record1_schema
|
397
|
+
Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"long"}]}')
|
398
|
+
end
|
399
|
+
|
400
|
+
def a_int_b_int_record1_schema
|
401
|
+
Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}, {"name":"b", "type":"int"}]}')
|
402
|
+
end
|
403
|
+
|
404
|
+
def a_dint_record1_schema
|
405
|
+
Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int", "default":0}]}')
|
406
|
+
end
|
407
|
+
|
408
|
+
def a_int_b_dint_record1_schema
|
409
|
+
Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}, {"name":"b", "type":"int", "default":0}]}')
|
410
|
+
end
|
411
|
+
|
412
|
+
def a_dint_b_dint_record1_schema
|
413
|
+
Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int", "default":0}, {"name":"b", "type":"int", "default":0}]}')
|
414
|
+
end
|
415
|
+
|
416
|
+
def int_list_record_schema
|
417
|
+
Avro::Schema.parse <<-SCHEMA
|
418
|
+
{
|
419
|
+
"type":"record", "name":"List", "fields": [
|
420
|
+
{"name": "head", "type": "int"},
|
421
|
+
{"name": "tail", "type": "List"}
|
422
|
+
]}
|
423
|
+
SCHEMA
|
424
|
+
end
|
425
|
+
|
426
|
+
def long_list_record_schema
|
427
|
+
Avro::Schema.parse <<-SCHEMA
|
428
|
+
{
|
429
|
+
"type":"record", "name":"List", "fields": [
|
430
|
+
{"name": "head", "type": "long"},
|
431
|
+
{"name": "tail", "type": "List"}
|
432
|
+
]}
|
433
|
+
SCHEMA
|
434
|
+
end
|
435
|
+
|
436
|
+
def empty_union_schema
|
437
|
+
union_schema
|
438
|
+
end
|
439
|
+
|
440
|
+
def null_union_schema
|
441
|
+
union_schema(null_schema)
|
442
|
+
end
|
443
|
+
|
444
|
+
def int_union_schema
|
445
|
+
union_schema(int_schema)
|
446
|
+
end
|
447
|
+
|
448
|
+
def long_union_schema
|
449
|
+
union_schema(long_schema)
|
450
|
+
end
|
451
|
+
|
452
|
+
def string_union_schema
|
453
|
+
union_schema(string_schema)
|
454
|
+
end
|
455
|
+
|
456
|
+
def int_string_union_schema
|
457
|
+
union_schema(int_schema, string_schema)
|
458
|
+
end
|
459
|
+
|
460
|
+
def string_int_union_schema
|
461
|
+
union_schema(string_schema, int_schema)
|
462
|
+
end
|
463
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: avro-salsify-fork
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.0.2
|
4
|
+
version: 1.9.0.3.pre1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Apache Software Foundation / Salsify Engineering
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-12-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: multi_json
|
@@ -72,6 +72,7 @@ files:
|
|
72
72
|
- test/test_logical_types.rb
|
73
73
|
- test/test_protocol.rb
|
74
74
|
- test/test_schema.rb
|
75
|
+
- test/test_schema_compatibility.rb
|
75
76
|
- test/test_schema_normalization.rb
|
76
77
|
- test/test_socket_transport.rb
|
77
78
|
- test/tool.rb
|
@@ -110,5 +111,6 @@ test_files:
|
|
110
111
|
- test/test_logical_types.rb
|
111
112
|
- test/test_protocol.rb
|
112
113
|
- test/test_schema.rb
|
114
|
+
- test/test_schema_compatibility.rb
|
113
115
|
- test/test_schema_normalization.rb
|
114
116
|
- test/test_socket_transport.rb
|