avro 1.9.1 → 1.10.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest +1 -0
- data/Rakefile +10 -13
- data/avro.gemspec +10 -10
- data/interop/test_interop.rb +12 -2
- data/lib/avro.rb +14 -2
- data/lib/avro/VERSION.txt +1 -0
- data/lib/avro/data_file.rb +23 -0
- data/lib/avro/io.rb +23 -21
- data/lib/avro/ipc.rb +2 -2
- data/lib/avro/schema.rb +170 -27
- data/lib/avro/schema_compatibility.rb +16 -11
- data/test/case_finder.rb +8 -3
- data/test/random_data.rb +3 -2
- data/test/sample_ipc_client.rb +0 -0
- data/test/sample_ipc_http_client.rb +0 -0
- data/test/sample_ipc_http_server.rb +0 -0
- data/test/sample_ipc_server.rb +0 -0
- data/test/test_datafile.rb +13 -0
- data/test/test_fingerprints.rb +19 -0
- data/test/test_io.rb +82 -1
- data/test/test_schema.rb +268 -0
- data/test/test_schema_compatibility.rb +68 -0
- data/test/test_schema_normalization.rb +1 -0
- data/test/test_schema_validator.rb +2 -2
- data/test/tool.rb +3 -3
- metadata +16 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 25662687b72649ae6bbc6a18a975ada73965b60d
|
4
|
+
data.tar.gz: 8ffb726920396bc1644c499440a1f7b844deb404
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0542c4933a9cd95411c76b26cb88e456c1cd733749eccff6125058ecbf64fd184f64f7547f0b24376b80ba3b8349937926dfbd85a159d7b51e587ec439e59ef8
|
7
|
+
data.tar.gz: 76e8c8d2f1f30199a343fccfda5a4f6b972c6b33cef99b324a61a97554baebae59b62b8eec14682e6f10015d48c4740fb00f3c58fefdff39a313bdbbebbc9066
|
data/Manifest
CHANGED
data/Rakefile
CHANGED
@@ -17,14 +17,15 @@
|
|
17
17
|
require 'rubygems'
|
18
18
|
require 'echoe'
|
19
19
|
VERSION = File.open('../../share/VERSION.txt').read.sub('-SNAPSHOT', '.pre1').chomp
|
20
|
+
File.write("lib/avro/VERSION.txt", VERSION)
|
20
21
|
Echoe.new('avro', VERSION) do |p|
|
21
22
|
p.author = "Apache Software Foundation"
|
22
23
|
p.email = "dev@avro.apache.org"
|
23
24
|
p.summary = "Apache Avro for Ruby"
|
24
25
|
p.description = "Avro is a data serialization and RPC format"
|
25
26
|
p.url = "https://avro.apache.org/"
|
26
|
-
p.runtime_dependencies =
|
27
|
-
p.licenses = ["Apache
|
27
|
+
p.runtime_dependencies = ["multi_json ~>1"]
|
28
|
+
p.licenses = ["Apache-2.0"]
|
28
29
|
end
|
29
30
|
|
30
31
|
t = Rake::TestTask.new(:interop)
|
@@ -38,17 +39,13 @@ task :generate_interop do
|
|
38
39
|
|
39
40
|
schema = Avro::Schema.parse(File.read(SCHEMAS + '/interop.avsc'))
|
40
41
|
r = RandomData.new(schema, ENV['SEED'])
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
end
|
49
|
-
|
50
|
-
Avro::DataFile.open(BUILD + '/interop/data/ruby_deflate.avro', 'w', schema.to_s, :deflate) do |writer|
|
51
|
-
20.times { writer << r.next }
|
42
|
+
Avro::DataFile.codecs.each do |name, codec|
|
43
|
+
next unless codec
|
44
|
+
filename = name == 'null' ? 'ruby.avro' : "ruby_#{name}.avro"
|
45
|
+
path = File.join(BUILD, 'interop/data', filename)
|
46
|
+
Avro::DataFile.open(path, 'w', schema.to_s, name) do |writer|
|
47
|
+
writer << r.next
|
48
|
+
end
|
52
49
|
end
|
53
50
|
end
|
54
51
|
|
data/avro.gemspec
CHANGED
@@ -1,35 +1,35 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
-
# stub: avro 1.
|
2
|
+
# stub: avro 1.10.1 ruby lib
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "avro".freeze
|
6
|
-
s.version = "1.
|
6
|
+
s.version = "1.10.1"
|
7
7
|
|
8
8
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2".freeze) if s.respond_to? :required_rubygems_version=
|
9
9
|
s.require_paths = ["lib".freeze]
|
10
10
|
s.authors = ["Apache Software Foundation".freeze]
|
11
|
-
s.date = "
|
11
|
+
s.date = "2020-11-18"
|
12
12
|
s.description = "Avro is a data serialization and RPC format".freeze
|
13
13
|
s.email = "dev@avro.apache.org".freeze
|
14
|
-
s.extra_rdoc_files = ["CHANGELOG".freeze, "LICENSE".freeze, "lib/avro.rb".freeze, "lib/avro/data_file.rb".freeze, "lib/avro/io.rb".freeze, "lib/avro/ipc.rb".freeze, "lib/avro/logical_types.rb".freeze, "lib/avro/protocol.rb".freeze, "lib/avro/schema.rb".freeze, "lib/avro/schema_compatibility.rb".freeze, "lib/avro/schema_normalization.rb".freeze, "lib/avro/schema_validator.rb".freeze]
|
15
|
-
s.files = ["CHANGELOG".freeze, "LICENSE".freeze, "Manifest".freeze, "NOTICE".freeze, "Rakefile".freeze, "avro.gemspec".freeze, "interop/test_interop.rb".freeze, "lib/avro.rb".freeze, "lib/avro/data_file.rb".freeze, "lib/avro/io.rb".freeze, "lib/avro/ipc.rb".freeze, "lib/avro/logical_types.rb".freeze, "lib/avro/protocol.rb".freeze, "lib/avro/schema.rb".freeze, "lib/avro/schema_compatibility.rb".freeze, "lib/avro/schema_normalization.rb".freeze, "lib/avro/schema_validator.rb".freeze, "test/case_finder.rb".freeze, "test/random_data.rb".freeze, "test/sample_ipc_client.rb".freeze, "test/sample_ipc_http_client.rb".freeze, "test/sample_ipc_http_server.rb".freeze, "test/sample_ipc_server.rb".freeze, "test/test_datafile.rb".freeze, "test/test_fingerprints.rb".freeze, "test/test_help.rb".freeze, "test/test_io.rb".freeze, "test/test_logical_types.rb".freeze, "test/test_protocol.rb".freeze, "test/test_schema.rb".freeze, "test/test_schema_compatibility.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_schema_validator.rb".freeze, "test/test_socket_transport.rb".freeze, "test/tool.rb".freeze]
|
14
|
+
s.extra_rdoc_files = ["CHANGELOG".freeze, "LICENSE".freeze, "lib/avro.rb".freeze, "lib/avro/VERSION.txt".freeze, "lib/avro/data_file.rb".freeze, "lib/avro/io.rb".freeze, "lib/avro/ipc.rb".freeze, "lib/avro/logical_types.rb".freeze, "lib/avro/protocol.rb".freeze, "lib/avro/schema.rb".freeze, "lib/avro/schema_compatibility.rb".freeze, "lib/avro/schema_normalization.rb".freeze, "lib/avro/schema_validator.rb".freeze]
|
15
|
+
s.files = ["CHANGELOG".freeze, "LICENSE".freeze, "Manifest".freeze, "NOTICE".freeze, "Rakefile".freeze, "avro.gemspec".freeze, "interop/test_interop.rb".freeze, "lib/avro.rb".freeze, "lib/avro/VERSION.txt".freeze, "lib/avro/data_file.rb".freeze, "lib/avro/io.rb".freeze, "lib/avro/ipc.rb".freeze, "lib/avro/logical_types.rb".freeze, "lib/avro/protocol.rb".freeze, "lib/avro/schema.rb".freeze, "lib/avro/schema_compatibility.rb".freeze, "lib/avro/schema_normalization.rb".freeze, "lib/avro/schema_validator.rb".freeze, "test/case_finder.rb".freeze, "test/random_data.rb".freeze, "test/sample_ipc_client.rb".freeze, "test/sample_ipc_http_client.rb".freeze, "test/sample_ipc_http_server.rb".freeze, "test/sample_ipc_server.rb".freeze, "test/test_datafile.rb".freeze, "test/test_fingerprints.rb".freeze, "test/test_help.rb".freeze, "test/test_io.rb".freeze, "test/test_logical_types.rb".freeze, "test/test_protocol.rb".freeze, "test/test_schema.rb".freeze, "test/test_schema_compatibility.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_schema_validator.rb".freeze, "test/test_socket_transport.rb".freeze, "test/tool.rb".freeze]
|
16
16
|
s.homepage = "https://avro.apache.org/".freeze
|
17
|
-
s.licenses = ["Apache
|
17
|
+
s.licenses = ["Apache-2.0".freeze]
|
18
18
|
s.rdoc_options = ["--line-numbers".freeze, "--title".freeze, "Avro".freeze]
|
19
19
|
s.rubyforge_project = "avro".freeze
|
20
20
|
s.rubygems_version = "2.5.2.1".freeze
|
21
21
|
s.summary = "Apache Avro for Ruby".freeze
|
22
|
-
s.test_files = ["test/
|
22
|
+
s.test_files = ["test/test_schema.rb".freeze, "test/test_socket_transport.rb".freeze, "test/test_io.rb".freeze, "test/test_logical_types.rb".freeze, "test/test_help.rb".freeze, "test/test_datafile.rb".freeze, "test/test_protocol.rb".freeze, "test/test_schema_validator.rb".freeze, "test/test_schema_compatibility.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_fingerprints.rb".freeze]
|
23
23
|
|
24
24
|
if s.respond_to? :specification_version then
|
25
25
|
s.specification_version = 4
|
26
26
|
|
27
27
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
28
|
-
s.add_runtime_dependency(%q<multi_json>.freeze, ["
|
28
|
+
s.add_runtime_dependency(%q<multi_json>.freeze, ["~> 1"])
|
29
29
|
else
|
30
|
-
s.add_dependency(%q<multi_json>.freeze, ["
|
30
|
+
s.add_dependency(%q<multi_json>.freeze, ["~> 1"])
|
31
31
|
end
|
32
32
|
else
|
33
|
-
s.add_dependency(%q<multi_json>.freeze, ["
|
33
|
+
s.add_dependency(%q<multi_json>.freeze, ["~> 1"])
|
34
34
|
end
|
35
35
|
end
|
data/interop/test_interop.rb
CHANGED
@@ -19,12 +19,22 @@ require 'rubygems'
|
|
19
19
|
require 'test/unit'
|
20
20
|
require 'avro'
|
21
21
|
|
22
|
+
# Codec suffixes of interop files that are validated. Frozen so the list
# cannot be altered by accident once the test class has been defined.
CODECS_TO_VALIDATE = %w[deflate snappy zstandard].freeze # The 'null' codec is implicitly included
|
23
|
+
|
22
24
|
class TestInterop < Test::Unit::TestCase
|
23
25
|
HERE = File.expand_path(File.dirname(__FILE__))
|
24
26
|
SHARE = HERE + '/../../../share'
|
25
27
|
SCHEMAS = SHARE + '/test/schemas'
|
26
|
-
|
27
|
-
|
28
|
+
|
29
|
+
files = Dir[HERE + '/../../../build/interop/data/*.avro'].select do |fn|
|
30
|
+
sep, codec = File.basename(fn, '.avro').rpartition('_')[1, 2]
|
31
|
+
sep.empty? || CODECS_TO_VALIDATE.include?(codec)
|
32
|
+
end
|
33
|
+
puts "The following files will be tested:"
|
34
|
+
puts files
|
35
|
+
|
36
|
+
files.each do |fn|
|
37
|
+
define_method("test_read_#{File.basename(fn, '.avro')}") do
|
28
38
|
projection = Avro::Schema.parse(File.read(SCHEMAS+'/interop.avsc'))
|
29
39
|
|
30
40
|
File.open(fn) do |f|
|
data/lib/avro.rb
CHANGED
@@ -22,24 +22,36 @@ require 'stringio'
|
|
22
22
|
require 'zlib'
|
23
23
|
|
24
24
|
module Avro
|
25
|
-
VERSION = "
|
25
|
+
VERSION = File.read("#{__dir__}/avro/VERSION.txt").freeze
|
26
26
|
|
27
27
|
class AvroError < StandardError; end
|
28
28
|
|
29
29
|
class AvroTypeError < Avro::AvroError
|
30
30
|
def initialize(schm=nil, datum=nil, msg=nil)
|
31
|
-
msg ||= "Not a #{schm
|
31
|
+
msg ||= "Not a #{schm}: #{datum}"
|
32
32
|
super(msg)
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
36
|
class << self
|
37
|
+
attr_writer :disable_enum_symbol_validation
|
37
38
|
attr_writer :disable_field_default_validation
|
39
|
+
attr_writer :disable_schema_name_validation
|
40
|
+
|
41
|
+
# Tells whether enum symbol validation is switched off.
#
# A value assigned through the writer wins, including an explicit +false+.
# Only while nothing (or nil) has been assigned is the
# AVRO_DISABLE_ENUM_SYMBOL_VALIDATION environment variable consulted; any
# non-empty value disables validation. The outcome is cached.
def disable_enum_symbol_validation
  # `||=` would treat a stored false as "unset" and fall back to ENV again,
  # silently overriding an explicit `= false`; test for nil instead.
  if @disable_enum_symbol_validation.nil?
    @disable_enum_symbol_validation =
      ENV.fetch('AVRO_DISABLE_ENUM_SYMBOL_VALIDATION', '') != ''
  end
  @disable_enum_symbol_validation
end
|
38
45
|
|
39
46
|
# Tells whether validation of field default values is switched off.
#
# A value assigned through the writer wins, including an explicit +false+.
# Only while nothing (or nil) has been assigned is the
# AVRO_DISABLE_FIELD_DEFAULT_VALIDATION environment variable consulted; any
# non-empty value disables validation. The outcome is cached.
def disable_field_default_validation
  # `||=` would treat a stored false as "unset" and fall back to ENV again,
  # silently overriding an explicit `= false`; test for nil instead.
  if @disable_field_default_validation.nil?
    @disable_field_default_validation =
      ENV.fetch('AVRO_DISABLE_FIELD_DEFAULT_VALIDATION', '') != ''
  end
  @disable_field_default_validation
end
|
50
|
+
|
51
|
+
# Tells whether schema name validation is switched off.
#
# A value assigned through the writer wins, including an explicit +false+.
# Only while nothing (or nil) has been assigned is the
# AVRO_DISABLE_SCHEMA_NAME_VALIDATION environment variable consulted; any
# non-empty value disables validation. The outcome is cached.
def disable_schema_name_validation
  # `||=` would treat a stored false as "unset" and fall back to ENV again,
  # silently overriding an explicit `= false`; test for nil instead.
  if @disable_schema_name_validation.nil?
    @disable_schema_name_validation =
      ENV.fetch('AVRO_DISABLE_SCHEMA_NAME_VALIDATION', '') != ''
  end
  @disable_schema_name_validation
end
|
43
55
|
end
|
44
56
|
end
|
45
57
|
|
@@ -0,0 +1 @@
|
|
1
|
+
1.10.1
|
data/lib/avro/data_file.rb
CHANGED
@@ -372,9 +372,32 @@ module Avro
|
|
372
372
|
end
|
373
373
|
end
|
374
374
|
|
375
|
+
# Codec that delegates compression and decompression to Zstd.
#
# The `zstd-ruby` gem is required on first use instead of at file load, so
# it only has to be installed when this codec is actually exercised.
class ZstandardCodec
  # Identifier of this codec.
  def codec_name
    'zstandard'
  end

  # Returns the decompressed form of +data+.
  def decompress(data)
    load_zstandard!
    Zstd.decompress(data)
  end

  # Returns the compressed form of +data+.
  def compress(data)
    load_zstandard!
    Zstd.compress(data)
  end

  private

  # Makes sure the Zstd constant is available, turning a missing gem into a
  # LoadError that tells the user what to install.
  def load_zstandard!
    return if defined?(Zstd)

    begin
      require 'zstd-ruby'
    rescue LoadError
      raise LoadError, "Zstandard compression is not available, please install the `zstd-ruby` gem."
    end
  end
end
|
396
|
+
|
375
397
|
DataFile.register_codec NullCodec
|
376
398
|
DataFile.register_codec DeflateCodec
|
377
399
|
DataFile.register_codec SnappyCodec
|
400
|
+
DataFile.register_codec ZstandardCodec
|
378
401
|
|
379
402
|
# TODO this constant won't be updated if you register another codec.
|
380
403
|
# Deprecated in favor of Avro::DataFile::codecs
|
data/lib/avro/io.rb
CHANGED
@@ -172,7 +172,7 @@ module Avro
|
|
172
172
|
end
|
173
173
|
|
174
174
|
# null is written as zero bytes
|
175
|
-
def write_null(
|
175
|
+
# Encodes a null: there is nothing to emit, so the argument is ignored and
# nil is returned.
def write_null(_datum)
  nil
end
|
178
178
|
|
@@ -292,7 +292,7 @@ module Avro
|
|
292
292
|
readers_schema.type_adapter.decode(datum)
|
293
293
|
end
|
294
294
|
|
295
|
-
def read_fixed(writers_schema,
|
295
|
+
# Reads a fixed value by pulling writers_schema.size bytes from the decoder.
# The reader's schema is accepted but not used.
def read_fixed(writers_schema, _readers_schema, decoder)
  byte_count = writers_schema.size
  decoder.read(byte_count)
end
|
298
298
|
|
@@ -300,12 +300,12 @@ module Avro
|
|
300
300
|
index_of_symbol = decoder.read_int
|
301
301
|
read_symbol = writers_schema.symbols[index_of_symbol]
|
302
302
|
|
303
|
-
|
304
|
-
|
305
|
-
unless readers_schema.symbols.include?(read_symbol)
|
306
|
-
# 'unset' here
|
303
|
+
if !readers_schema.symbols.include?(read_symbol) && readers_schema.default
|
304
|
+
read_symbol = readers_schema.default
|
307
305
|
end
|
308
306
|
|
307
|
+
# This implementation deviates from the spec by always returning
|
308
|
+
# a symbol.
|
309
309
|
read_symbol
|
310
310
|
end
|
311
311
|
|
@@ -359,26 +359,28 @@ module Avro
|
|
359
359
|
readers_fields_hash = readers_schema.fields_hash
|
360
360
|
read_record = {}
|
361
361
|
writers_schema.fields.each do |field|
|
362
|
-
|
362
|
+
readers_field = readers_fields_hash[field.name]
|
363
|
+
if readers_field
|
363
364
|
field_val = read_data(field.type, readers_field.type, decoder)
|
364
365
|
read_record[field.name] = field_val
|
366
|
+
elsif readers_schema.fields_by_alias.key?(field.name)
|
367
|
+
readers_field = readers_schema.fields_by_alias[field.name]
|
368
|
+
field_val = read_data(field.type, readers_field.type, decoder)
|
369
|
+
read_record[readers_field.name] = field_val
|
365
370
|
else
|
366
371
|
skip_data(field.type, decoder)
|
367
372
|
end
|
368
373
|
end
|
369
374
|
|
370
375
|
# fill in the default values
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
raise AvroError, "Missing data for #{field.type} with no default"
|
380
|
-
end
|
381
|
-
end
|
376
|
+
readers_fields_hash.each do |field_name, field|
|
377
|
+
next if read_record.key?(field_name)
|
378
|
+
|
379
|
+
if field.default?
|
380
|
+
field_val = read_default_value(field.type, field.default)
|
381
|
+
read_record[field.name] = field_val
|
382
|
+
else
|
383
|
+
raise AvroError, "Missing data for #{field.type} with no default"
|
382
384
|
end
|
383
385
|
end
|
384
386
|
|
@@ -468,7 +470,7 @@ module Avro
|
|
468
470
|
decoder.skip(writers_schema.size)
|
469
471
|
end
|
470
472
|
|
471
|
-
def skip_enum(
|
473
|
+
# Skips an enum value by skipping a single int on the decoder. The writer's
# schema is accepted but not used.
def skip_enum(_writers_schema, decoder)
  decoder.skip_int
end
|
474
476
|
|
@@ -545,7 +547,7 @@ module Avro
|
|
545
547
|
end
|
546
548
|
end
|
547
549
|
|
548
|
-
def write_fixed(
|
550
|
+
# Writes a fixed value by handing the datum to the encoder unchanged. The
# writer's schema is accepted but not used.
def write_fixed(_writers_schema, datum, encoder)
  encoder.write(datum)
end
|
551
553
|
|
@@ -591,7 +593,7 @@ module Avro
|
|
591
593
|
def write_record(writers_schema, datum, encoder)
|
592
594
|
raise AvroTypeError.new(writers_schema, datum) unless datum.is_a?(Hash)
|
593
595
|
writers_schema.fields.each do |field|
|
594
|
-
write_data(field.type, datum[field.name], encoder)
|
596
|
+
write_data(field.type, datum.key?(field.name) ? datum[field.name] : datum[field.name.to_sym], encoder)
|
595
597
|
end
|
596
598
|
end
|
597
599
|
end # DatumWriter
|
data/lib/avro/ipc.rb
CHANGED
@@ -278,7 +278,7 @@ module Avro::IPC
|
|
278
278
|
response = call(local_message, request)
|
279
279
|
rescue AvroRemoteError => e
|
280
280
|
error = e
|
281
|
-
rescue Exception => e
|
281
|
+
rescue Exception => e # rubocop:disable Lint/RescueException
|
282
282
|
error = AvroRemoteError.new(e.to_s)
|
283
283
|
end
|
284
284
|
|
@@ -350,7 +350,7 @@ module Avro::IPC
|
|
350
350
|
remote_protocol
|
351
351
|
end
|
352
352
|
|
353
|
-
def call(
|
353
|
+
# Placeholder for the real message handling; this version always raises
# NotImplementedError and ignores both arguments.
def call(_local_message, _request)
  # Actual work done by server: cf. handler in thrift.
  raise NotImplementedError
end
|
data/lib/avro/schema.rb
CHANGED
@@ -29,6 +29,8 @@ module Avro
|
|
29
29
|
NAMED_TYPES_SYM = Set.new(NAMED_TYPES.map(&:to_sym))
|
30
30
|
VALID_TYPES_SYM = Set.new(VALID_TYPES.map(&:to_sym))
|
31
31
|
|
32
|
+
# Dot-separated identifiers, each starting with a letter or underscore.
# Anchored with \A and \z so the WHOLE string must match; ^ and $ anchor per
# line and would accept a value such as "Rec\n!bad".
NAME_REGEX = /\A([A-Za-z_][A-Za-z0-9_]*)(\.([A-Za-z_][A-Za-z0-9_]*))*\z/
|
33
|
+
|
32
34
|
INT_MIN_VALUE = -(1 << 31)
|
33
35
|
INT_MAX_VALUE = (1 << 31) - 1
|
34
36
|
LONG_MIN_VALUE = -(1 << 63)
|
@@ -53,23 +55,34 @@ module Avro
|
|
53
55
|
|
54
56
|
type_sym = type.to_sym
|
55
57
|
if PRIMITIVE_TYPES_SYM.include?(type_sym)
|
56
|
-
|
57
|
-
|
58
|
+
case type_sym
|
59
|
+
when :bytes
|
60
|
+
precision = json_obj['precision']
|
61
|
+
scale = json_obj['scale']
|
62
|
+
return BytesSchema.new(type_sym, logical_type, precision, scale)
|
63
|
+
else
|
64
|
+
return PrimitiveSchema.new(type_sym, logical_type)
|
65
|
+
end
|
58
66
|
elsif NAMED_TYPES_SYM.include? type_sym
|
59
67
|
name = json_obj['name']
|
68
|
+
if !Avro.disable_schema_name_validation && name !~ NAME_REGEX
|
69
|
+
raise SchemaParseError, "Name #{name} is invalid for type #{type}!"
|
70
|
+
end
|
60
71
|
namespace = json_obj.include?('namespace') ? json_obj['namespace'] : default_namespace
|
72
|
+
aliases = json_obj['aliases']
|
61
73
|
case type_sym
|
62
74
|
when :fixed
|
63
75
|
size = json_obj['size']
|
64
|
-
return FixedSchema.new(name, namespace, size, names, logical_type)
|
76
|
+
return FixedSchema.new(name, namespace, size, names, logical_type, aliases)
|
65
77
|
when :enum
|
66
78
|
symbols = json_obj['symbols']
|
67
79
|
doc = json_obj['doc']
|
68
|
-
|
80
|
+
default = json_obj['default']
|
81
|
+
return EnumSchema.new(name, namespace, symbols, names, doc, default, aliases)
|
69
82
|
when :record, :error
|
70
83
|
fields = json_obj['fields']
|
71
84
|
doc = json_obj['doc']
|
72
|
-
return RecordSchema.new(name, namespace, fields, names, type_sym, doc)
|
85
|
+
return RecordSchema.new(name, namespace, fields, names, type_sym, doc, aliases)
|
73
86
|
else
|
74
87
|
raise SchemaParseError.new("Unknown named type: #{type}")
|
75
88
|
end
|
@@ -131,6 +144,49 @@ module Avro
|
|
131
144
|
Digest::SHA256.hexdigest(parsing_form).to_i(16)
|
132
145
|
end
|
133
146
|
|
147
|
+
CRC_EMPTY = 0xc15d213aa4d7a795
|
148
|
+
|
149
|
+
# The java library caches this value after initialized, so this pattern
|
150
|
+
# mimics that.
|
151
|
+
@@fp_table = nil
|
152
|
+
# Builds the 256-entry lookup table that crc_64_avro_fingerprint indexes and
# stores it in @@fp_table.
#
# The table is assembled in a local first and assigned in one step, so the
# `unless @@fp_table` check in crc_64_avro_fingerprint can never observe a
# table that is non-nil but still contains unfilled (nil) slots.
def initFPTable
  table = Array.new(256) do |i|
    fp = i
    8.times do
      # -(fp & 1) is 0 or -1, i.e. an all-zeros or all-ones mask for CRC_EMPTY.
      fp = (fp >> 1) ^ (CRC_EMPTY & -(fp & 1))
    end
    fp
  end
  @@fp_table = table
end
|
162
|
+
|
163
|
+
# Returns the CRC-64-AVRO fingerprint (an Integer) of this schema's parsing
# form, folding one byte at a time through the @@fp_table lookup table.
# The table is built on first use.
def crc_64_avro_fingerprint
  canonical_form = Avro::SchemaNormalization.to_parsing_form(self)
  octets = canonical_form.unpack("C*")

  initFPTable unless @@fp_table

  octets.inject(CRC_EMPTY) do |acc, octet|
    (acc >> 8) ^ @@fp_table[(acc ^ octet) & 0xff]
  end
end
|
175
|
+
|
176
|
+
# Two marker bytes placed at the start of a single-object encoding header.
# Frozen so the shared constant cannot be mutated by callers.
SINGLE_OBJECT_MAGIC_NUMBER = [0xC3, 0x01].freeze

# Returns the single-object encoding header as a flat array of byte values:
# the magic number followed by the bytes of single_object_schema_fingerprint.
# A new, unfrozen array is returned on every call.
def single_object_encoding_header
  SINGLE_OBJECT_MAGIC_NUMBER + single_object_schema_fingerprint
end
|
180
|
+
# Splits crc_64_avro_fingerprint into its eight low-order bytes, returned as
# an array of Integers with the least significant byte first.
def single_object_schema_fingerprint
  fingerprint = crc_64_avro_fingerprint
  Array.new(8) { |index| (fingerprint >> (8 * index)) & 0xff }
end
|
189
|
+
|
134
190
|
def read?(writers_schema)
|
135
191
|
SchemaCompatibility.can_read?(writers_schema, self)
|
136
192
|
end
|
@@ -143,11 +199,11 @@ module Avro
|
|
143
199
|
SchemaCompatibility.mutual_read?(other_schema, self)
|
144
200
|
end
|
145
201
|
|
146
|
-
def ==(other,
|
202
|
+
def ==(other, _seen=nil)
|
147
203
|
other.is_a?(Schema) && type_sym == other.type_sym
|
148
204
|
end
|
149
205
|
|
150
|
-
def hash(
|
206
|
+
def hash(_seen=nil)
|
151
207
|
type_sym.hash
|
152
208
|
end
|
153
209
|
|
@@ -165,7 +221,7 @@ module Avro
|
|
165
221
|
end
|
166
222
|
end
|
167
223
|
|
168
|
-
def to_avro(
|
224
|
+
def to_avro(_names=nil)
|
169
225
|
props = {'type' => type}
|
170
226
|
props['logicalType'] = logical_type if logical_type
|
171
227
|
props
|
@@ -175,14 +231,26 @@ module Avro
|
|
175
231
|
MultiJson.dump to_avro
|
176
232
|
end
|
177
233
|
|
234
|
+
# Verifies that aliases is either nil or an array made up solely of strings.
# Raises Avro::SchemaParseError naming the offending value otherwise.
def validate_aliases!
  return if aliases.nil?
  return if aliases.is_a?(Array) && aliases.all? { |entry| entry.is_a?(String) }

  raise Avro::SchemaParseError,
        "Invalid aliases value #{aliases.inspect} for #{type} #{name}. Must be an array of strings."
end
private :validate_aliases!
|
243
|
+
|
178
244
|
class NamedSchema < Schema
|
179
|
-
attr_reader :name, :namespace
|
245
|
+
attr_reader :name, :namespace, :aliases
|
180
246
|
|
181
|
-
def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil)
|
247
|
+
def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil, aliases=nil)
|
182
248
|
super(type, logical_type)
|
183
249
|
@name, @namespace = Name.extract_namespace(name, namespace)
|
184
|
-
@doc
|
185
|
-
|
250
|
+
@doc = doc
|
251
|
+
@aliases = aliases
|
252
|
+
validate_aliases! if aliases
|
253
|
+
Name.add_name(names, self)
|
186
254
|
end
|
187
255
|
|
188
256
|
def to_avro(names=Set.new)
|
@@ -192,33 +260,53 @@ module Avro
|
|
192
260
|
end
|
193
261
|
props = {'name' => @name}
|
194
262
|
props.merge!('namespace' => @namespace) if @namespace
|
195
|
-
props
|
263
|
+
props['namespace'] = @namespace if @namespace
|
264
|
+
props['doc'] = @doc if @doc
|
265
|
+
props['aliases'] = aliases if aliases && aliases.any?
|
196
266
|
super.merge props
|
197
267
|
end
|
198
268
|
|
199
269
|
def fullname
|
200
270
|
@fullname ||= Name.make_fullname(@name, @namespace)
|
201
271
|
end
|
272
|
+
|
273
|
+
# Returns the aliases expanded to full names via Name.make_fullname and this
# schema's namespace; an empty array when there are no aliases. The result
# is computed once and cached.
def fullname_aliases
  @fullname_aliases ||= (aliases || []).map { |short_name| Name.make_fullname(short_name, namespace) }
end
|
280
|
+
|
281
|
+
# True when +name+ equals this schema's full name or one of its full-name
# aliases.
def match_fullname?(name)
  return true if name == fullname

  fullname_aliases.include?(name)
end
|
202
284
|
end
|
203
285
|
|
204
286
|
class RecordSchema < NamedSchema
|
205
287
|
attr_reader :fields, :doc
|
206
288
|
|
207
289
|
def self.make_field_objects(field_data, names, namespace=nil)
|
208
|
-
field_objects, field_names = [], Set.new
|
209
|
-
field_data.
|
290
|
+
field_objects, field_names, alias_names = [], Set.new, Set.new
|
291
|
+
field_data.each do |field|
|
210
292
|
if field.respond_to?(:[]) # TODO(jmhodges) wtffffff
|
211
293
|
type = field['type']
|
212
294
|
name = field['name']
|
213
295
|
default = field.key?('default') ? field['default'] : :no_default
|
214
296
|
order = field['order']
|
215
297
|
doc = field['doc']
|
216
|
-
|
298
|
+
aliases = field['aliases']
|
299
|
+
new_field = Field.new(type, name, default, order, names, namespace, doc, aliases)
|
217
300
|
# make sure field name has not been used yet
|
218
301
|
if field_names.include?(new_field.name)
|
219
302
|
raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
|
220
303
|
end
|
221
304
|
field_names << new_field.name
|
305
|
+
# make sure alias has not been used yet
|
306
|
+
if new_field.aliases && alias_names.intersect?(new_field.aliases.to_set)
|
307
|
+
raise SchemaParseError, "Alias #{(alias_names & new_field.aliases).to_a} already in use"
|
308
|
+
end
|
309
|
+
alias_names.merge(new_field.aliases) if new_field.aliases
|
222
310
|
else
|
223
311
|
raise SchemaParseError, "Not a valid field: #{field}"
|
224
312
|
end
|
@@ -227,14 +315,14 @@ module Avro
|
|
227
315
|
field_objects
|
228
316
|
end
|
229
317
|
|
230
|
-
def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil)
|
318
|
+
def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil, aliases=nil)
|
231
319
|
if schema_type == :request || schema_type == 'request'
|
232
320
|
@type_sym = schema_type.to_sym
|
233
321
|
@namespace = namespace
|
234
322
|
@name = nil
|
235
323
|
@doc = nil
|
236
324
|
else
|
237
|
-
super(schema_type, name, namespace, names, doc)
|
325
|
+
super(schema_type, name, namespace, names, doc, nil, aliases)
|
238
326
|
end
|
239
327
|
@fields = if fields
|
240
328
|
RecordSchema.make_field_objects(fields, names, self.namespace)
|
@@ -247,6 +335,16 @@ module Avro
|
|
247
335
|
@fields_hash ||= fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
|
248
336
|
end
|
249
337
|
|
338
|
+
# Maps every field alias to the field that declares it. Fields without
# aliases contribute nothing. The hash is built once and cached.
def fields_by_alias
  @fields_by_alias ||= begin
    lookup = {}
    fields.each do |field|
      next unless field.aliases

      field.aliases.each { |alias_name| lookup[alias_name] = field }
    end
    lookup
  end
end
|
347
|
+
|
250
348
|
def to_avro(names=Set.new)
|
251
349
|
hsh = super
|
252
350
|
return hsh unless hsh.is_a?(Hash)
|
@@ -313,20 +411,41 @@ module Avro
|
|
313
411
|
end
|
314
412
|
|
315
413
|
class EnumSchema < NamedSchema
|
316
|
-
|
414
|
+
# A single identifier starting with a letter or underscore. Anchored with
# \A and \z so the WHOLE symbol must match; ^ and $ anchor per line and
# would accept a value such as "OK\n!bad".
SYMBOL_REGEX = /\A[A-Za-z_][A-Za-z0-9_]*\z/
|
415
|
+
|
416
|
+
attr_reader :symbols, :doc, :default
|
317
417
|
|
318
|
-
def initialize(name, space, symbols, names=nil, doc=nil)
|
418
|
+
def initialize(name, space, symbols, names=nil, doc=nil, default=nil, aliases=nil)
|
319
419
|
if symbols.uniq.length < symbols.length
|
320
420
|
fail_msg = "Duplicate symbol: #{symbols}"
|
321
421
|
raise Avro::SchemaParseError, fail_msg
|
322
422
|
end
|
323
|
-
|
423
|
+
|
424
|
+
if !Avro.disable_enum_symbol_validation
|
425
|
+
invalid_symbols = symbols.select { |symbol| symbol !~ SYMBOL_REGEX }
|
426
|
+
|
427
|
+
if invalid_symbols.any?
|
428
|
+
raise SchemaParseError,
|
429
|
+
"Invalid symbols for #{name}: #{invalid_symbols.join(', ')} don't match #{SYMBOL_REGEX.inspect}"
|
430
|
+
end
|
431
|
+
end
|
432
|
+
|
433
|
+
if default && !symbols.include?(default)
|
434
|
+
raise Avro::SchemaParseError, "Default '#{default}' is not a valid symbol for enum #{name}"
|
435
|
+
end
|
436
|
+
|
437
|
+
super(:enum, name, space, names, doc, nil, aliases)
|
438
|
+
@default = default
|
324
439
|
@symbols = symbols
|
325
440
|
end
|
326
441
|
|
327
|
-
def to_avro(
|
442
|
+
def to_avro(_names=Set.new)
|
328
443
|
avro = super
|
329
|
-
avro.is_a?(Hash)
|
444
|
+
if avro.is_a?(Hash)
|
445
|
+
avro['symbols'] = symbols
|
446
|
+
avro['default'] = default if default
|
447
|
+
end
|
448
|
+
avro
|
330
449
|
end
|
331
450
|
end
|
332
451
|
|
@@ -348,14 +467,32 @@ module Avro
|
|
348
467
|
end
|
349
468
|
end
|
350
469
|
|
470
|
+
# Primitive schema that additionally carries optional precision and scale
# attributes and includes them in its Avro representation when set.
class BytesSchema < PrimitiveSchema
  attr_reader :precision, :scale

  # type is converted with to_sym before being handed to the superclass;
  # precision and scale are stored as given.
  def initialize(type, logical_type=nil, precision=nil, scale=nil)
    super(type.to_sym, logical_type)
    @precision, @scale = precision, scale
  end

  # Extends the superclass representation with 'precision' and 'scale' when
  # they are set. A String result from the superclass is returned untouched.
  def to_avro(names=nil)
    serialized = super
    unless serialized.is_a?(String)
      serialized['precision'] = precision if precision
      serialized['scale'] = scale if scale
    end
    serialized
  end
end
|
487
|
+
|
351
488
|
class FixedSchema < NamedSchema
|
352
489
|
attr_reader :size
|
353
|
-
def initialize(name, space, size, names=nil, logical_type=nil)
|
490
|
+
def initialize(name, space, size, names=nil, logical_type=nil, aliases=nil)
|
354
491
|
# Ensure valid cto args
|
355
492
|
unless size.is_a?(Integer)
|
356
493
|
raise AvroError, 'Fixed Schema requires a valid integer for size property.'
|
357
494
|
end
|
358
|
-
super(:fixed, name, space, names, nil, logical_type)
|
495
|
+
super(:fixed, name, space, names, nil, logical_type, aliases)
|
359
496
|
@size = size
|
360
497
|
end
|
361
498
|
|
@@ -366,14 +503,16 @@ module Avro
|
|
366
503
|
end
|
367
504
|
|
368
505
|
class Field < Schema
|
369
|
-
attr_reader :type, :name, :default, :order, :doc
|
506
|
+
attr_reader :type, :name, :default, :order, :doc, :aliases
|
370
507
|
|
371
|
-
def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil)
|
508
|
+
def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil, aliases=nil)
|
372
509
|
@type = subparse(type, names, namespace)
|
373
510
|
@name = name
|
374
511
|
@default = default
|
375
512
|
@order = order
|
376
513
|
@doc = doc
|
514
|
+
@aliases = aliases
|
515
|
+
validate_aliases! if aliases
|
377
516
|
validate_default! if default? && !Avro.disable_field_default_validation
|
378
517
|
end
|
379
518
|
|
@@ -389,6 +528,10 @@ module Avro
|
|
389
528
|
end
|
390
529
|
end
|
391
530
|
|
531
|
+
# Returns the aliases coerced to an array with Kernel#Array, so nil becomes
# an empty array. The result is cached.
def alias_names
  @alias_names ||= Array(aliases)
end
|
534
|
+
|
392
535
|
private
|
393
536
|
|
394
537
|
def validate_default!
|