avro 1.8.1 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +2 -2
- data/Manifest +6 -0
- data/NOTICE +1 -1
- data/Rakefile +11 -15
- data/avro.gemspec +21 -21
- data/interop/test_interop.rb +13 -3
- data/lib/avro.rb +25 -2
- data/lib/avro/data_file.rb +43 -3
- data/lib/avro/io.rb +66 -80
- data/lib/avro/ipc.rb +11 -11
- data/lib/avro/logical_types.rb +90 -0
- data/lib/avro/protocol.rb +12 -8
- data/lib/avro/schema.rb +244 -75
- data/lib/avro/schema_compatibility.rb +175 -0
- data/lib/avro/schema_normalization.rb +1 -1
- data/lib/avro/schema_validator.rb +242 -0
- data/test/case_finder.rb +9 -4
- data/test/random_data.rb +24 -4
- data/test/sample_ipc_client.rb +1 -1
- data/test/sample_ipc_http_client.rb +1 -1
- data/test/sample_ipc_http_server.rb +1 -1
- data/test/sample_ipc_server.rb +1 -1
- data/test/test_datafile.rb +17 -4
- data/test/test_fingerprints.rb +20 -1
- data/test/test_help.rb +1 -1
- data/test/test_io.rb +187 -7
- data/test/test_logical_types.rb +128 -0
- data/test/test_protocol.rb +37 -4
- data/test/test_schema.rb +609 -28
- data/test/test_schema_compatibility.rb +543 -0
- data/test/test_schema_normalization.rb +2 -1
- data/test/test_schema_validator.rb +554 -0
- data/test/test_socket_transport.rb +1 -1
- data/test/tool.rb +4 -5
- metadata +25 -13
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: eab84afb7b30d99dee71a7e360dba2a36957500b
|
|
4
|
+
data.tar.gz: 8211aea9ad1e00a4f94bb3edc2aeaf9ab06edffa
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c51889575e3cd689f7288bdd7a0d8250a8147a8eaf568805f726991f9f115c28f8ca9fa396262c687021859cbd9a26c5f196b7f73a174a2da9dd7f22764fe8d8
|
|
7
|
+
data.tar.gz: 0d936920e8b8ea8edd70583164bd519ac46108de4ab9f7d359d6edb7e8d1941eb3bad06ed8462cdc6bac2d3152c16faba49833d115cc6240e26d3cf51f746c08
|
data/LICENSE
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
|
|
2
2
|
Apache License
|
|
3
3
|
Version 2.0, January 2004
|
|
4
|
-
|
|
4
|
+
https://www.apache.org/licenses/
|
|
5
5
|
|
|
6
6
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
7
7
|
|
|
@@ -193,7 +193,7 @@
|
|
|
193
193
|
you may not use this file except in compliance with the License.
|
|
194
194
|
You may obtain a copy of the License at
|
|
195
195
|
|
|
196
|
-
|
|
196
|
+
https://www.apache.org/licenses/LICENSE-2.0
|
|
197
197
|
|
|
198
198
|
Unless required by applicable law or agreed to in writing, software
|
|
199
199
|
distributed under the License is distributed on an "AS IS" BASIS,
|
data/Manifest
CHANGED
|
@@ -9,9 +9,12 @@ lib/avro.rb
|
|
|
9
9
|
lib/avro/data_file.rb
|
|
10
10
|
lib/avro/io.rb
|
|
11
11
|
lib/avro/ipc.rb
|
|
12
|
+
lib/avro/logical_types.rb
|
|
12
13
|
lib/avro/protocol.rb
|
|
13
14
|
lib/avro/schema.rb
|
|
15
|
+
lib/avro/schema_compatibility.rb
|
|
14
16
|
lib/avro/schema_normalization.rb
|
|
17
|
+
lib/avro/schema_validator.rb
|
|
15
18
|
test/case_finder.rb
|
|
16
19
|
test/random_data.rb
|
|
17
20
|
test/sample_ipc_client.rb
|
|
@@ -22,8 +25,11 @@ test/test_datafile.rb
|
|
|
22
25
|
test/test_fingerprints.rb
|
|
23
26
|
test/test_help.rb
|
|
24
27
|
test/test_io.rb
|
|
28
|
+
test/test_logical_types.rb
|
|
25
29
|
test/test_protocol.rb
|
|
26
30
|
test/test_schema.rb
|
|
31
|
+
test/test_schema_compatibility.rb
|
|
27
32
|
test/test_schema_normalization.rb
|
|
33
|
+
test/test_schema_validator.rb
|
|
28
34
|
test/test_socket_transport.rb
|
|
29
35
|
test/tool.rb
|
data/NOTICE
CHANGED
data/Rakefile
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# "License"); you may not use this file except in compliance
|
|
7
7
|
# with the License. You may obtain a copy of the License at
|
|
8
8
|
#
|
|
9
|
-
#
|
|
9
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
10
10
|
#
|
|
11
11
|
# Unless required by applicable law or agreed to in writing, software
|
|
12
12
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
@@ -22,9 +22,9 @@ Echoe.new('avro', VERSION) do |p|
|
|
|
22
22
|
p.email = "dev@avro.apache.org"
|
|
23
23
|
p.summary = "Apache Avro for Ruby"
|
|
24
24
|
p.description = "Avro is a data serialization and RPC format"
|
|
25
|
-
p.url = "
|
|
26
|
-
p.runtime_dependencies =
|
|
27
|
-
p.licenses = ["Apache
|
|
25
|
+
p.url = "https://avro.apache.org/"
|
|
26
|
+
p.runtime_dependencies = ["multi_json ~>1"]
|
|
27
|
+
p.licenses = ["Apache-2.0"]
|
|
28
28
|
end
|
|
29
29
|
|
|
30
30
|
t = Rake::TestTask.new(:interop)
|
|
@@ -38,17 +38,13 @@ task :generate_interop do
|
|
|
38
38
|
|
|
39
39
|
schema = Avro::Schema.parse(File.read(SCHEMAS + '/interop.avsc'))
|
|
40
40
|
r = RandomData.new(schema, ENV['SEED'])
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
Avro::DataFile.open(BUILD + '/interop/data/ruby_deflate.avro', 'w', schema.to_s, :deflate) do |writer|
|
|
51
|
-
20.times { writer << r.next }
|
|
41
|
+
Avro::DataFile.codecs.each do |name, codec|
|
|
42
|
+
next unless codec
|
|
43
|
+
filename = name == 'null' ? 'ruby.avro' : "ruby_#{name}.avro"
|
|
44
|
+
path = File.join(BUILD, 'interop/data', filename)
|
|
45
|
+
Avro::DataFile.open(path, 'w', schema.to_s, name) do |writer|
|
|
46
|
+
writer << r.next
|
|
47
|
+
end
|
|
52
48
|
end
|
|
53
49
|
end
|
|
54
50
|
|
data/avro.gemspec
CHANGED
|
@@ -1,35 +1,35 @@
|
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
|
2
|
-
# stub: avro 1.
|
|
2
|
+
# stub: avro 1.10.0 ruby lib
|
|
3
3
|
|
|
4
4
|
Gem::Specification.new do |s|
|
|
5
|
-
s.name = "avro"
|
|
6
|
-
s.version = "1.
|
|
5
|
+
s.name = "avro".freeze
|
|
6
|
+
s.version = "1.10.0"
|
|
7
7
|
|
|
8
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
|
9
|
-
s.require_paths = ["lib"]
|
|
10
|
-
s.authors = ["Apache Software Foundation"]
|
|
11
|
-
s.date = "
|
|
12
|
-
s.description = "Avro is a data serialization and RPC format"
|
|
13
|
-
s.email = "dev@avro.apache.org"
|
|
14
|
-
s.extra_rdoc_files = ["CHANGELOG", "LICENSE", "lib/avro.rb", "lib/avro/data_file.rb", "lib/avro/io.rb", "lib/avro/ipc.rb", "lib/avro/protocol.rb", "lib/avro/schema.rb", "lib/avro/schema_normalization.rb"]
|
|
15
|
-
s.files = ["CHANGELOG", "LICENSE", "Manifest", "NOTICE", "Rakefile", "avro.gemspec", "interop/test_interop.rb", "lib/avro.rb", "lib/avro/data_file.rb", "lib/avro/io.rb", "lib/avro/ipc.rb", "lib/avro/protocol.rb", "lib/avro/schema.rb", "lib/avro/schema_normalization.rb", "test/case_finder.rb", "test/random_data.rb", "test/sample_ipc_client.rb", "test/sample_ipc_http_client.rb", "test/sample_ipc_http_server.rb", "test/sample_ipc_server.rb", "test/test_datafile.rb", "test/test_fingerprints.rb", "test/test_help.rb", "test/test_io.rb", "test/test_protocol.rb", "test/test_schema.rb", "test/test_schema_normalization.rb", "test/test_socket_transport.rb", "test/tool.rb"]
|
|
16
|
-
s.homepage = "
|
|
17
|
-
s.licenses = ["Apache
|
|
18
|
-
s.rdoc_options = ["--line-numbers", "--title", "Avro"]
|
|
19
|
-
s.rubyforge_project = "avro"
|
|
20
|
-
s.rubygems_version = "2.2.
|
|
21
|
-
s.summary = "Apache Avro for Ruby"
|
|
22
|
-
s.test_files = ["test/
|
|
8
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 1.2".freeze) if s.respond_to? :required_rubygems_version=
|
|
9
|
+
s.require_paths = ["lib".freeze]
|
|
10
|
+
s.authors = ["Apache Software Foundation".freeze]
|
|
11
|
+
s.date = "2020-06-22"
|
|
12
|
+
s.description = "Avro is a data serialization and RPC format".freeze
|
|
13
|
+
s.email = "dev@avro.apache.org".freeze
|
|
14
|
+
s.extra_rdoc_files = ["CHANGELOG".freeze, "LICENSE".freeze, "lib/avro.rb".freeze, "lib/avro/data_file.rb".freeze, "lib/avro/io.rb".freeze, "lib/avro/ipc.rb".freeze, "lib/avro/logical_types.rb".freeze, "lib/avro/protocol.rb".freeze, "lib/avro/schema.rb".freeze, "lib/avro/schema_compatibility.rb".freeze, "lib/avro/schema_normalization.rb".freeze, "lib/avro/schema_validator.rb".freeze]
|
|
15
|
+
s.files = ["CHANGELOG".freeze, "LICENSE".freeze, "Manifest".freeze, "NOTICE".freeze, "Rakefile".freeze, "avro.gemspec".freeze, "interop/test_interop.rb".freeze, "lib/avro.rb".freeze, "lib/avro/data_file.rb".freeze, "lib/avro/io.rb".freeze, "lib/avro/ipc.rb".freeze, "lib/avro/logical_types.rb".freeze, "lib/avro/protocol.rb".freeze, "lib/avro/schema.rb".freeze, "lib/avro/schema_compatibility.rb".freeze, "lib/avro/schema_normalization.rb".freeze, "lib/avro/schema_validator.rb".freeze, "test/case_finder.rb".freeze, "test/random_data.rb".freeze, "test/sample_ipc_client.rb".freeze, "test/sample_ipc_http_client.rb".freeze, "test/sample_ipc_http_server.rb".freeze, "test/sample_ipc_server.rb".freeze, "test/test_datafile.rb".freeze, "test/test_fingerprints.rb".freeze, "test/test_help.rb".freeze, "test/test_io.rb".freeze, "test/test_logical_types.rb".freeze, "test/test_protocol.rb".freeze, "test/test_schema.rb".freeze, "test/test_schema_compatibility.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_schema_validator.rb".freeze, "test/test_socket_transport.rb".freeze, "test/tool.rb".freeze]
|
|
16
|
+
s.homepage = "https://avro.apache.org/".freeze
|
|
17
|
+
s.licenses = ["Apache-2.0".freeze]
|
|
18
|
+
s.rdoc_options = ["--line-numbers".freeze, "--title".freeze, "Avro".freeze]
|
|
19
|
+
s.rubyforge_project = "avro".freeze
|
|
20
|
+
s.rubygems_version = "2.5.2.1".freeze
|
|
21
|
+
s.summary = "Apache Avro for Ruby".freeze
|
|
22
|
+
s.test_files = ["test/test_help.rb".freeze, "test/test_io.rb".freeze, "test/test_socket_transport.rb".freeze, "test/test_fingerprints.rb".freeze, "test/test_schema.rb".freeze, "test/test_schema_compatibility.rb".freeze, "test/test_schema_validator.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_protocol.rb".freeze, "test/test_datafile.rb".freeze, "test/test_logical_types.rb".freeze]
|
|
23
23
|
|
|
24
24
|
if s.respond_to? :specification_version then
|
|
25
25
|
s.specification_version = 4
|
|
26
26
|
|
|
27
27
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
|
28
|
-
s.add_runtime_dependency(%q<multi_json
|
|
28
|
+
s.add_runtime_dependency(%q<multi_json>.freeze, ["~> 1"])
|
|
29
29
|
else
|
|
30
|
-
s.add_dependency(%q<multi_json
|
|
30
|
+
s.add_dependency(%q<multi_json>.freeze, ["~> 1"])
|
|
31
31
|
end
|
|
32
32
|
else
|
|
33
|
-
s.add_dependency(%q<multi_json
|
|
33
|
+
s.add_dependency(%q<multi_json>.freeze, ["~> 1"])
|
|
34
34
|
end
|
|
35
35
|
end
|
data/interop/test_interop.rb
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
# "License"); you may not use this file except in compliance
|
|
8
8
|
# with the License. You may obtain a copy of the License at
|
|
9
9
|
#
|
|
10
|
-
#
|
|
10
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
11
11
|
#
|
|
12
12
|
# Unless required by applicable law or agreed to in writing, software
|
|
13
13
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
@@ -19,12 +19,22 @@ require 'rubygems'
|
|
|
19
19
|
require 'test/unit'
|
|
20
20
|
require 'avro'
|
|
21
21
|
|
|
22
|
+
CODECS_TO_VALIDATE = ['deflate', 'snappy', 'zstandard'] # The 'null' codec is implicitly included
|
|
23
|
+
|
|
22
24
|
class TestInterop < Test::Unit::TestCase
|
|
23
25
|
HERE = File.expand_path(File.dirname(__FILE__))
|
|
24
26
|
SHARE = HERE + '/../../../share'
|
|
25
27
|
SCHEMAS = SHARE + '/test/schemas'
|
|
26
|
-
|
|
27
|
-
|
|
28
|
+
|
|
29
|
+
files = Dir[HERE + '/../../../build/interop/data/*.avro'].select do |fn|
|
|
30
|
+
sep, codec = File.basename(fn, '.avro').rpartition('_')[1, 2]
|
|
31
|
+
sep.empty? || CODECS_TO_VALIDATE.include?(codec)
|
|
32
|
+
end
|
|
33
|
+
puts "The following files will be tested:"
|
|
34
|
+
puts files
|
|
35
|
+
|
|
36
|
+
files.each do |fn|
|
|
37
|
+
define_method("test_read_#{File.basename(fn, '.avro')}") do
|
|
28
38
|
projection = Avro::Schema.parse(File.read(SCHEMAS+'/interop.avsc'))
|
|
29
39
|
|
|
30
40
|
File.open(fn) do |f|
|
data/lib/avro.rb
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# "License"); you may not use this file except in compliance
|
|
7
7
|
# with the License. You may obtain a copy of the License at
|
|
8
8
|
#
|
|
9
|
-
#
|
|
9
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
10
10
|
#
|
|
11
11
|
# Unless required by applicable law or agreed to in writing, software
|
|
12
12
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
@@ -28,10 +28,31 @@ module Avro
|
|
|
28
28
|
|
|
29
29
|
class AvroTypeError < Avro::AvroError
|
|
30
30
|
def initialize(schm=nil, datum=nil, msg=nil)
|
|
31
|
-
msg ||= "Not a #{schm
|
|
31
|
+
msg ||= "Not a #{schm}: #{datum}"
|
|
32
32
|
super(msg)
|
|
33
33
|
end
|
|
34
34
|
end
|
|
35
|
+
|
|
36
|
+
class << self
|
|
37
|
+
attr_writer :disable_enum_symbol_validation
|
|
38
|
+
attr_writer :disable_field_default_validation
|
|
39
|
+
attr_writer :disable_schema_name_validation
|
|
40
|
+
|
|
41
|
+
def disable_enum_symbol_validation
|
|
42
|
+
@disable_enum_symbol_validation ||=
|
|
43
|
+
ENV.fetch('AVRO_DISABLE_ENUM_SYMBOL_VALIDATION', '') != ''
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def disable_field_default_validation
|
|
47
|
+
@disable_field_default_validation ||=
|
|
48
|
+
ENV.fetch('AVRO_DISABLE_FIELD_DEFAULT_VALIDATION', '') != ''
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def disable_schema_name_validation
|
|
52
|
+
@disable_schema_name_validation ||=
|
|
53
|
+
ENV.fetch('AVRO_DISABLE_SCHEMA_NAME_VALIDATION', '') != ''
|
|
54
|
+
end
|
|
55
|
+
end
|
|
35
56
|
end
|
|
36
57
|
|
|
37
58
|
require 'avro/schema'
|
|
@@ -40,3 +61,5 @@ require 'avro/data_file'
|
|
|
40
61
|
require 'avro/protocol'
|
|
41
62
|
require 'avro/ipc'
|
|
42
63
|
require 'avro/schema_normalization'
|
|
64
|
+
require 'avro/schema_validator'
|
|
65
|
+
require 'avro/schema_compatibility'
|
data/lib/avro/data_file.rb
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# "License"); you may not use this file except in compliance
|
|
7
7
|
# with the License. You may obtain a copy of the License at
|
|
8
8
|
#
|
|
9
|
-
#
|
|
9
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
10
10
|
#
|
|
11
11
|
# Unless required by applicable law or agreed to in writing, software
|
|
12
12
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
@@ -316,7 +316,7 @@ module Avro
|
|
|
316
316
|
def decompress(compressed)
|
|
317
317
|
# Passing a negative number to Inflate puts it into "raw" RFC1951 mode
|
|
318
318
|
# (without the RFC1950 header & checksum). See the docs for
|
|
319
|
-
# inflateInit2 in
|
|
319
|
+
# inflateInit2 in https://www.zlib.net/manual.html
|
|
320
320
|
zstream = Zlib::Inflate.new(-Zlib::MAX_WBITS)
|
|
321
321
|
data = zstream.inflate(compressed)
|
|
322
322
|
data << zstream.finish
|
|
@@ -338,12 +338,29 @@ module Avro
|
|
|
338
338
|
|
|
339
339
|
def decompress(data)
|
|
340
340
|
load_snappy!
|
|
341
|
+
crc32 = data.slice(-4..-1).unpack('N').first
|
|
342
|
+
uncompressed = Snappy.inflate(data.slice(0..-5))
|
|
343
|
+
|
|
344
|
+
if crc32 == Zlib.crc32(uncompressed)
|
|
345
|
+
uncompressed
|
|
346
|
+
else
|
|
347
|
+
# older versions of avro-ruby didn't write the checksum, so if it
|
|
348
|
+
# doesn't match this must assume that it wasn't there and return
|
|
349
|
+
# the entire payload uncompressed.
|
|
350
|
+
Snappy.inflate(data)
|
|
351
|
+
end
|
|
352
|
+
rescue Snappy::Error
|
|
353
|
+
# older versions of avro-ruby didn't write the checksum, so removing
|
|
354
|
+
# the last 4 bytes may cause Snappy to fail. recover by assuming the
|
|
355
|
+
# payload is from an older file and uncompress the entire buffer.
|
|
341
356
|
Snappy.inflate(data)
|
|
342
357
|
end
|
|
343
358
|
|
|
344
359
|
def compress(data)
|
|
345
360
|
load_snappy!
|
|
346
|
-
|
|
361
|
+
crc32 = Zlib.crc32(data)
|
|
362
|
+
compressed = Snappy.deflate(data)
|
|
363
|
+
[compressed, crc32].pack('a*N')
|
|
347
364
|
end
|
|
348
365
|
|
|
349
366
|
private
|
|
@@ -355,9 +372,32 @@ module Avro
|
|
|
355
372
|
end
|
|
356
373
|
end
|
|
357
374
|
|
|
375
|
+
class ZstandardCodec
|
|
376
|
+
def codec_name; 'zstandard'; end
|
|
377
|
+
|
|
378
|
+
def decompress(data)
|
|
379
|
+
load_zstandard!
|
|
380
|
+
Zstd.decompress(data)
|
|
381
|
+
end
|
|
382
|
+
|
|
383
|
+
def compress(data)
|
|
384
|
+
load_zstandard!
|
|
385
|
+
Zstd.compress(data)
|
|
386
|
+
end
|
|
387
|
+
|
|
388
|
+
private
|
|
389
|
+
|
|
390
|
+
def load_zstandard!
|
|
391
|
+
require 'zstd-ruby' unless defined?(Zstd)
|
|
392
|
+
rescue LoadError
|
|
393
|
+
raise LoadError, "Zstandard compression is not available, please install the `zstd-ruby` gem."
|
|
394
|
+
end
|
|
395
|
+
end
|
|
396
|
+
|
|
358
397
|
DataFile.register_codec NullCodec
|
|
359
398
|
DataFile.register_codec DeflateCodec
|
|
360
399
|
DataFile.register_codec SnappyCodec
|
|
400
|
+
DataFile.register_codec ZstandardCodec
|
|
361
401
|
|
|
362
402
|
# TODO this constant won't be updated if you register another codec.
|
|
363
403
|
# Deprecated in favor of Avro::DataFile::codecs
|
data/lib/avro/io.rb
CHANGED
|
@@ -5,9 +5,9 @@
|
|
|
5
5
|
# to you under the Apache License, Version 2.0 (the
|
|
6
6
|
# "License"); you may not use this file except in compliance
|
|
7
7
|
# with the License. You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
#
|
|
10
|
-
#
|
|
8
|
+
#
|
|
9
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
11
|
# Unless required by applicable law or agreed to in writing, software
|
|
12
12
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
13
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@@ -43,9 +43,9 @@ module Avro
|
|
|
43
43
|
end
|
|
44
44
|
|
|
45
45
|
def byte!
|
|
46
|
-
@reader.
|
|
46
|
+
@reader.readbyte
|
|
47
47
|
end
|
|
48
|
-
|
|
48
|
+
|
|
49
49
|
def read_null
|
|
50
50
|
# null is written as zero byte's
|
|
51
51
|
nil
|
|
@@ -76,7 +76,7 @@ module Avro
|
|
|
76
76
|
# The float is converted into a 32-bit integer using a method
|
|
77
77
|
# equivalent to Java's floatToIntBits and then encoded in
|
|
78
78
|
# little-endian format.
|
|
79
|
-
|
|
79
|
+
read_and_unpack(4, 'e'.freeze)
|
|
80
80
|
end
|
|
81
81
|
|
|
82
82
|
def read_double
|
|
@@ -84,7 +84,7 @@ module Avro
|
|
|
84
84
|
# The double is converted into a 64-bit integer using a method
|
|
85
85
|
# equivalent to Java's doubleToLongBits and then encoded in
|
|
86
86
|
# little-endian format.
|
|
87
|
-
|
|
87
|
+
read_and_unpack(8, 'E'.freeze)
|
|
88
88
|
end
|
|
89
89
|
|
|
90
90
|
def read_bytes
|
|
@@ -97,7 +97,7 @@ module Avro
|
|
|
97
97
|
# A string is encoded as a long followed by that many bytes of
|
|
98
98
|
# UTF-8 encoded character data.
|
|
99
99
|
read_bytes.tap do |string|
|
|
100
|
-
string.force_encoding(
|
|
100
|
+
string.force_encoding('UTF-8'.freeze) if string.respond_to? :force_encoding
|
|
101
101
|
end
|
|
102
102
|
end
|
|
103
103
|
|
|
@@ -144,6 +144,23 @@ module Avro
|
|
|
144
144
|
def skip(n)
|
|
145
145
|
reader.seek(reader.tell() + n)
|
|
146
146
|
end
|
|
147
|
+
|
|
148
|
+
private
|
|
149
|
+
|
|
150
|
+
# Optimize unpacking strings when `unpack1` is available (ruby >= 2.4)
|
|
151
|
+
if String.instance_methods.include?(:unpack1)
|
|
152
|
+
|
|
153
|
+
def read_and_unpack(byte_count, format)
|
|
154
|
+
@reader.read(byte_count).unpack1(format)
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
else
|
|
158
|
+
|
|
159
|
+
def read_and_unpack(byte_count, format)
|
|
160
|
+
@reader.read(byte_count).unpack(format)[0]
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
end
|
|
147
164
|
end
|
|
148
165
|
|
|
149
166
|
# Write leaf values
|
|
@@ -155,11 +172,11 @@ module Avro
|
|
|
155
172
|
end
|
|
156
173
|
|
|
157
174
|
# null is written as zero bytes
|
|
158
|
-
def write_null(
|
|
175
|
+
def write_null(_datum)
|
|
159
176
|
nil
|
|
160
177
|
end
|
|
161
178
|
|
|
162
|
-
# a boolean is written as a single byte
|
|
179
|
+
# a boolean is written as a single byte
|
|
163
180
|
# whose value is either 0 (false) or 1 (true).
|
|
164
181
|
def write_boolean(datum)
|
|
165
182
|
on_disk = datum ? 1.chr : 0.chr
|
|
@@ -175,7 +192,6 @@ module Avro
|
|
|
175
192
|
# int and long values are written using variable-length,
|
|
176
193
|
# zig-zag coding.
|
|
177
194
|
def write_long(n)
|
|
178
|
-
foo = n
|
|
179
195
|
n = (n << 1) ^ (n >> 63)
|
|
180
196
|
while (n & ~0x7F) != 0
|
|
181
197
|
@writer.write(((n & 0x7f) | 0x80).chr)
|
|
@@ -189,7 +205,7 @@ module Avro
|
|
|
189
205
|
# equivalent to Java's floatToIntBits and then encoded in
|
|
190
206
|
# little-endian format.
|
|
191
207
|
def write_float(datum)
|
|
192
|
-
@writer.write([datum].pack('e'))
|
|
208
|
+
@writer.write([datum].pack('e'.freeze))
|
|
193
209
|
end
|
|
194
210
|
|
|
195
211
|
# A double is written as 8 bytes.
|
|
@@ -197,7 +213,7 @@ module Avro
|
|
|
197
213
|
# equivalent to Java's doubleToLongBits and then encoded in
|
|
198
214
|
# little-endian format.
|
|
199
215
|
def write_double(datum)
|
|
200
|
-
@writer.write([datum].pack('E'))
|
|
216
|
+
@writer.write([datum].pack('E'.freeze))
|
|
201
217
|
end
|
|
202
218
|
|
|
203
219
|
# Bytes are encoded as a long followed by that many bytes of data.
|
|
@@ -209,7 +225,7 @@ module Avro
|
|
|
209
225
|
# A string is encoded as a long followed by that many bytes of
|
|
210
226
|
# UTF-8 encoded character data
|
|
211
227
|
def write_string(datum)
|
|
212
|
-
datum = datum.encode('utf-8') if datum.respond_to? :encode
|
|
228
|
+
datum = datum.encode('utf-8'.freeze) if datum.respond_to? :encode
|
|
213
229
|
write_bytes(datum)
|
|
214
230
|
end
|
|
215
231
|
|
|
@@ -221,46 +237,7 @@ module Avro
|
|
|
221
237
|
|
|
222
238
|
class DatumReader
|
|
223
239
|
def self.match_schemas(writers_schema, readers_schema)
|
|
224
|
-
|
|
225
|
-
r_type = readers_schema.type_sym
|
|
226
|
-
|
|
227
|
-
# This conditional is begging for some OO love.
|
|
228
|
-
if w_type == :union || r_type == :union
|
|
229
|
-
return true
|
|
230
|
-
end
|
|
231
|
-
|
|
232
|
-
if w_type == r_type
|
|
233
|
-
return true if Schema::PRIMITIVE_TYPES_SYM.include?(r_type)
|
|
234
|
-
|
|
235
|
-
case r_type
|
|
236
|
-
when :record
|
|
237
|
-
return writers_schema.fullname == readers_schema.fullname
|
|
238
|
-
when :error
|
|
239
|
-
return writers_schema.fullname == readers_schema.fullname
|
|
240
|
-
when :request
|
|
241
|
-
return true
|
|
242
|
-
when :fixed
|
|
243
|
-
return writers_schema.fullname == readers_schema.fullname &&
|
|
244
|
-
writers_schema.size == readers_schema.size
|
|
245
|
-
when :enum
|
|
246
|
-
return writers_schema.fullname == readers_schema.fullname
|
|
247
|
-
when :map
|
|
248
|
-
return writers_schema.values.type == readers_schema.values.type
|
|
249
|
-
when :array
|
|
250
|
-
return writers_schema.items.type == readers_schema.items.type
|
|
251
|
-
end
|
|
252
|
-
end
|
|
253
|
-
|
|
254
|
-
# Handle schema promotion
|
|
255
|
-
if w_type == :int && [:long, :float, :double].include?(r_type)
|
|
256
|
-
return true
|
|
257
|
-
elsif w_type == :long && [:float, :double].include?(r_type)
|
|
258
|
-
return true
|
|
259
|
-
elsif w_type == :float && r_type == :double
|
|
260
|
-
return true
|
|
261
|
-
end
|
|
262
|
-
|
|
263
|
-
return false
|
|
240
|
+
Avro::SchemaCompatibility.match_schemas(writers_schema, readers_schema)
|
|
264
241
|
end
|
|
265
242
|
|
|
266
243
|
attr_accessor :writers_schema, :readers_schema
|
|
@@ -293,7 +270,7 @@ module Avro
|
|
|
293
270
|
|
|
294
271
|
# function dispatch for reading data based on type of writer's
|
|
295
272
|
# schema
|
|
296
|
-
case writers_schema.type_sym
|
|
273
|
+
datum = case writers_schema.type_sym
|
|
297
274
|
when :null; decoder.read_null
|
|
298
275
|
when :boolean; decoder.read_boolean
|
|
299
276
|
when :string; decoder.read_string
|
|
@@ -311,9 +288,11 @@ module Avro
|
|
|
311
288
|
else
|
|
312
289
|
raise AvroError, "Cannot read unknown schema type: #{writers_schema.type}"
|
|
313
290
|
end
|
|
291
|
+
|
|
292
|
+
readers_schema.type_adapter.decode(datum)
|
|
314
293
|
end
|
|
315
294
|
|
|
316
|
-
def read_fixed(writers_schema,
|
|
295
|
+
def read_fixed(writers_schema, _readers_schema, decoder)
|
|
317
296
|
decoder.read(writers_schema.size)
|
|
318
297
|
end
|
|
319
298
|
|
|
@@ -321,12 +300,12 @@ module Avro
|
|
|
321
300
|
index_of_symbol = decoder.read_int
|
|
322
301
|
read_symbol = writers_schema.symbols[index_of_symbol]
|
|
323
302
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
unless readers_schema.symbols.include?(read_symbol)
|
|
327
|
-
# 'unset' here
|
|
303
|
+
if !readers_schema.symbols.include?(read_symbol) && readers_schema.default
|
|
304
|
+
read_symbol = readers_schema.default
|
|
328
305
|
end
|
|
329
306
|
|
|
307
|
+
# This implementation deviates from the spec by always returning
|
|
308
|
+
# a symbol.
|
|
330
309
|
read_symbol
|
|
331
310
|
end
|
|
332
311
|
|
|
@@ -336,7 +315,7 @@ module Avro
|
|
|
336
315
|
while block_count != 0
|
|
337
316
|
if block_count < 0
|
|
338
317
|
block_count = -block_count
|
|
339
|
-
|
|
318
|
+
_block_size = decoder.read_long
|
|
340
319
|
end
|
|
341
320
|
block_count.times do
|
|
342
321
|
read_items << read_data(writers_schema.items,
|
|
@@ -355,7 +334,7 @@ module Avro
|
|
|
355
334
|
while block_count != 0
|
|
356
335
|
if block_count < 0
|
|
357
336
|
block_count = -block_count
|
|
358
|
-
|
|
337
|
+
_block_size = decoder.read_long
|
|
359
338
|
end
|
|
360
339
|
block_count.times do
|
|
361
340
|
key = decoder.read_string
|
|
@@ -380,26 +359,28 @@ module Avro
|
|
|
380
359
|
readers_fields_hash = readers_schema.fields_hash
|
|
381
360
|
read_record = {}
|
|
382
361
|
writers_schema.fields.each do |field|
|
|
383
|
-
|
|
362
|
+
readers_field = readers_fields_hash[field.name]
|
|
363
|
+
if readers_field
|
|
384
364
|
field_val = read_data(field.type, readers_field.type, decoder)
|
|
385
365
|
read_record[field.name] = field_val
|
|
366
|
+
elsif readers_schema.fields_by_alias.key?(field.name)
|
|
367
|
+
readers_field = readers_schema.fields_by_alias[field.name]
|
|
368
|
+
field_val = read_data(field.type, readers_field.type, decoder)
|
|
369
|
+
read_record[readers_field.name] = field_val
|
|
386
370
|
else
|
|
387
371
|
skip_data(field.type, decoder)
|
|
388
372
|
end
|
|
389
373
|
end
|
|
390
374
|
|
|
391
375
|
# fill in the default values
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
# FIXME(jmhodges) another 'unset' here
|
|
401
|
-
end
|
|
402
|
-
end
|
|
376
|
+
readers_fields_hash.each do |field_name, field|
|
|
377
|
+
next if read_record.key?(field_name)
|
|
378
|
+
|
|
379
|
+
if field.default?
|
|
380
|
+
field_val = read_default_value(field.type, field.default)
|
|
381
|
+
read_record[field.name] = field_val
|
|
382
|
+
else
|
|
383
|
+
raise AvroError, "Missing data for #{field.type} with no default"
|
|
403
384
|
end
|
|
404
385
|
end
|
|
405
386
|
|
|
@@ -489,7 +470,7 @@ module Avro
|
|
|
489
470
|
decoder.skip(writers_schema.size)
|
|
490
471
|
end
|
|
491
472
|
|
|
492
|
-
def skip_enum(
|
|
473
|
+
def skip_enum(_writers_schema, decoder)
|
|
493
474
|
decoder.skip_int
|
|
494
475
|
end
|
|
495
476
|
|
|
@@ -520,7 +501,7 @@ module Avro
|
|
|
520
501
|
if block_count < 0
|
|
521
502
|
decoder.skip(decoder.read_long)
|
|
522
503
|
else
|
|
523
|
-
block_count.times
|
|
504
|
+
block_count.times(&blk)
|
|
524
505
|
end
|
|
525
506
|
block_count = decoder.read_long
|
|
526
507
|
end
|
|
@@ -538,8 +519,10 @@ module Avro
|
|
|
538
519
|
write_data(writers_schema, datum, encoder)
|
|
539
520
|
end
|
|
540
521
|
|
|
541
|
-
def write_data(writers_schema,
|
|
542
|
-
|
|
522
|
+
def write_data(writers_schema, logical_datum, encoder)
|
|
523
|
+
datum = writers_schema.type_adapter.encode(logical_datum)
|
|
524
|
+
|
|
525
|
+
unless Schema.validate(writers_schema, datum, { recursive: false, encoded: true })
|
|
543
526
|
raise AvroTypeError.new(writers_schema, datum)
|
|
544
527
|
end
|
|
545
528
|
|
|
@@ -564,7 +547,7 @@ module Avro
|
|
|
564
547
|
end
|
|
565
548
|
end
|
|
566
549
|
|
|
567
|
-
def write_fixed(
|
|
550
|
+
def write_fixed(_writers_schema, datum, encoder)
|
|
568
551
|
encoder.write(datum)
|
|
569
552
|
end
|
|
570
553
|
|
|
@@ -574,6 +557,7 @@ module Avro
|
|
|
574
557
|
end
|
|
575
558
|
|
|
576
559
|
def write_array(writers_schema, datum, encoder)
|
|
560
|
+
raise AvroTypeError.new(writers_schema, datum) unless datum.is_a?(Array)
|
|
577
561
|
if datum.size > 0
|
|
578
562
|
encoder.write_long(datum.size)
|
|
579
563
|
datum.each do |item|
|
|
@@ -584,6 +568,7 @@ module Avro
|
|
|
584
568
|
end
|
|
585
569
|
|
|
586
570
|
def write_map(writers_schema, datum, encoder)
|
|
571
|
+
raise AvroTypeError.new(writers_schema, datum) unless datum.is_a?(Hash)
|
|
587
572
|
if datum.size > 0
|
|
588
573
|
encoder.write_long(datum.size)
|
|
589
574
|
datum.each do |k,v|
|
|
@@ -606,8 +591,9 @@ module Avro
|
|
|
606
591
|
end
|
|
607
592
|
|
|
608
593
|
def write_record(writers_schema, datum, encoder)
|
|
594
|
+
raise AvroTypeError.new(writers_schema, datum) unless datum.is_a?(Hash)
|
|
609
595
|
writers_schema.fields.each do |field|
|
|
610
|
-
write_data(field.type, datum[field.name], encoder)
|
|
596
|
+
write_data(field.type, datum.key?(field.name) ? datum[field.name] : datum[field.name.to_sym], encoder)
|
|
611
597
|
end
|
|
612
598
|
end
|
|
613
599
|
end # DatumWriter
|