avro-salsify-fork 1.9.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG +1 -0
- data/LICENSE +203 -0
- data/Manifest +31 -0
- data/NOTICE +6 -0
- data/Rakefile +66 -0
- data/avro-salsify-fork.gemspec +35 -0
- data/avro.gemspec +35 -0
- data/interop/test_interop.rb +41 -0
- data/lib/avro.rb +42 -0
- data/lib/avro/data_file.rb +366 -0
- data/lib/avro/io.rb +619 -0
- data/lib/avro/ipc.rb +551 -0
- data/lib/avro/logical_types.rb +84 -0
- data/lib/avro/protocol.rb +161 -0
- data/lib/avro/schema.rb +434 -0
- data/lib/avro/schema_normalization.rb +83 -0
- data/test/case_finder.rb +87 -0
- data/test/random_data.rb +90 -0
- data/test/sample_ipc_client.rb +85 -0
- data/test/sample_ipc_http_client.rb +84 -0
- data/test/sample_ipc_http_server.rb +79 -0
- data/test/sample_ipc_server.rb +92 -0
- data/test/test_datafile.rb +214 -0
- data/test/test_fingerprints.rb +37 -0
- data/test/test_help.rb +23 -0
- data/test/test_io.rb +451 -0
- data/test/test_logical_types.rb +111 -0
- data/test/test_protocol.rb +199 -0
- data/test/test_schema.rb +146 -0
- data/test/test_schema_normalization.rb +171 -0
- data/test/test_socket_transport.rb +40 -0
- data/test/tool.rb +144 -0
- metadata +114 -0
@@ -0,0 +1,92 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
# or more contributor license agreements. See the NOTICE file
|
4
|
+
# distributed with this work for additional information
|
5
|
+
# regarding copyright ownership. The ASF licenses this file
|
6
|
+
# to you under the Apache License, Version 2.0 (the
|
7
|
+
# "License"); you may not use this file except in compliance
|
8
|
+
# with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
# See the License for the specific language governing permissions and
|
16
|
+
# limitations under the License.
|
17
|
+
|
18
|
+
require 'socket'
|
19
|
+
require 'avro'
|
20
|
+
|
21
|
+
MAIL_PROTOCOL_JSON = <<-EOS
|
22
|
+
{"namespace": "example.proto",
|
23
|
+
"protocol": "Mail",
|
24
|
+
|
25
|
+
"types": [
|
26
|
+
{"name": "Message", "type": "record",
|
27
|
+
"fields": [
|
28
|
+
{"name": "to", "type": "string"},
|
29
|
+
{"name": "from", "type": "string"},
|
30
|
+
{"name": "body", "type": "string"}
|
31
|
+
]
|
32
|
+
}
|
33
|
+
],
|
34
|
+
|
35
|
+
"messages": {
|
36
|
+
"send": {
|
37
|
+
"request": [{"name": "message", "type": "Message"}],
|
38
|
+
"response": "string"
|
39
|
+
},
|
40
|
+
"replay": {
|
41
|
+
"request": [],
|
42
|
+
"response": "string"
|
43
|
+
}
|
44
|
+
}
|
45
|
+
}
|
46
|
+
EOS
|
47
|
+
|
48
|
+
MAIL_PROTOCOL = Avro::Protocol.parse(MAIL_PROTOCOL_JSON)
|
49
|
+
|
50
|
+
# Responder for the example "Mail" protocol: answers every supported message
# with a plain string.
class MailResponder < Avro::IPC::Responder
  def initialize
    super(MAIL_PROTOCOL)
  end

  # Builds the reply for one incoming call.
  #
  # message - object whose #name is the protocol message name
  # request - Hash of request parameters; 'send' reads request['message']
  #
  # Returns a String, or nil when the message name is neither 'send' nor
  # 'replay'.
  def call(message, request)
    case message.name
    when 'send'
      mail = request['message']
      "Sent message to #{mail['to']} from #{mail['from']} with body #{mail['body']}"
    when 'replay'
      'replay'
    end
  end
end
|
64
|
+
|
65
|
+
# Minimal blocking TCP accept loop. Subclasses supply #handle(session), which
# is called once per accepted connection.
class RequestHandler
  # address - host name or IP address to bind to
  # port    - TCP port to listen on
  def initialize(address, port)
    @ip_address = address
    @port = port
  end

  # Accepts connections one at a time and passes each to #handle.
  #
  # Every session is closed even when #handle raises, and the listening
  # socket is closed when the loop exits for any reason. An error raised by
  # #handle still propagates to the caller.
  def run
    server = TCPServer.new(@ip_address, @port)
    begin
      while (session = server.accept)
        begin
          handle(session)
        ensure
          session.close
        end
      end
    ensure
      server.close
    end
  end
end
|
79
|
+
|
80
|
+
# Request handler that serves the Mail protocol over a framed socket
# transport: one framed request is read and one framed response is written.
class MailHandler < RequestHandler
  # request - the accepted client socket
  def handle(request)
    responder = MailResponder.new
    transport = Avro::IPC::SocketTransport.new(request)
    incoming = transport.read_framed_message
    reply = responder.respond(incoming)
    transport.write_framed_message(reply)
  end
end
|
88
|
+
|
89
|
+
# Start the sample mail server only when this file is executed directly.
MailHandler.new('localhost', 9090).run if $PROGRAM_NAME == __FILE__
|
@@ -0,0 +1,214 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
# or more contributor license agreements. See the NOTICE file
|
4
|
+
# distributed with this work for additional information
|
5
|
+
# regarding copyright ownership. The ASF licenses this file
|
6
|
+
# to you under the Apache License, Version 2.0 (the
|
7
|
+
# "License"); you may not use this file except in compliance
|
8
|
+
# with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
# See the License for the specific language governing permissions and
|
16
|
+
# limitations under the License.
|
17
|
+
|
18
|
+
require 'test_help'
|
19
|
+
|
20
|
+
class TestDataFile < Test::Unit::TestCase
|
21
|
+
HERE = File.expand_path File.dirname(__FILE__)
|
22
|
+
# Removes a data.avr left next to this test file by an earlier run.
#
# Uses File.exist?: the File.exists? alias is deprecated and was removed in
# Ruby 3.2.
# NOTE(review): the tests write to the relative path 'data.avr', so this only
# cleans their output when the working directory is HERE - confirm.
def setup
  stale = File.join(HERE, 'data.avr')
  File.unlink(stale) if File.exist?(stale)
end
|
27
|
+
|
28
|
+
# Deletes the data.avr next to this test file once a test has finished.
#
# Uses File.exist?: the File.exists? alias is deprecated and was removed in
# Ruby 3.2.
# NOTE(review): the tests write to the relative path 'data.avr', so this only
# cleans their output when the working directory is HERE - confirm.
def teardown
  leftover = File.join(HERE, 'data.avr')
  File.unlink(leftover) if File.exist?(leftover)
end
|
33
|
+
|
34
|
+
# Writes records with a three-field schema, then reads them back through a
# reader schema that declares only "username".
def test_differing_schemas_with_primitives
  # The boolean default is the JSON literal false, not the string "false";
  # the Avro specification requires a JSON boolean for boolean fields.
  writer_schema = <<-JSON
    { "type": "record",
      "name": "User",
      "fields" : [
        {"name": "username", "type": "string"},
        {"name": "age", "type": "int"},
        {"name": "verified", "type": "boolean", "default": false}
      ]}
  JSON

  data = [{"username" => "john", "age" => 25, "verified" => true},
          {"username" => "ryan", "age" => 23, "verified" => false}]

  Avro::DataFile.open('data.avr', 'w', writer_schema) do |dw|
    data.each { |h| dw << h }
  end

  # extract the username only from the avro serialized file
  reader_schema = <<-JSON
    { "type": "record",
      "name": "User",
      "fields" : [
        {"name": "username", "type": "string"}
      ]}
  JSON

  usernames = []
  Avro::DataFile.open('data.avr', 'r', reader_schema) do |dr|
    dr.each { |record| usernames << record['username'] }
  end

  # Comparing whole lists also fails the test when no record is read back.
  assert_equal(data.map { |h| h['username'] }, usernames)
end
|
67
|
+
|
68
|
+
# Writes records containing fixed, enum, array, map and nested-record fields,
# then re-reads the file once per complex type name with fields of that type
# removed from the reader schema, comparing every remaining field.
def test_differing_schemas_with_complex_objects
  writer_schema = <<-JSON
    { "type": "record",
      "name": "something",
      "fields": [
        {"name": "something_fixed", "type": {"name": "inner_fixed",
                                             "type": "fixed", "size": 3}},
        {"name": "something_enum", "type": {"name": "inner_enum",
                                            "type": "enum",
                                            "symbols": ["hello", "goodbye"]}},
        {"name": "something_array", "type": {"type": "array", "items": "int"}},
        {"name": "something_map", "type": {"type": "map", "values": "int"}},
        {"name": "something_record", "type": {"name": "inner_record",
                                              "type": "record",
                                              "fields": [
                                                {"name": "inner", "type": "int"}
                                              ]}},
        {"name": "username", "type": "string"}
      ]}
  JSON

  john = {
    "username" => "john",
    "something_fixed" => "foo",
    "something_enum" => "hello",
    "something_array" => [1,2,3],
    "something_map" => {"a" => 1, "b" => 2},
    "something_record" => {"inner" => 2},
    "something_error" => {"code" => 403}
  }
  ryan = {
    "username" => "ryan",
    "something_fixed" => "bar",
    "something_enum" => "goodbye",
    "something_array" => [1,2,3],
    "something_map" => {"a" => 2, "b" => 6},
    "something_record" => {"inner" => 1},
    "something_error" => {"code" => 401}
  }
  data = [john, ryan]

  Avro::DataFile.open('data.avr', 'w', writer_schema) do |dw|
    data.each { |d| dw << d }
  end

  %w[fixed enum record error array map union].each do |dropped_type|
    reader = MultiJson.load(writer_schema)
    # Keep only the fields whose declared type is not the one being dropped.
    reader['fields'] = reader['fields'].select { |f| f['type']['type'] != dropped_type }
    Avro::DataFile.open('data.avr', 'r', MultiJson.dump(reader)) do |dr|
      dr.each_with_index do |obj, i|
        written = data[i]
        reader['fields'].each do |field|
          key = field['name']
          assert_equal written[key], obj[key]
        end
      end
    end
  end
end
|
122
|
+
|
123
|
+
# Keeps appending a one-boolean record until the underlying file position
# reaches SYNC_INTERVAL, then checks that one further append raises the
# writer's block_count by exactly one.
def test_data_writer_handles_sync_interval
  writer_schema = <<-JSON
    { "type": "record",
      "name": "something",
      "fields": [
        {"name": "something_boolean", "type": "boolean"}
      ]}
  JSON

  data = {"something_boolean" => true }

  Avro::DataFile.open('data.avr', 'w', writer_schema) do |dw|
    dw << data until dw.writer.tell >= Avro::DataFile::SYNC_INTERVAL

    count_before = dw.block_count
    dw << data
    # ensure we didn't just write another block
    assert_equal(count_before + 1, dw.block_count)
  end
end
|
144
|
+
|
145
|
+
# Round-trips a single multi-byte UTF-8 string through a data file.
#
# Both opens use the block form so the file is closed even when an assertion
# fails, and the records are collected first so the test fails rather than
# passing vacuously when nothing is read back.
def test_utf8
  Avro::DataFile.open('data.avr', 'w', '"string"') do |writer|
    writer << "家"
  end

  records = []
  Avro::DataFile.open('data.avr') do |reader|
    reader.each { |record| records << record }
  end
  assert_equal ["家"], records
end
|
156
|
+
|
157
|
+
# Writes one highly repetitive 10,000-character string with the deflate codec,
# checks the file is under 500 bytes, then reads it back.
def test_deflate
  Avro::DataFile.open('data.avr', 'w', '"string"', :deflate) do |writer|
    writer << 'a' * 10_000
  end
  assert(File.size('data.avr') < 500)

  records = []
  Avro::DataFile.open('data.avr') do |reader|
    reader.each { |record| records << record }
  end
  # Expected value first, so a failure message labels the values correctly.
  assert_equal ['a' * 10_000], records
end
|
169
|
+
|
170
|
+
# Writes one highly repetitive 10,000-character string with the snappy codec,
# checks the file is under 600 bytes, then reads it back.
def test_snappy
  Avro::DataFile.open('data.avr', 'w', '"string"', :snappy) do |writer|
    writer << 'a' * 10_000
  end
  assert(File.size('data.avr') < 600)

  records = []
  Avro::DataFile.open('data.avr') do |reader|
    reader.each { |record| records << record }
  end
  # Expected value first, so a failure message labels the values correctly.
  assert_equal ['a' * 10_000], records
end
|
182
|
+
|
183
|
+
# Creates a deflate-compressed file with one record, reopens it in append
# mode without naming a schema or codec, adds a second record, and checks
# both the resulting file size and the records read back.
def test_append_to_deflated_file
  schema = Avro::Schema.parse('"string"')
  writer = Avro::IO::DatumWriter.new(schema)
  file = Avro::DataFile::Writer.new(File.open('data.avr', 'wb'), writer, schema, :deflate)
  file << 'a' * 10_000
  file.close

  file = Avro::DataFile::Writer.new(File.open('data.avr', 'a+b'), writer)
  file << 'b' * 10_000
  file.close
  assert(File.size('data.avr') < 1_000)

  records = []
  Avro::DataFile.open('data.avr') do |reader|
    reader.each { |record| records << record }
  end
  # Expected value first, so a failure message labels the values correctly.
  assert_equal ['a' * 10_000, 'b' * 10_000], records
end
|
201
|
+
|
202
|
+
# Passes a custom metadata entry to the writer and checks that a reader of
# the same file exposes it through #meta.
def test_custom_meta
  string_schema = Avro::Schema.parse('"string"')
  datum_writer = Avro::IO::DatumWriter.new(string_schema)
  custom_meta = { 'x.greeting' => 'yo' }

  out = File.open('data.avr', 'wb')
  Avro::DataFile::Writer.new(out, datum_writer, string_schema, nil, custom_meta).close

  Avro::DataFile.open('data.avr') do |reader|
    assert_equal 'yo', reader.meta['x.greeting']
  end
end
|
214
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
|
17
|
+
require 'test_help'
|
18
|
+
|
19
|
+
# Checks the MD5 and SHA-256 fingerprints of the plain "int" schema against
# fixed integer values.
class TestFingerprints < Test::Unit::TestCase
  def test_md5_fingerprint
    expected = 318112854175969537208795771590915775282
    assert_equal expected, int_schema.md5_fingerprint
  end

  def test_sha256_fingerprint
    expected = 28572620203319713300323544804233350633246234624932075150020181448463213378117
    assert_equal expected, int_schema.sha256_fingerprint
  end

  private

  # Parses the schema both tests fingerprint.
  def int_schema
    Avro::Schema.parse <<-SCHEMA
      { "type": "int" }
    SCHEMA
  end
end
|
data/test/test_help.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
|
17
|
+
require 'rubygems'
|
18
|
+
require 'test/unit'
|
19
|
+
require 'stringio'
|
20
|
+
require 'fileutils'
|
21
|
+
FileUtils.mkdir_p('tmp')
|
22
|
+
require 'avro'
|
23
|
+
require 'random_data'
|
data/test/test_io.rb
ADDED
@@ -0,0 +1,451 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
|
17
|
+
require 'test_help'
|
18
|
+
|
19
|
+
class TestIO < Test::Unit::TestCase
|
20
|
+
DATAFILE = 'tmp/test.rb.avro'
|
21
|
+
Schema = Avro::Schema
|
22
|
+
|
23
|
+
# Runs the shared schema checks for "null" and a null field default.
def test_null
  null_schema = '"null"'
  check(null_schema)
  check_default(null_schema, 'null', nil)
end
|
27
|
+
|
28
|
+
# Runs the shared schema checks for "boolean" with both possible defaults.
def test_boolean
  boolean_schema = '"boolean"'
  check(boolean_schema)
  check_default(boolean_schema, 'true', true)
  check_default(boolean_schema, 'false', false)
end
|
33
|
+
|
34
|
+
# Runs the shared schema checks for "string" and a string field default.
def test_string
  string_schema = '"string"'
  check(string_schema)
  check_default(string_schema, '"foo"', 'foo')
end
|
38
|
+
|
39
|
+
# Runs the shared schema checks for "bytes" and a bytes field default.
def test_bytes
  bytes_schema = '"bytes"'
  check(bytes_schema)
  check_default(bytes_schema, '"foo"', 'foo')
end
|
43
|
+
|
44
|
+
# Runs the shared schema checks for "int" and an int field default.
def test_int
  int_schema = '"int"'
  check(int_schema)
  check_default(int_schema, '5', 5)
end
|
48
|
+
|
49
|
+
# Runs the shared schema checks for "long" and a long field default.
def test_long
  long_schema = '"long"'
  check(long_schema)
  check_default(long_schema, '9', 9)
end
|
53
|
+
|
54
|
+
# Runs the shared schema checks for "float" and a float field default.
def test_float
  float_schema = '"float"'
  check(float_schema)
  check_default(float_schema, '1.2', 1.2)
end
|
58
|
+
|
59
|
+
# Runs the shared schema checks for "double" and a double field default.
def test_double
  double_schema = '"double"'
  check(double_schema)
  check_default(double_schema, '1.2', 1.2)
end
|
63
|
+
|
64
|
+
# Runs the shared schema checks for an array of longs and an array default.
def test_array
  long_array = '{"type": "array", "items": "long"}'
  check(long_array)
  check_default(long_array, '[1]', [1])
end
|
69
|
+
|
70
|
+
# Runs the shared schema checks for a map of longs and a map default.
def test_map
  long_map = '{"type": "map", "values": "long"}'
  check(long_map)
  check_default(long_map, '{"a": 1}', { 'a' => 1 })
end
|
75
|
+
|
76
|
+
# Runs the shared schema checks for a one-field record and a record default.
def test_record
  schema_json = <<-EOS
    {"type": "record",
     "name": "Test",
     "fields": [{"name": "f",
                 "type": "long"}]}
  EOS
  check(schema_json)
  check_default(schema_json, '{"f": 11}', { 'f' => 11 })
end
|
86
|
+
|
87
|
+
# Runs the shared schema checks for an "error" type with one string field
# and an error default.
def test_error
  schema_json = <<-EOS
    {"type": "error",
     "name": "TestError",
     "fields": [{"name": "message",
                 "type": "string"}]}
  EOS
  check(schema_json)
  check_default(schema_json, '{"message": "boom"}', { 'message' => 'boom' })
end
|
97
|
+
|
98
|
+
# Runs the shared schema checks for a two-symbol enum and an enum default.
def test_enum
  two_symbols = '{"type": "enum", "name": "Test","symbols": ["A", "B"]}'
  check(two_symbols)
  check_default(two_symbols, '"B"', 'B')
end
|
103
|
+
|
104
|
+
# Runs the shared schema checks for a record that refers to itself through
# an array field.
def test_recursive
  node_schema = <<-EOS
    {"type": "record",
     "name": "Node",
     "fields": [{"name": "label", "type": "string"},
                {"name": "children",
                 "type": {"type": "array", "items": "Node"}}]}
  EOS
  check(node_schema)
end
|
114
|
+
|
115
|
+
# Runs the shared schema checks for a four-branch union, then checks a
# default against a ["double", "long"] union.
def test_union
  mixed_union = <<-EOS
    ["string",
     "null",
     "long",
     {"type": "record",
      "name": "Cons",
      "fields": [{"name": "car", "type": "string"},
                 {"name": "cdr", "type": "string"}]}]
  EOS
  check(mixed_union)
  check_default('["double", "long"]', '1.1', 1.1)
end
|
128
|
+
|
129
|
+
# Runs the shared schema checks for mutually recursive Lisp/Cons records.
def test_lisp
  cons_cells = <<-EOS
    {"type": "record",
     "name": "Lisp",
     "fields": [{"name": "value",
                 "type": ["null", "string",
                          {"type": "record",
                           "name": "Cons",
                           "fields": [{"name": "car", "type": "Lisp"},
                                      {"name": "cdr", "type": "Lisp"}]}]}]}
  EOS
  check(cons_cells)
end
|
142
|
+
|
143
|
+
# Runs the shared schema checks for a one-byte fixed type and a fixed default.
def test_fixed
  one_byte = '{"type": "fixed", "name": "Test", "size": 1}'
  check(one_byte)
  check_default(one_byte, '"a"', 'a')
end
|
148
|
+
|
149
|
+
# Parsing an enum that lists the same symbol twice must raise
# Avro::SchemaParseError.
def test_enum_with_duplicate
  duplicated = '{"type": "enum", "name": "Test","symbols" : ["AA", "AA"]}'
  assert_raises(Avro::SchemaParseError) { Avro::Schema.parse(duplicated) }
end
|
155
|
+
|
156
|
+
# Pairs of [integer, expected binary encoding as space-separated hex bytes],
# shared by the int/long encoding and skip tests. Frozen so no test can
# modify the shared table by accident.
BINARY_INT_ENCODINGS = [
  [0, '00'],
  [-1, '01'],
  [1, '02'],
  [-2, '03'],
  [2, '04'],
  [-64, '7f'],
  [64, '80 01'],
  [8192, '80 80 01'],
  [-8193, '81 80 01'],
].freeze
|
167
|
+
|
168
|
+
# Reads one variable-length encoded number from reader, one byte at a time,
# stopping after the first byte whose high bit (0x80) is clear.
#
# reader - IO-like object responding to #read(1)
#
# Returns the bytes read as lowercase hex pairs joined by single spaces.
def avro_hexlify(reader)
  hex_octets = []
  loop do
    octet = reader.read(1)
    hex_octets << hexlify(octet)
    break if (octet.unpack('C').first & 0x80).zero?
  end
  hex_octets.join(' ')
end
|
178
|
+
|
179
|
+
# Hex-encodes a binary string.
#
# Returns a one-element Array holding the lowercase hex digits of msg, which
# is what String#unpack yields for the "H*" directive.
def hexlify(msg)
  msg.unpack('H*')
end
|
182
|
+
|
183
|
+
# Encodes every table value with the "int" schema and compares the bytes
# written with the expected hex string.
def test_binary_int_encoding
  BINARY_INT_ENCODINGS.each do |value, hex_encoding|
    # write datum in binary to string buffer
    buffer = StringIO.new
    int_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"int"'))
    int_writer.write(value, Avro::IO::BinaryEncoder.new(buffer))

    buffer.seek(0)
    assert_equal hex_encoding, avro_hexlify(buffer)
  end
end
|
197
|
+
|
198
|
+
# Encodes every table value with the "long" schema and compares the bytes
# written with the expected hex string.
def test_binary_long_encoding
  BINARY_INT_ENCODINGS.each do |value, hex_encoding|
    buffer = StringIO.new
    long_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"long"'))
    long_writer.write(value, Avro::IO::BinaryEncoder.new(buffer))

    # read it out of the buffer and hexlify it
    buffer.seek(0)
    assert_equal hex_encoding, avro_hexlify(buffer)
  end
end
|
212
|
+
|
213
|
+
# The same character supplied as ISO-8859-1 and as UTF-8 must both be
# written by the "string" schema as a length prefix followed by the UTF-8
# bytes.
def test_utf8_string_encoding
  inputs = [
    "\xC3".force_encoding('ISO-8859-1'),
    "\xC3\x83".force_encoding('UTF-8')
  ]
  inputs.each do |value|
    output = ''.force_encoding('BINARY')
    sink = Avro::IO::BinaryEncoder.new(StringIO.new(output))
    string_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"string"'))
    string_writer.write(value, sink)

    assert_equal "\x04\xc3\x83".force_encoding('BINARY'), output
  end
end
|
226
|
+
|
227
|
+
# The "bytes" schema must write the same two raw bytes, with a length
# prefix, whatever encoding the input string is tagged with.
def test_bytes_encoding
  inputs = %w[BINARY ISO-8859-1 UTF-8].map do |encoding_name|
    "\xC3\x83".force_encoding(encoding_name)
  end
  inputs.each do |value|
    output = ''.force_encoding('BINARY')
    sink = Avro::IO::BinaryEncoder.new(StringIO.new(output))
    bytes_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"bytes"'))
    bytes_writer.write(value, sink)

    assert_equal "\x04\xc3\x83".force_encoding('BINARY'), output
  end
end
|
241
|
+
|
242
|
+
# A two-byte fixed schema must write exactly the two raw bytes, with no
# length prefix, whatever encoding the input string is tagged with.
def test_fixed_encoding
  inputs = %w[BINARY ISO-8859-1 UTF-8].map do |encoding_name|
    "\xC3\x83".force_encoding(encoding_name)
  end
  inputs.each do |value|
    output = ''.force_encoding('BINARY')
    sink = Avro::IO::BinaryEncoder.new(StringIO.new(output))
    two_bytes = '{"type": "fixed", "name": "TwoBytes", "size": 2}'
    fixed_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse(two_bytes))
    fixed_writer.write(value, sink)

    assert_equal "\xc3\x83".force_encoding('BINARY'), output
  end
end
|
257
|
+
|
258
|
+
# For every table value: writes it followed by a sentinel long, skips the
# first with BinaryDecoder#skip_long, and checks the sentinel is read next.
def test_skip_long
  BINARY_INT_ENCODINGS.each do |value_to_skip, _hex_encoding|
    value_to_read = 6253

    # write some data in binary to string buffer
    written = StringIO.new
    sink = Avro::IO::BinaryEncoder.new(written)
    long_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"long"'))
    [value_to_skip, value_to_read].each { |v| long_writer.write(v, sink) }

    # skip the value
    decoder = Avro::IO::BinaryDecoder.new(StringIO.new(written.string))
    decoder.skip_long

    # read data from string buffer and check it
    long_reader = Avro::IO::DatumReader.new(Avro::Schema.parse('"long"'))
    assert_equal value_to_read, long_reader.read(decoder)
  end
end
|
282
|
+
|
283
|
+
# For every table value: writes it followed by a sentinel int, skips the
# first with BinaryDecoder#skip_int, and checks the sentinel is read next.
def test_skip_int
  BINARY_INT_ENCODINGS.each do |value_to_skip, _hex_encoding|
    value_to_read = 6253

    written = StringIO.new
    sink = Avro::IO::BinaryEncoder.new(written)
    int_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"int"'))
    [value_to_skip, value_to_read].each { |v| int_writer.write(v, sink) }

    decoder = Avro::IO::BinaryDecoder.new(StringIO.new(written.string))
    decoder.skip_int

    int_reader = Avro::IO::DatumReader.new(Avro::Schema.parse('"int"'))
    assert_equal value_to_read, int_reader.read(decoder)
  end
end
|
303
|
+
|
304
|
+
# For a value of each union branch: writes it followed by a sentinel, skips
# the first datum with DatumReader#skip_data, and checks the sentinel is
# read next.
def test_skip_union
  ['hello', -1, 32, nil].each do |value_to_skip|
    value_to_read = 6253

    schema = Avro::Schema.parse('["int", "string", "null"]')
    written = StringIO.new
    sink = Avro::IO::BinaryEncoder.new(written)
    union_writer = Avro::IO::DatumWriter.new(schema)
    [value_to_skip, value_to_read].each { |v| union_writer.write(v, sink) }

    decoder = Avro::IO::BinaryDecoder.new(StringIO.new(written.string))
    union_reader = Avro::IO::DatumReader.new(schema)
    union_reader.skip_data(schema, decoder)

    assert_equal value_to_read, union_reader.read(decoder)
  end
end
|
324
|
+
|
325
|
+
|
326
|
+
# Writes 219 with each numeric schema and reads it back with every schema
# listed after it (int -> long -> float -> double), expecting the value to
# survive the promotion.
#
# Each pair is asserted on its own, expected value first, so a failure names
# the writer/reader pair instead of reporting only a count of mismatches.
def test_schema_promotion
  promotable_schemas = ['"int"', '"long"', '"float"', '"double"']
  datum_to_write = 219

  promotable_schemas.each_with_index do |ws, i|
    writers_schema = Avro::Schema.parse(ws)
    promotable_schemas[(i + 1)..-1].each do |rs|
      readers_schema = Avro::Schema.parse(rs)
      # write_datum also returns the encoder and datum writer; only the
      # buffer is needed here.
      buffer, = write_datum(datum_to_write, writers_schema)
      datum_read = read_datum(buffer, writers_schema, readers_schema)
      assert_equal(datum_to_write, datum_read, "promotion #{ws} -> #{rs}")
    end
  end
end
|
343
|
+
private
|
344
|
+
|
345
|
+
# Checks that a field default is applied during schema resolution.
#
# An empty record "Foo" is used as the writer's schema and a "Foo" with one
# field "f" (of the given type and default) as the reader's schema; decoding
# from an empty buffer must produce the default for "f".
#
# schema_json   - JSON text of the field's type
# default_json  - JSON text of the field's default
# default_value - Ruby value expected for record["f"]
def check_default(schema_json, default_json, default_value)
  writers = Avro::Schema.parse('{"type": "record", "name": "Foo", "fields": []}')

  readers_json = <<-EOS
    {"type": "record",
     "name": "Foo",
     "fields": [{"name": "f", "type": #{schema_json}, "default": #{default_json}}]}
  EOS
  readers = Avro::Schema.parse(readers_json)

  empty_input = Avro::IO::BinaryDecoder.new(StringIO.new)
  record = Avro::IO::DatumReader.new(writers, readers).read(empty_input)
  assert_equal default_value, record['f']
end
|
360
|
+
|
361
|
+
# Runs the shared checks for one schema given as JSON text: to_s round-trip,
# equality of two parses, hashing, random-datum serialization and a data-file
# round-trip.
def check(str)
  schema = Avro::Schema.parse(str)

  # The schema rendered back to a string must parse to the same JSON value.
  # NB: I don't think we should do this. Why enforce ordering?
  rendered = schema.to_s
  assert_equal(MultiJson.load(str), MultiJson.load(rendered))

  # Two parses of the same text must compare equal.
  assert_equal(schema, Avro::Schema.parse(str))

  # Hashing must not recurse forever.
  schema.hash

  # Serialize random data nine times.
  generator = RandomData.new(schema)
  9.times { checkser(schema, generator) }

  # Write data to a file and read it back.
  check_datafile(schema)
end
|
385
|
+
|
386
|
+
# Takes the next random datum, checks it validates against the schema, then
# binary-encodes and decodes it and expects the same value back.
#
# schm       - parsed schema
# randomdata - generator whose #next yields a datum
def checkser(schm, randomdata)
  datum = randomdata.next
  assert validate(schm, datum)

  encoded = StringIO.new('', 'w')
  Avro::IO::DatumWriter.new(schm).write(datum, Avro::IO::BinaryEncoder.new(encoded))

  schema_reader = datum_reader(schm)
  source = StringIO.new(encoded.string)
  decoded = schema_reader.read(Avro::IO::BinaryDecoder.new(source))
  assert_equal(datum, decoded) # FIXME check on assertdata conditional
end
|
397
|
+
|
398
|
+
# Writes ten seeded random datums for the schema to DATAFILE, then reads the
# file back and compares each record with a generator re-seeded identically.
#
# The file is read in binary read-only mode ('rb'), not the text read-write
# mode 'r+', because a data file is binary. Writer and reader are closed in
# ensure blocks so a failed assertion does not leave the file open, and
# records are counted explicitly so an empty read fails the count check
# rather than raising on nil.
def check_datafile(schm)
  seed = 0
  count = 10

  random_data = RandomData.new(schm, seed)
  dw = Avro::DataFile::Writer.new(File.open(DATAFILE, 'wb'), datum_writer(schm), schm)
  begin
    count.times { dw << random_data.next }
  ensure
    dw.close
  end

  # Re-seed so the generator replays the sequence that was written.
  random_data = RandomData.new(schm, seed)
  dr = Avro::DataFile::Reader.new(File.open(DATAFILE, 'rb'), datum_reader(schm))
  records_read = 0
  begin
    dr.each do |data|
      # FIXME assertdata conditional
      assert_equal(random_data.next, data)
      records_read += 1
    end
  ensure
    dr.close
  end
  assert_equal count, records_read
end
|
424
|
+
|
425
|
+
# Shorthand for Avro::Schema.validate.
#
# schm  - parsed schema
# datum - value to check against it
def validate(schm, datum)
  Avro::Schema.validate(schm, datum)
end
|
428
|
+
|
429
|
+
# Builds an Avro::IO::DatumWriter for the given parsed schema.
def datum_writer(schm)
  Avro::IO::DatumWriter.new(schm)
end
|
432
|
+
|
433
|
+
# Builds an Avro::IO::DatumReader for the given parsed schema.
def datum_reader(schm)
  Avro::IO::DatumReader.new(schm)
end
|
436
|
+
|
437
|
+
# Binary-encodes one datum into a fresh in-memory buffer.
#
# Returns [buffer, encoder, datum_writer] so callers can reuse any of them.
def write_datum(datum, writers_schema)
  buffer = StringIO.new
  sink = Avro::IO::BinaryEncoder.new(buffer)
  schema_writer = Avro::IO::DatumWriter.new(writers_schema)
  schema_writer.write(datum, sink)
  [buffer, sink, schema_writer]
end
|
444
|
+
|
445
|
+
# Decodes one datum from the bytes held in buffer.
#
# buffer         - object whose #string returns the encoded bytes
# writers_schema - schema the data was written with
# readers_schema - schema to read it as (optional, defaults to nil)
def read_datum(buffer, writers_schema, readers_schema=nil)
  source = Avro::IO::BinaryDecoder.new(StringIO.new(buffer.string))
  Avro::IO::DatumReader.new(writers_schema, readers_schema).read(source)
end
|
451
|
+
end
|