avro-salsify-fork 1.9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/env ruby
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ require 'socket'
19
+ require 'avro'
20
+
21
# JSON definition of the sample "Mail" protocol: a single record type
# (Message) plus two RPC messages, "send" and "replay", each of which
# responds with a string.
MAIL_PROTOCOL_JSON = <<-EOS
{"namespace": "example.proto",
 "protocol": "Mail",

 "types": [
     {"name": "Message", "type": "record",
      "fields": [
          {"name": "to", "type": "string"},
          {"name": "from", "type": "string"},
          {"name": "body", "type": "string"}
      ]
     }
 ],

 "messages": {
     "send": {
         "request": [{"name": "message", "type": "Message"}],
         "response": "string"
     },
     "replay": {
         "request": [],
         "response": "string"
     }
 }
}
EOS

# Parsed once at load time and shared by every responder instance.
MAIL_PROTOCOL = Avro::Protocol.parse(MAIL_PROTOCOL_JSON)
49
+
50
# Responder for the sample Mail protocol: turns each incoming RPC into the
# string that is sent back to the caller.
class MailResponder < Avro::IPC::Responder
  def initialize
    super(MAIL_PROTOCOL)
  end

  # Produces the reply for one call.
  #
  # message - protocol message object; only its #name is consulted here.
  # request - request parameters; for 'send' the 'message' entry is read.
  #
  # Returns the reply string, or nil when the message name is neither
  # 'send' nor 'replay'.
  def call(message, request)
    case message.name
    when 'send'
      mail = request['message']
      "Sent message to #{mail['to']} from #{mail['from']} with body #{mail['body']}"
    when 'replay'
      'replay'
    end
  end
end
64
+
65
# Minimal blocking TCP accept loop. Subclasses supply #handle(session), which
# is invoked once per accepted connection.
class RequestHandler
  # address - host name or IP address to bind to.
  # port    - TCP port to listen on.
  def initialize(address, port)
    @ip_address = address
    @port = port
  end

  # Binds the listening socket and serves connections one at a time.
  #
  # Each accepted session is closed even when #handle raises, and the
  # listening socket is released when the loop exits for any reason, so an
  # error in a handler no longer leaks file descriptors. Exceptions from
  # #handle still propagate to the caller, as before.
  def run
    server = TCPServer.new(@ip_address, @port)
    begin
      while (session = server.accept)
        begin
          handle(session)
        ensure
          # The handler may already have closed the socket itself.
          session.close unless session.closed?
        end
      end
    ensure
      server.close unless server.closed?
    end
  end
end
79
+
80
# Connection handler that answers a single framed Avro request using
# MailResponder.
class MailHandler < RequestHandler
  # request - the accepted socket for this connection.
  #
  # Reads one framed message from the socket, lets the responder compute the
  # reply, and writes the framed reply back on the same socket.
  def handle(request)
    transport = Avro::IPC::SocketTransport.new(request)
    framed_call = transport.read_framed_message
    framed_reply = MailResponder.new.respond(framed_call)
    transport.write_framed_message(framed_reply)
  end
end
88
+
89
# Start the sample server only when this file is run as a script, not when
# it is loaded via require.
MailHandler.new('localhost', 9090).run if $PROGRAM_NAME == __FILE__
@@ -0,0 +1,214 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ require 'test_help'
19
+
20
+ class TestDataFile < Test::Unit::TestCase
21
+ HERE = File.expand_path File.dirname(__FILE__)
22
# Removes a data file left behind in the test directory before each test.
def setup
  # File.exists? is deprecated and was removed in Ruby 3.2; File.exist? is
  # the supported spelling on every Ruby version.
  stale_file = HERE + '/data.avr'
  File.unlink(stale_file) if File.exist?(stale_file)
end
27
+
28
# Removes the data file from the test directory after each test.
def teardown
  # File.exists? is deprecated and was removed in Ruby 3.2; File.exist? is
  # the supported spelling on every Ruby version.
  leftover = HERE + '/data.avr'
  File.unlink(leftover) if File.exist?(leftover)
end
33
+
34
# Writes records with a three-field schema and reads them back with a reader
# schema that keeps only "username", checking that the projection works.
def test_differing_schemas_with_primitives
  # The default of a boolean field must be a JSON boolean; the quoted
  # "false" used previously is a string and is not a valid boolean default.
  writer_schema = <<-JSON
{ "type": "record",
  "name": "User",
  "fields" : [
    {"name": "username", "type": "string"},
    {"name": "age", "type": "int"},
    {"name": "verified", "type": "boolean", "default": false}
  ]}
  JSON

  data = [{"username" => "john", "age" => 25, "verified" => true},
          {"username" => "ryan", "age" => 23, "verified" => false}]

  Avro::DataFile.open('data.avr', 'w', writer_schema) do |dw|
    data.each { |h| dw << h }
  end

  # extract the username only from the avro serialized file
  reader_schema = <<-JSON
{ "type": "record",
  "name": "User",
  "fields" : [
    {"name": "username", "type": "string"}
  ]}
  JSON

  Avro::DataFile.open('data.avr', 'r', reader_schema) do |dr|
    dr.each_with_index do |record, i|
      assert_equal data[i]['username'], record['username']
    end
  end
end
67
+
68
# Writes records containing every complex type, then re-reads the file once
# per type name with a reader schema that omits the fields of that type, and
# checks that all remaining fields still decode to the written values.
def test_differing_schemas_with_complex_objects
  writer_schema = <<-JSON
{ "type": "record",
  "name": "something",
  "fields": [
    {"name": "something_fixed", "type": {"name": "inner_fixed",
                                         "type": "fixed", "size": 3}},
    {"name": "something_enum", "type": {"name": "inner_enum",
                                        "type": "enum",
                                        "symbols": ["hello", "goodbye"]}},
    {"name": "something_array", "type": {"type": "array", "items": "int"}},
    {"name": "something_map", "type": {"type": "map", "values": "int"}},
    {"name": "something_record", "type": {"name": "inner_record",
                                          "type": "record",
                                          "fields": [
                                            {"name": "inner", "type": "int"}
                                          ]}},
    {"name": "username", "type": "string"}
  ]}
  JSON

  john = {
    "username" => "john",
    "something_fixed" => "foo",
    "something_enum" => "hello",
    "something_array" => [1,2,3],
    "something_map" => {"a" => 1, "b" => 2},
    "something_record" => {"inner" => 2},
    "something_error" => {"code" => 403}
  }
  ryan = {
    "username" => "ryan",
    "something_fixed" => "bar",
    "something_enum" => "goodbye",
    "something_array" => [1,2,3],
    "something_map" => {"a" => 2, "b" => 6},
    "something_record" => {"inner" => 1},
    "something_error" => {"code" => 401}
  }
  data = [john, ryan]

  Avro::DataFile.open('data.avr', 'w', writer_schema) do |dw|
    data.each { |datum| dw << datum }
  end

  %w[fixed enum record error array map union].each do |excluded_type|
    reader = MultiJson.load(writer_schema)
    # For the plain "string" field, f['type'] is a String, so the inner
    # lookup is a substring search that never equals a type name here.
    kept_fields = reader['fields'].reject { |f| f['type']['type'] == excluded_type }
    reader['fields'] = kept_fields

    Avro::DataFile.open('data.avr', 'r', MultiJson.dump(reader)) do |dr|
      dr.each_with_index do |obj, i|
        reader['fields'].each do |field|
          field_name = field['name']
          assert_equal data[i][field_name], obj[field_name]
        end
      end
    end
  end
end
122
+
123
# Fills the writer until its buffer position reaches SYNC_INTERVAL, then
# checks that appending one more datum raises block_count by exactly one.
def test_data_writer_handles_sync_interval
  writer_schema = <<-JSON
{ "type": "record",
  "name": "something",
  "fields": [
    {"name": "something_boolean", "type": "boolean"}
  ]}
  JSON

  datum = {"something_boolean" => true }

  Avro::DataFile.open('data.avr', 'w', writer_schema) do |dw|
    dw << datum until dw.writer.tell >= Avro::DataFile::SYNC_INTERVAL

    count_before = dw.block_count
    dw << datum
    # ensure we didn't just write another block
    assert_equal(count_before + 1, dw.block_count)
  end
end
144
+
145
# Round-trips a multi-byte UTF-8 string through a data file.
#
# Uses the block form of Avro::DataFile.open (as the other tests in this
# class do) so both handles are closed even when an assertion fails, and
# compares the full list of records so the test cannot pass vacuously when
# nothing is read back.
def test_utf8
  Avro::DataFile.open('data.avr', 'w', '"string"') do |writer|
    writer << "家"
  end

  records = []
  Avro::DataFile.open('data.avr') do |reader|
    reader.each { |s| records << s }
  end
  assert_equal ["家"], records
end
156
+
157
# Writes a highly compressible record with the deflate codec, checks that the
# file on disk is small, and verifies the record reads back intact.
def test_deflate
  Avro::DataFile.open('data.avr', 'w', '"string"', :deflate) do |writer|
    writer << 'a' * 10_000
  end
  assert(File.size('data.avr') < 500)

  records = []
  Avro::DataFile.open('data.avr') do |reader|
    reader.each { |record| records << record }
  end
  # assert_equal takes (expected, actual); the previous order produced
  # misleading failure messages.
  assert_equal ['a' * 10_000], records
end
169
+
170
# Writes a highly compressible record with the snappy codec, checks that the
# file on disk is small, and verifies the record reads back intact.
def test_snappy
  Avro::DataFile.open('data.avr', 'w', '"string"', :snappy) do |writer|
    writer << 'a' * 10_000
  end
  assert(File.size('data.avr') < 600)

  records = []
  Avro::DataFile.open('data.avr') do |reader|
    reader.each { |record| records << record }
  end
  # assert_equal takes (expected, actual); the previous order produced
  # misleading failure messages.
  assert_equal ['a' * 10_000], records
end
182
+
183
# Creates a deflate-compressed file, reopens it in append mode without
# passing a schema or codec, appends a second record, and checks that the
# file stays small and both records read back in order.
def test_append_to_deflated_file
  schema = Avro::Schema.parse('"string"')
  writer = Avro::IO::DatumWriter.new(schema)
  file = Avro::DataFile::Writer.new(File.open('data.avr', 'wb'), writer, schema, :deflate)
  file << 'a' * 10_000
  file.close

  file = Avro::DataFile::Writer.new(File.open('data.avr', 'a+b'), writer)
  file << 'b' * 10_000
  file.close
  assert(File.size('data.avr') < 1_000)

  records = []
  Avro::DataFile.open('data.avr') do |reader|
    reader.each { |record| records << record }
  end
  # assert_equal takes (expected, actual); the previous order produced
  # misleading failure messages.
  assert_equal ['a' * 10_000, 'b' * 10_000], records
end
201
+
202
# Writes an empty data file carrying a custom metadata entry and checks that
# the entry is visible through the reader's meta hash.
def test_custom_meta
  schema = Avro::Schema.parse('"string"')
  datum_writer = Avro::IO::DatumWriter.new(schema)
  custom_meta = { 'x.greeting' => 'yo' }

  out = File.open('data.avr', 'wb')
  Avro::DataFile::Writer.new(out, datum_writer, schema, nil, custom_meta).close

  Avro::DataFile.open('data.avr') do |reader|
    assert_equal 'yo', reader.meta['x.greeting']
  end
end
214
+ end
@@ -0,0 +1,37 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'test_help'
18
+
19
# Pins the MD5 and SHA-256 fingerprints of the primitive "int" schema to
# known integer values.
class TestFingerprints < Test::Unit::TestCase
  def test_md5_fingerprint
    assert_equal 318112854175969537208795771590915775282,
                 int_schema.md5_fingerprint
  end

  def test_sha256_fingerprint
    assert_equal 28572620203319713300323544804233350633246234624932075150020181448463213378117,
                 int_schema.sha256_fingerprint
  end

  private

  # Parses a fresh copy of the schema used by both fingerprint tests.
  def int_schema
    Avro::Schema.parse <<-SCHEMA
      { "type": "int" }
    SCHEMA
  end
end
@@ -0,0 +1,23 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'rubygems'
18
+ require 'test/unit'
19
+ require 'stringio'
20
+ require 'fileutils'
21
+ FileUtils.mkdir_p('tmp')
22
+ require 'avro'
23
+ require 'random_data'
@@ -0,0 +1,451 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'test_help'
18
+
19
+ class TestIO < Test::Unit::TestCase
20
+ DATAFILE = 'tmp/test.rb.avro'
21
+ Schema = Avro::Schema
22
+
23
# Round-trip and default-value checks for the "null" primitive.
def test_null
  schema_json = '"null"'
  check(schema_json)
  check_default(schema_json, "null", nil)
end
27
+
28
# Round-trip check for "boolean", plus default-value checks for both values.
def test_boolean
  schema_json = '"boolean"'
  check(schema_json)
  [["true", true], ["false", false]].each do |default_json, expected|
    check_default(schema_json, default_json, expected)
  end
end
33
+
34
# Round-trip and default-value checks for the "string" primitive.
def test_string
  schema_json = '"string"'
  check(schema_json)
  check_default(schema_json, '"foo"', "foo")
end
38
+
39
# Round-trip and default-value checks for the "bytes" primitive.
def test_bytes
  schema_json = '"bytes"'
  check(schema_json)
  check_default(schema_json, '"foo"', "foo")
end
43
+
44
# Round-trip and default-value checks for the "int" primitive.
def test_int
  schema_json = '"int"'
  check(schema_json)
  check_default(schema_json, "5", 5)
end
48
+
49
# Round-trip and default-value checks for the "long" primitive.
def test_long
  schema_json = '"long"'
  check(schema_json)
  check_default(schema_json, "9", 9)
end
53
+
54
# Round-trip and default-value checks for the "float" primitive.
def test_float
  schema_json = '"float"'
  check(schema_json)
  check_default(schema_json, "1.2", 1.2)
end
58
+
59
# Round-trip and default-value checks for the "double" primitive.
def test_double
  schema_json = '"double"'
  check(schema_json)
  check_default(schema_json, "1.2", 1.2)
end
63
+
64
# Round-trip and default-value checks for an array of longs.
def test_array
  schema_json = '{"type": "array", "items": "long"}'
  check(schema_json)
  check_default(schema_json, "[1]", [1])
end
69
+
70
# Round-trip and default-value checks for a map with long values.
def test_map
  schema_json = '{"type": "map", "values": "long"}'
  check(schema_json)
  check_default(schema_json, '{"a": 1}', {"a" => 1})
end
75
+
76
# Round-trip and default-value checks for a record with one long field.
def test_record
  schema_json = <<EOS
    {"type": "record",
     "name": "Test",
     "fields": [{"name": "f",
                 "type": "long"}]}
EOS
  check(schema_json)
  check_default(schema_json, '{"f": 11}', {"f" => 11})
end
86
+
87
# Round-trip and default-value checks for an error type with one string field.
def test_error
  schema_json = <<EOS
    {"type": "error",
     "name": "TestError",
     "fields": [{"name": "message",
                 "type": "string"}]}
EOS
  check(schema_json)
  check_default(schema_json, '{"message": "boom"}', {"message" => "boom"})
end
97
+
98
# Round-trip and default-value checks for a two-symbol enum.
def test_enum
  schema_json = '{"type": "enum", "name": "Test","symbols": ["A", "B"]}'
  check(schema_json)
  check_default(schema_json, '"B"', "B")
end
103
+
104
# Round-trip check for a record whose "children" field is an array of the
# record type itself.
def test_recursive
  schema_json = <<EOS
    {"type": "record",
     "name": "Node",
     "fields": [{"name": "label", "type": "string"},
                {"name": "children",
                 "type": {"type": "array", "items": "Node"}}]}
EOS
  check(schema_json)
end
114
+
115
# Round-trip check for a four-branch union, plus a default-value check on a
# separate ["double", "long"] union.
def test_union
  schema_json = <<EOS
    ["string",
     "null",
     "long",
     {"type": "record",
      "name": "Cons",
      "fields": [{"name": "car", "type": "string"},
                 {"name": "cdr", "type": "string"}]}]
EOS
  check(schema_json)
  check_default('["double", "long"]', "1.1", 1.1)
end
128
+
129
# Round-trip check for a mutually recursive Lisp/Cons schema.
def test_lisp
  schema_json = <<EOS
    {"type": "record",
     "name": "Lisp",
     "fields": [{"name": "value",
                 "type": ["null", "string",
                          {"type": "record",
                           "name": "Cons",
                           "fields": [{"name": "car", "type": "Lisp"},
                                      {"name": "cdr", "type": "Lisp"}]}]}]}
EOS
  check(schema_json)
end
142
+
143
# Round-trip and default-value checks for a one-byte fixed type.
def test_fixed
  schema_json = '{"type": "fixed", "name": "Test", "size": 1}'
  check(schema_json)
  check_default(schema_json, '"a"', "a")
end
148
+
149
# Parsing an enum whose symbol list contains a duplicate must raise
# Avro::SchemaParseError.
def test_enum_with_duplicate
  str = '{"type": "enum", "name": "Test","symbols" : ["AA", "AA"]}'
  assert_raises(Avro::SchemaParseError) do
    # The parse result is irrelevant; only the raised error matters, so it
    # is no longer assigned to an unused local.
    Avro::Schema.parse str
  end
end
155
+
156
# Pairs of [integer value, expected encoding as space-separated hex bytes]
# shared by the int/long encoding and skip tests. Frozen so no test can
# accidentally mutate the shared table.
BINARY_INT_ENCODINGS = [
  [0, '00'],
  [-1, '01'],
  [1, '02'],
  [-2, '03'],
  [2, '04'],
  [-64, '7f'],
  [64, '80 01'],
  [8192, '80 80 01'],
  [-8193, '81 80 01'],
].freeze
167
+
168
# Reads one variable-length integer from +reader+ and renders its bytes as
# space-separated two-digit hex, e.g. "80 01".
#
# reader - an IO-like object responding to read(1).
#
# Bytes are consumed while the high bit (0x80) is set. Reading stops cleanly
# at end of input: an empty stream yields "" and a truncated value yields the
# bytes read so far, rather than raising NoMethodError on nil.
def avro_hexlify(reader)
  hex_bytes = []
  while (byte = reader.read(1)) && !byte.empty?
    hex_bytes << byte.unpack('H*').first
    # A clear high bit marks the last byte of the value.
    break if (byte.getbyte(0) & 0x80).zero?
  end
  hex_bytes.join(' ')
end
178
+
179
# Hex-encodes the bytes of +msg+.
#
# Returns a one-element Array holding the lowercase hex string, which is
# what String#unpack produces for a single "H*" directive.
def hexlify(msg)
  msg.unpack('H*')
end
182
+
183
# Encodes every table value with an "int" schema and compares the bytes
# written against the expected hex.
def test_binary_int_encoding
  BINARY_INT_ENCODINGS.each do |value, hex_encoding|
    # write datum in binary to string buffer
    buffer = StringIO.new
    int_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"int"'))
    int_writer.write(value, Avro::IO::BinaryEncoder.new(buffer))

    buffer.rewind
    assert_equal hex_encoding, avro_hexlify(buffer)
  end
end
197
+
198
# Encodes every table value with a "long" schema and compares the bytes
# written against the expected hex.
def test_binary_long_encoding
  BINARY_INT_ENCODINGS.each do |value, hex_encoding|
    buffer = StringIO.new
    long_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"long"'))
    long_writer.write(value, Avro::IO::BinaryEncoder.new(buffer))

    # read it out of the buffer and hexlify it
    buffer.rewind
    assert_equal hex_encoding, avro_hexlify(buffer)
  end
end
212
+
213
# The same character supplied as ISO-8859-1 and as UTF-8 must be written
# with identical output bytes for a "string" schema.
def test_utf8_string_encoding
  inputs = [
    "\xC3".force_encoding('ISO-8859-1'),
    "\xC3\x83".force_encoding('UTF-8')
  ]

  inputs.each do |value|
    output = ''.force_encoding('BINARY')
    string_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"string"'))
    string_writer.write(value, Avro::IO::BinaryEncoder.new(StringIO.new(output)))

    assert_equal "\x04\xc3\x83".force_encoding('BINARY'), output
  end
end
226
+
227
# For a "bytes" schema the same two raw bytes must be written unchanged
# whatever encoding the Ruby string is tagged with.
def test_bytes_encoding
  inputs = %w[BINARY ISO-8859-1 UTF-8].map do |encoding_name|
    "\xC3\x83".force_encoding(encoding_name)
  end

  inputs.each do |value|
    output = ''.force_encoding('BINARY')
    bytes_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"bytes"'))
    bytes_writer.write(value, Avro::IO::BinaryEncoder.new(StringIO.new(output)))

    assert_equal "\x04\xc3\x83".force_encoding('BINARY'), output
  end
end
241
+
242
# For a two-byte fixed schema the same two raw bytes must be written
# unchanged whatever encoding the Ruby string is tagged with.
def test_fixed_encoding
  schema = '{"type": "fixed", "name": "TwoBytes", "size": 2}'
  inputs = %w[BINARY ISO-8859-1 UTF-8].map do |encoding_name|
    "\xC3\x83".force_encoding(encoding_name)
  end

  inputs.each do |value|
    output = ''.force_encoding('BINARY')
    fixed_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse(schema))
    fixed_writer.write(value, Avro::IO::BinaryEncoder.new(StringIO.new(output)))

    assert_equal "\xc3\x83".force_encoding('BINARY'), output
  end
end
257
+
258
# Writes two longs back to back, skips the first with skip_long, and checks
# that the second is what gets read next.
def test_skip_long
  value_to_read = 6253

  BINARY_INT_ENCODINGS.each do |value_to_skip, _hex_encoding|
    # write some data in binary to string buffer
    sink = StringIO.new
    encoder = Avro::IO::BinaryEncoder.new(sink)
    long_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"long"'))
    [value_to_skip, value_to_read].each { |v| long_writer.write(v, encoder) }

    # skip the value
    decoder = Avro::IO::BinaryDecoder.new(StringIO.new(sink.string))
    decoder.skip_long

    # read data from string buffer and check it
    long_reader = Avro::IO::DatumReader.new(Avro::Schema.parse('"long"'))
    assert_equal value_to_read, long_reader.read(decoder)
  end
end
282
+
283
# Writes two ints back to back, skips the first with skip_int, and checks
# that the second is what gets read next.
def test_skip_int
  value_to_read = 6253

  BINARY_INT_ENCODINGS.each do |value_to_skip, _hex_encoding|
    sink = StringIO.new
    encoder = Avro::IO::BinaryEncoder.new(sink)
    int_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"int"'))
    [value_to_skip, value_to_read].each { |v| int_writer.write(v, encoder) }

    decoder = Avro::IO::BinaryDecoder.new(StringIO.new(sink.string))
    decoder.skip_int

    int_reader = Avro::IO::DatumReader.new(Avro::Schema.parse('"int"'))
    assert_equal value_to_read, int_reader.read(decoder)
  end
end
303
+
304
# For each branch of an ["int", "string", "null"] union: writes a value of
# that branch followed by a known int, skips the first datum with skip_data,
# and checks that the known int is read next.
def test_skip_union
  value_to_read = 6253
  schema = Avro::Schema.parse('["int", "string", "null"]')

  ["hello", -1, 32, nil].each do |value_to_skip|
    sink = StringIO.new
    encoder = Avro::IO::BinaryEncoder.new(sink)
    union_writer = Avro::IO::DatumWriter.new(schema)
    [value_to_skip, value_to_read].each { |v| union_writer.write(v, encoder) }

    decoder = Avro::IO::BinaryDecoder.new(StringIO.new(sink.string))
    union_reader = Avro::IO::DatumReader.new(schema)
    union_reader.skip_data(schema, decoder)

    assert_equal value_to_read, union_reader.read(decoder)
  end
end
324
+
325
+
326
# Checks numeric promotion: a value written with each schema in the list
# must read back equal when the reader uses any schema later in the list.
#
# Each writer/reader pair is asserted directly, in (expected, actual) order
# and with a message naming the pair, so a failure identifies the promotion
# that broke instead of reporting only a non-zero counter.
def test_schema_promotion
  promotable_schemas = ['"int"', '"long"', '"float"', '"double"']
  datum_to_write = 219

  promotable_schemas.each_with_index do |ws, i|
    writers_schema = Avro::Schema.parse(ws)
    promotable_schemas[(i + 1)..-1].each do |rs|
      readers_schema = Avro::Schema.parse(rs)
      # Only the buffer is needed from write_datum's return value.
      writer, = write_datum(datum_to_write, writers_schema)
      datum_read = read_datum(writer, writers_schema, readers_schema)
      assert_equal(datum_to_write, datum_read, "promoting #{ws} to #{rs}")
    end
  end
end
343
+ private
344
+
345
# Verifies that a field absent from the written data is filled from the
# reader schema's default.
#
# schema_json   - JSON text of the field's type.
# default_json  - JSON text of the field's default.
# default_value - Ruby value the default is expected to decode to.
#
# A record with no fields is decoded from an empty buffer through a reader
# schema that adds field "f" with the given type and default.
def check_default(schema_json, default_json, default_value)
  writers = Avro::Schema.parse('{"type": "record", "name": "Foo", "fields": []}')

  readers_json = <<EOS
    {"type": "record",
     "name": "Foo",
     "fields": [{"name": "f", "type": #{schema_json}, "default": #{default_json}}]}
EOS
  readers = Avro::Schema.parse(readers_json)

  empty_input = Avro::IO::BinaryDecoder.new(StringIO.new)
  record = Avro::IO::DatumReader.new(writers, readers).read(empty_input)
  assert_equal default_value, record["f"]
end
360
+
361
# Runs the standard battery of checks on the schema described by JSON +str+:
# JSON round-trip, equality, hashing, random-data serialization, and a data
# file write/read cycle.
def check(str)
  schema = Avro::Schema.parse str

  # The schema rendered back to JSON must match the input.
  # NB: I don't think we should do this. Why enforce ordering?
  round_tripped = MultiJson.load(schema.to_s)
  assert_equal(MultiJson.load(str), round_tripped)

  # Two parses of the same text must compare equal.
  assert_equal(schema, Avro::Schema.parse(str))

  # Hashing must terminate, including for recursive schemas.
  schema.hash

  # Serialize and deserialize nine random datums.
  generator = RandomData.new(schema)
  9.times { checkser(schema, generator) }

  # Write random data to a file and read it back.
  check_datafile(schema)
end
385
+
386
# Takes the next datum from +randomdata+, asserts it validates against
# +schm+, encodes it to an in-memory buffer, decodes it again, and asserts
# the decoded value equals the original.
def checkser(schm, randomdata)
  datum = randomdata.next
  assert validate(schm, datum)

  sink = StringIO.new('', 'w')
  Avro::IO::DatumWriter.new(schm).write(datum, Avro::IO::BinaryEncoder.new(sink))

  source = StringIO.new(sink.string)
  decoded = datum_reader(schm).read(Avro::IO::BinaryDecoder.new(source))
  assert_equal(datum, decoded) # FIXME check on assertdata conditional
end
397
+
398
# Writes ten seeded random datums for +schm+ to DATAFILE, reads the file
# back, and asserts every datum matches a regenerated sequence from the same
# seed and that exactly ten records were read.
#
# The reader is opened read-only in binary mode ('rb'), since the file holds
# binary data and is never written here. Writer and reader are closed in
# ensure blocks so a failing assertion does not leak handles. Records are
# counted explicitly so an empty file fails the count assertion instead of
# raising on nil.
def check_datafile(schm)
  seed = 0
  count = 10

  random_data = RandomData.new(schm, seed)
  dw = Avro::DataFile::Writer.new(File.open(DATAFILE, 'wb'), datum_writer(schm), schm)
  begin
    count.times { dw << random_data.next }
  ensure
    dw.close
  end

  # Regenerate the same sequence to compare against what was written.
  random_data = RandomData.new(schm, seed)
  dr = Avro::DataFile::Reader.new(File.open(DATAFILE, 'rb'), datum_reader(schm))
  records_read = 0
  begin
    dr.each do |data|
      # FIXME assertdata conditional
      assert_equal(random_data.next, data)
      records_read += 1
    end
  ensure
    dr.close
  end
  assert_equal count, records_read
end
424
+
425
# Delegates to Avro::Schema.validate and returns its result for +datum+
# checked against +schm+.
def validate(schm, datum)
  Avro::Schema.validate(schm, datum)
end
428
+
429
# Builds a new Avro::IO::DatumWriter for schema +schm+.
def datum_writer(schm)
  Avro::IO::DatumWriter.new(schm)
end
432
+
433
# Builds a new Avro::IO::DatumReader constructed with +schm+ alone.
def datum_reader(schm)
  Avro::IO::DatumReader.new(schm)
end
436
+
437
# Encodes +datum+ with +writers_schema+ into a fresh in-memory buffer.
#
# Returns [buffer, encoder, datum_writer] so callers can keep whichever
# parts they need.
def write_datum(datum, writers_schema)
  buffer = StringIO.new
  binary_encoder = Avro::IO::BinaryEncoder.new(buffer)
  schema_writer = Avro::IO::DatumWriter.new(writers_schema)
  schema_writer.write(datum, binary_encoder)
  [buffer, binary_encoder, schema_writer]
end
444
+
445
# Decodes one datum from the contents of +buffer+ (an object responding to
# #string), using +writers_schema+ and, when given, +readers_schema+.
#
# Returns the decoded datum.
def read_datum(buffer, writers_schema, readers_schema=nil)
  input = Avro::IO::BinaryDecoder.new(StringIO.new(buffer.string))
  Avro::IO::DatumReader.new(writers_schema, readers_schema).read(input)
end
451
+ end