avro-jruby 1.7.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,23 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
# Shared bootstrap required by every test file in this suite.

# Packaging, test framework and standard-library helpers.
require 'rubygems'
require 'test/unit'
require 'stringio'
require 'fileutils'

# Make sure the scratch directory exists (no-op when it is already there);
# the path is relative to the directory the tests are run from.
FileUtils.mkdir_p('tmp')

# Library under test and the random datum generator used by the tests.
require 'avro'
require 'random_data'
@@ -0,0 +1,393 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'test_help'
18
+
19
# Tests for Avro's IO layer: schema round-trips, field defaults, the binary
# encoding of int/long values, skipping encoded values, schema promotion and
# the DataFile container format.
class TestIO < Test::Unit::TestCase
  # Scratch container file written and re-read by check_datafile.
  DATAFILE = 'tmp/test.rb.avro'
  Schema = Avro::Schema

  def test_null
    check_default('"null"', "null", nil)
  end

  def test_boolean
    check_default('"boolean"', "true", true)
    check_default('"boolean"', "false", false)
  end

  def test_string
    check_default('"string"', '"foo"', "foo")
  end

  def test_bytes
    check_default('"bytes"', '"foo"', "foo")
  end

  def test_int
    check_default('"int"', "5", 5)
  end

  def test_long
    check_default('"long"', "9", 9)
  end

  def test_float
    check_default('"float"', "1.2", 1.2)
  end

  def test_double
    check_default('"double"', "1.2", 1.2)
  end

  def test_array
    array_schema = '{"type": "array", "items": "long"}'
    check_default(array_schema, "[1]", [1])
  end

  def test_map
    map_schema = '{"type": "map", "values": "long"}'
    check_default(map_schema, '{"a": 1}', {"a" => 1})
  end

  def test_record
    record_schema = <<-EOS
      {"type": "record",
       "name": "Test",
       "fields": [{"name": "f",
                   "type": "long"}]}
    EOS
    check_default(record_schema, '{"f": 11}', {"f" => 11})
  end

  def test_error
    error_schema = <<-EOS
      {"type": "error",
       "name": "TestError",
       "fields": [{"name": "message",
                   "type": "string"}]}
    EOS
    check_default(error_schema, '{"message": "boom"}', {"message" => "boom"})
  end

  def test_enum
    enum_schema = '{"type": "enum", "name": "Test","symbols": ["A", "B"]}'
    check_default(enum_schema, '"B"', "B")
  end

  def test_recursive
    recursive_schema = <<-EOS
      {"type": "record",
       "name": "Node",
       "fields": [{"name": "label", "type": "string"},
                  {"name": "children",
                   "type": {"type": "array", "items": "Node"}}]}
    EOS
    check(recursive_schema)
  end

  def test_union
    union_schema = <<-EOS
      ["string",
       "null",
       "long",
       {"type": "record",
        "name": "Cons",
        "fields": [{"name": "car", "type": "string"},
                   {"name": "cdr", "type": "string"}]}]
    EOS
    check(union_schema)
    check_default('["double", "long"]', "1.1", 1.1)
  end

  def test_lisp
    lisp_schema = <<-EOS
      {"type": "record",
       "name": "Lisp",
       "fields": [{"name": "value",
                   "type": ["null", "string",
                            {"type": "record",
                             "name": "Cons",
                             "fields": [{"name": "car", "type": "Lisp"},
                                        {"name": "cdr", "type": "Lisp"}]}]}]}
    EOS
    check(lisp_schema)
  end

  def test_fixed
    fixed_schema = '{"type": "fixed", "name": "Test", "size": 1}'
    check_default(fixed_schema, '"a"', "a")
  end

  # An enum that lists the same symbol twice must be rejected at parse time.
  def test_enum_with_duplicate
    str = '{"type": "enum", "name": "Test","symbols" : ["AA", "AA"]}'
    assert_raises(Avro::SchemaParseError) do
      Avro::Schema.parse str
    end
  end

  # Pairs of [value, expected encoding]. The encoding is written as
  # space-separated hex bytes (zig-zag, variable-length), shared by the
  # "int" and "long" schemas.
  BINARY_INT_ENCODINGS = [
    [0, '00'],
    [-1, '01'],
    [1, '02'],
    [-2, '03'],
    [2, '04'],
    [-64, '7f'],
    [64, '80 01'],
    [8192, '80 80 01'],
    [-8193, '81 80 01'],
  ].freeze

  # Reads one variable-length encoded integer from +reader+ and returns its
  # bytes as space-separated lowercase hex pairs (e.g. "80 01").
  # Bytes are consumed up to and including the first one whose high bit is
  # clear, which marks the end of the encoded value.
  def avro_hexlify(reader)
    bytes = []
    loop do
      current_byte = reader.read(1)
      bytes << hexlify(current_byte)
      break if (current_byte.unpack('C').first & 0x80) == 0
    end
    bytes.join ' '
  end

  # Returns the hex representation of the string +msg+ as a String.
  def hexlify(msg)
    # unpack always returns an Array; take its single element so callers get
    # a plain String instead of relying on Array#join flattening.
    msg.unpack("H*").first
  end

  def test_binary_int_encoding
    check_binary_encoding('"int"')
  end

  def test_binary_long_encoding
    check_binary_encoding('"long"')
  end

  def test_skip_long
    check_skip('"long"') { |decoder| decoder.skip_long }
  end

  def test_skip_int
    check_skip('"int"') { |decoder| decoder.skip_int }
  end

  # Writes a value of each union branch followed by a known int, skips the
  # first value through DatumReader#skip_data and checks the int is read next.
  def test_skip_union
    ["hello", -1, 32, nil].each do |value_to_skip|
      value_to_read = 6253

      schema = Avro::Schema.parse('["int", "string", "null"]')
      writer = StringIO.new
      encoder = Avro::IO::BinaryEncoder.new(writer)
      datum_writer = Avro::IO::DatumWriter.new(schema)
      datum_writer.write(value_to_skip, encoder)
      datum_writer.write(value_to_read, encoder)

      reader = StringIO.new(writer.string)
      decoder = Avro::IO::BinaryDecoder.new(reader)
      datum_reader = Avro::IO::DatumReader.new(schema)
      datum_reader.skip_data(schema, decoder)
      read_value = datum_reader.read(decoder)

      assert_equal value_to_read, read_value
    end
  end

  # A datum written with a numeric schema must be readable with every schema
  # that follows it in the promotion order int -> long -> float -> double.
  def test_schema_promotion
    promotable_schemas = ['"int"', '"long"', '"float"', '"double"']
    datum_to_write = 219
    promotable_schemas.each_with_index do |ws, i|
      writers_schema = Avro::Schema.parse(ws)
      promotable_schemas[(i + 1)..-1].each do |rs|
        readers_schema = Avro::Schema.parse(rs)
        writer = write_datum(datum_to_write, writers_schema).first
        datum_read = read_datum(writer, writers_schema, readers_schema)
        # Assert per pair (expected first) so a failure names the promotion
        # that broke instead of reporting only a mismatch count.
        assert_equal datum_to_write, datum_read,
                     "promoting #{ws} to #{rs}"
      end
    end
  end

  private

  # Encodes every value in BINARY_INT_ENCODINGS with the schema described by
  # +schema_json+ and asserts the produced bytes match the expected hex.
  def check_binary_encoding(schema_json)
    schema = Avro::Schema.parse(schema_json)
    BINARY_INT_ENCODINGS.each do |value, hex_encoding|
      # write datum in binary to string buffer
      buffer = write_datum(value, schema).first

      # read it out of the buffer and hexlify it
      buffer.seek(0)
      assert_equal hex_encoding, avro_hexlify(buffer)
    end
  end

  # For every value in BINARY_INT_ENCODINGS: writes that value followed by a
  # known marker using the schema described by +schema_json+, yields the
  # decoder so the caller can skip the first value, then asserts that the
  # marker is the next value read.
  def check_skip(schema_json)
    schema = Avro::Schema.parse(schema_json)
    value_to_read = 6253
    BINARY_INT_ENCODINGS.each do |value_to_skip, _hex_encoding|
      # write some data in binary to string buffer
      writer = StringIO.new
      encoder = Avro::IO::BinaryEncoder.new(writer)
      datum_writer = Avro::IO::DatumWriter.new(schema)
      datum_writer.write(value_to_skip, encoder)
      datum_writer.write(value_to_read, encoder)

      # skip the value
      decoder = Avro::IO::BinaryDecoder.new(StringIO.new(writer.string))
      yield decoder

      # read data from string buffer and check it
      read_value = Avro::IO::DatumReader.new(schema).read(decoder)
      assert_equal value_to_read, read_value
    end
  end

  # Runs the generic checks on +schema_json+, then verifies field defaults:
  # a record written with no fields is read with a reader's schema declaring
  # field "f" of that type with default +default_json+, and the decoded
  # record must carry +default_value+ in "f".
  def check_default(schema_json, default_json, default_value)
    check(schema_json)
    actual_schema = '{"type": "record", "name": "Foo", "fields": []}'
    actual = Avro::Schema.parse(actual_schema)

    expected_schema = <<-EOS
      {"type": "record",
       "name": "Foo",
       "fields": [{"name": "f", "type": #{schema_json}, "default": #{default_json}}]}
    EOS
    expected = Avro::Schema.parse(expected_schema)

    # The input buffer is empty: the writer's schema has no fields, so the
    # value of "f" can only come from the default.
    reader = Avro::IO::DatumReader.new(actual, expected)
    record = reader.read(Avro::IO::BinaryDecoder.new(StringIO.new))
    assert_equal default_value, record["f"]
  end

  # Generic schema checks for the JSON schema text +str+: parse/to_s
  # round-trip, equality, hashing, binary serialization of random data and
  # a container-file round-trip.
  def check(str)
    # parse schema, then convert back to string
    schema = Avro::Schema.parse str
    parsed_string = schema.to_s

    # test that the round-trip didn't mess up anything
    # NB: I don't think we should do this. Why enforce ordering?
    assert_equal(MultiJson.load(str),
                 MultiJson.load(parsed_string))

    # parsing the same text twice must give equal schemas
    assert_equal(schema, Avro::Schema.parse(str))

    # computing the hash must terminate, even for recursive schemas
    schema.hash

    # test serialization of random data
    randomdata = RandomData.new(schema)
    9.times { checkser(schema, randomdata) }

    # test writing of data to file
    check_datafile(schema)
  end

  # Takes the next random datum, checks it validates against +schm+, and
  # asserts it survives a binary write/read round-trip unchanged.
  def checkser(schm, randomdata)
    datum = randomdata.next
    assert validate(schm, datum)
    w = Avro::IO::DatumWriter.new(schm)
    writer = StringIO.new "", "w"
    w.write(datum, Avro::IO::BinaryEncoder.new(writer))
    r = datum_reader(schm)
    reader = StringIO.new(writer.string)
    ob = r.read(Avro::IO::BinaryDecoder.new(reader))
    assert_equal(datum, ob) # FIXME check on assertdata conditional
  end

  # Writes +count+ random data to DATAFILE through the container writer,
  # then reads the file back and asserts that the same sequence (regenerated
  # from the same seed) comes out, with exactly +count+ records.
  def check_datafile(schm)
    seed = 0
    count = 10

    random_data = RandomData.new(schm, seed)
    f = File.open(DATAFILE, 'wb')
    begin
      dw = Avro::DataFile::Writer.new(f, datum_writer(schm), schm)
      count.times { dw << random_data.next }
      dw.close
    ensure
      # dw.close may already have closed the file, hence the guard; this
      # also releases the handle when the write fails part-way.
      f.close unless f.closed?
    end

    # Same seed => same sequence of expected values.
    random_data = RandomData.new(schm, seed)
    records_read = 0

    # The container is binary and only read here, so open it read-only in
    # binary mode.
    f = File.open(DATAFILE, 'rb')
    begin
      dr = Avro::DataFile::Reader.new(f, datum_reader(schm))
      dr.each do |data|
        # FIXME assertdata conditional
        assert_equal(random_data.next, data)
        records_read += 1
      end
      dr.close
    ensure
      f.close unless f.closed?
    end

    # Counting explicitly yields a normal assertion failure (10 vs 0) when
    # the reader produces no records, instead of calling + on nil.
    assert_equal count, records_read
  end

  # Returns the result of validating +datum+ against schema +schm+.
  def validate(schm, datum)
    Avro::Schema.validate(schm, datum)
  end

  # Builds a DatumWriter for +schm+.
  def datum_writer(schm)
    Avro::IO::DatumWriter.new(schm)
  end

  # Builds a DatumReader for +schm+.
  def datum_reader(schm)
    Avro::IO::DatumReader.new(schm)
  end

  # Binary-encodes +datum+ with +writers_schema+ into a fresh StringIO.
  # Returns [buffer, encoder, datum_writer].
  def write_datum(datum, writers_schema)
    writer = StringIO.new
    encoder = Avro::IO::BinaryEncoder.new(writer)
    datum_writer = Avro::IO::DatumWriter.new(writers_schema)
    datum_writer.write(datum, encoder)
    [writer, encoder, datum_writer]
  end

  # Decodes one datum from the contents of +buffer+ (a StringIO) that was
  # written with +writers_schema+, passing +readers_schema+ (which may be
  # nil) to the DatumReader.
  def read_datum(buffer, writers_schema, readers_schema=nil)
    reader = StringIO.new(buffer.string)
    decoder = Avro::IO::BinaryDecoder.new(reader)
    datum_reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
    datum_reader.read(decoder)
  end
end
@@ -0,0 +1,199 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'test_help'
18
+
19
# Tests for Avro::Protocol: parsing of example protocol definitions,
# round-tripping through to_s, and namespace handling of declared types.
class TestProtocol < Test::Unit::TestCase

  # One example protocol definition used as test input.
  class ExampleProtocol
    attr_reader :protocol_string, :valid, :name
    attr_accessor :comment

    # protocol_string:: JSON text of the protocol.
    # valid::           whether the text is expected to parse. Every entry in
    #                   EXAMPLES passes +true+ in this position; without this
    #                   parameter that flag was stored as the name and
    #                   +valid+ was never set.
    # name::            label for the example; defaults to protocol_string.
    # comment::         free-form note about the example.
    def initialize(protocol_string, valid=true, name=nil, comment='')
      @protocol_string = protocol_string
      @valid = valid
      @name = name || protocol_string # default to protocol_string for name
      @comment = comment
    end
  end

  #
  # Example Protocols
  #

  EXAMPLES = [
    ExampleProtocol.new(<<-EOS, true),
      {
        "namespace": "com.acme",
        "protocol": "HelloWorld",

        "types": [
          {"name": "Greeting", "type": "record", "fields": [
            {"name": "message", "type": "string"}]},
          {"name": "Curse", "type": "error", "fields": [
            {"name": "message", "type": "string"}]}
        ],

        "messages": {
          "hello": {
            "request": [{"name": "greeting", "type": "Greeting" }],
            "response": "Greeting",
            "errors": ["Curse"]
          }
        }
      }
    EOS

    ExampleProtocol.new(<<-EOS, true),
      {"namespace": "org.apache.avro.test",
       "protocol": "Simple",

       "types": [
         {"name": "Kind", "type": "enum", "symbols": ["FOO","BAR","BAZ"]},

         {"name": "MD5", "type": "fixed", "size": 16},

         {"name": "TestRecord", "type": "record",
          "fields": [
            {"name": "name", "type": "string", "order": "ignore"},
            {"name": "kind", "type": "Kind", "order": "descending"},
            {"name": "hash", "type": "MD5"}
          ]
         },

         {"name": "TestError", "type": "error", "fields": [
            {"name": "message", "type": "string"}
          ]
         }

       ],

       "messages": {

         "hello": {
           "request": [{"name": "greeting", "type": "string"}],
           "response": "string"
         },

         "echo": {
           "request": [{"name": "record", "type": "TestRecord"}],
           "response": "TestRecord"
         },

         "add": {
           "request": [{"name": "arg1", "type": "int"}, {"name": "arg2", "type": "int"}],
           "response": "int"
         },

         "echoBytes": {
           "request": [{"name": "data", "type": "bytes"}],
           "response": "bytes"
         },

         "error": {
           "request": [],
           "response": "null",
           "errors": ["TestError"]
         }
       }

      }
    EOS
    ExampleProtocol.new(<<-EOS, true),
      {"namespace": "org.apache.avro.test.namespace",
       "protocol": "TestNamespace",

       "types": [
         {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
         {"name": "TestRecord", "type": "record",
          "fields": [ {"name": "hash", "type": "org.apache.avro.test.util.MD5"} ]
         },
         {"name": "TestError", "namespace": "org.apache.avro.test.errors",
          "type": "error", "fields": [ {"name": "message", "type": "string"} ]
         }
       ],

       "messages": {
         "echo": {
           "request": [{"name": "record", "type": "TestRecord"}],
           "response": "TestRecord"
         },

         "error": {
           "request": [],
           "response": "null",
           "errors": ["org.apache.avro.test.errors.TestError"]
         }

       }

      }
    EOS
    ExampleProtocol.new(<<-EOS, true)
      {"namespace": "org.apache.avro.test",
       "protocol": "BulkData",

       "types": [],

       "messages": {

         "read": {
           "request": [],
           "response": "bytes"
         },

         "write": {
           "request": [ {"name": "data", "type": "bytes"} ],
           "response": "null"
         }

       }

      }
    EOS
  ].freeze

  Protocol = Avro::Protocol

  # Every example must parse without raising.
  def test_parse
    EXAMPLES.each do |example|
      assert_nothing_raised("should be valid: #{example.protocol_string}") {
        Protocol.parse(example.protocol_string)
      }
    end
  end

  # The string form of a parsed protocol must itself parse without raising.
  def test_valid_cast_to_string_after_parse
    EXAMPLES.each do |example|
      assert_nothing_raised("round tripped okay #{example.protocol_string}") {
        foo = Protocol.parse(example.protocol_string).to_s
        Protocol.parse(foo)
      }
    end
  end

  # Parsing the string form of a protocol must yield an equal protocol.
  def test_equivalence_after_round_trip
    EXAMPLES.each do |example|
      original = Protocol.parse(example.protocol_string)
      round_trip = Protocol.parse(original.to_s)

      assert_equal original, round_trip
    end
  end

  # Types declared without their own namespace must report the namespace of
  # the enclosing protocol (the first example declares "com.acme").
  def test_namespaces
    protocol = Protocol.parse(EXAMPLES.first.protocol_string)
    protocol.types.each do |type|
      # expected value first, so a failure message reads the right way round
      assert_equal 'com.acme', type.namespace
    end
  end
end