avro-jruby 1.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,23 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'rubygems'
18
+ require 'test/unit'
19
+ require 'stringio'
20
+ require 'fileutils'
21
+ FileUtils.mkdir_p('tmp')
22
+ require 'avro'
23
+ require 'random_data'
@@ -0,0 +1,393 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'test_help'
18
+
19
+ class TestIO < Test::Unit::TestCase
20
+ DATAFILE = 'tmp/test.rb.avro'
21
+ Schema = Avro::Schema
22
+
23
# A JSON null default on a "null"-typed field is expected to read back as nil.
def test_null
  null_type = '"null"'
  check_default(null_type, 'null', nil)
end
26
+
27
# Both boolean literals are exercised as field defaults.
def test_boolean
  [['true', true], ['false', false]].each do |default_json, expected|
    check_default('"boolean"', default_json, expected)
  end
end
31
+
32
# A JSON string default on a "string" field reads back as the same text.
def test_string
  string_type = '"string"'
  check_default(string_type, '"foo"', 'foo')
end
35
+
36
# A JSON string default on a "bytes" field reads back as the same text.
def test_bytes
  bytes_type = '"bytes"'
  check_default(bytes_type, '"foo"', 'foo')
end
39
+
40
# Integer default on an "int" field.
def test_int
  int_type = '"int"'
  check_default(int_type, '5', 5)
end
43
+
44
# Integer default on a "long" field.
def test_long
  long_type = '"long"'
  check_default(long_type, '9', 9)
end
47
+
48
# Decimal default on a "float" field.
def test_float
  float_type = '"float"'
  check_default(float_type, '1.2', 1.2)
end
51
+
52
# Decimal default on a "double" field.
def test_double
  double_type = '"double"'
  check_default(double_type, '1.2', 1.2)
end
55
+
56
# Array-of-long schema with a one-element default.
def test_array
  check_default('{"type": "array", "items": "long"}', '[1]', [1])
end
60
+
61
# Map-of-long schema with a single-entry default.
def test_map
  expected = { 'a' => 1 }
  check_default('{"type": "map", "values": "long"}', '{"a": 1}', expected)
end
65
+
66
# Record schema with one long field, used both directly and as a field default.
def test_record
  check_default(<<EOS, '{"f": 11}', { 'f' => 11 })
{"type": "record",
"name": "Test",
"fields": [{"name": "f",
"type": "long"}]}
EOS
end
75
+
76
# Error schema with a single string field and a matching default.
def test_error
  check_default(<<EOS, '{"message": "boom"}', { 'message' => 'boom' })
{"type": "error",
"name": "TestError",
"fields": [{"name": "message",
"type": "string"}]}
EOS
end
85
+
86
# Two-symbol enum whose default is the second symbol.
def test_enum
  check_default('{"type": "enum", "name": "Test","symbols": ["A", "B"]}', '"B"', 'B')
end
90
+
91
# Record that refers to itself through an array field; exercised via check only.
def test_recursive
  check(<<EOS)
{"type": "record",
"name": "Node",
"fields": [{"name": "label", "type": "string"},
{"name": "children",
"type": {"type": "array", "items": "Node"}}]}
EOS
end
101
+
102
# Union of primitives and a record goes through check; a numeric union is
# additionally used as a field type with a default.
def test_union
  check(<<EOS)
["string",
"null",
"long",
{"type": "record",
"name": "Cons",
"fields": [{"name": "car", "type": "string"},
{"name": "cdr", "type": "string"}]}]
EOS
  numeric_union = '["double", "long"]'
  check_default(numeric_union, '1.1', 1.1)
end
115
+
116
# Mutually recursive Lisp/Cons record pair nested inside a union.
def test_lisp
  check(<<EOS)
{"type": "record",
"name": "Lisp",
"fields": [{"name": "value",
"type": ["null", "string",
{"type": "record",
"name": "Cons",
"fields": [{"name": "car", "type": "Lisp"},
{"name": "cdr", "type": "Lisp"}]}]}]}
EOS
end
129
+
130
# One-byte fixed schema with a one-character default.
def test_fixed
  check_default('{"type": "fixed", "name": "Test", "size": 1}', '"a"', 'a')
end
134
+
135
# Parsing an enum schema that lists the same symbol twice must raise
# Avro::SchemaParseError.
def test_enum_with_duplicate
  str = '{"type": "enum", "name": "Test","symbols" : ["AA", "AA"]}'
  # The parse result is discarded on purpose; only the raised error matters.
  assert_raises(Avro::SchemaParseError) { Avro::Schema.parse(str) }
end
141
+
142
# Pairs of [integer value, expected binary encoding as space-separated hex
# bytes]. Shared by the int/long encoding and skip tests, so the table and
# each row are frozen to keep one test from altering another's fixtures.
BINARY_INT_ENCODINGS = [
  [0, '00'],
  [-1, '01'],
  [1, '02'],
  [-2, '03'],
  [2, '04'],
  [-64, '7f'],
  [64, '80 01'],
  [8192, '80 80 01'],
  [-8193, '81 80 01'],
].map(&:freeze).freeze
153
+
154
# Reads one variable-length value from +reader+ and renders it as
# space-separated hex bytes, e.g. "80 01".
#
# reader - IO-like object positioned at the first byte; must respond to read(1).
#
# Bytes are consumed until one has its high bit (0x80) clear. If the input
# ends first, reading stops and the bytes seen so far are returned, so a
# truncated buffer shows up as a readable assertion mismatch in the caller
# instead of a NoMethodError on nil.
#
# Returns the hex String ("" when the reader is already at end of input).
def avro_hexlify(reader)
  bytes = []
  loop do
    current_byte = reader.read(1)
    break if current_byte.nil? || current_byte.empty?
    bytes << hexlify(current_byte)
    # High bit clear means this was the last byte of the value.
    break if (current_byte.unpack('C').first & 0x80).zero?
  end
  bytes.join ' '
end
164
+
165
# Converts a binary string to lowercase hex, two digits per byte.
#
# msg - String of raw bytes.
#
# Returns a String. String#unpack always returns an Array, so the single
# "H*" result is taken out with #first rather than handed back wrapped.
def hexlify(msg)
  msg.unpack('H*').first
end
168
+
169
# Every table entry, written with an "int" schema, must produce the expected
# hex byte sequence.
def test_binary_int_encoding
  BINARY_INT_ENCODINGS.each do |value, hex_encoding|
    # Encode the value into an in-memory buffer.
    buffer = StringIO.new
    int_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"int"'))
    int_writer.write(value, Avro::IO::BinaryEncoder.new(buffer))

    # Rewind and compare the raw bytes as hex.
    buffer.seek(0)
    assert_equal hex_encoding, avro_hexlify(buffer)
  end
end
183
+
184
# Same table as the int test, written with a "long" schema.
def test_binary_long_encoding
  BINARY_INT_ENCODINGS.each do |value, hex_encoding|
    buffer = StringIO.new
    long_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"long"'))
    long_writer.write(value, Avro::IO::BinaryEncoder.new(buffer))

    # Rewind, then hexlify what was written.
    buffer.seek(0)
    assert_equal hex_encoding, avro_hexlify(buffer)
  end
end
198
+
199
# Writes two longs, skips the first with BinaryDecoder#skip_long, and checks
# that the next read yields the second.
def test_skip_long
  BINARY_INT_ENCODINGS.each do |value_to_skip, _hex_encoding|
    value_to_read = 6253

    # Encode both values back to back.
    sink = StringIO.new
    binary_encoder = Avro::IO::BinaryEncoder.new(sink)
    long_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"long"'))
    [value_to_skip, value_to_read].each { |v| long_writer.write(v, binary_encoder) }

    # Skip over the first value.
    decoder = Avro::IO::BinaryDecoder.new(StringIO.new(sink.string))
    decoder.skip_long

    # The decoder should now be positioned on the second value.
    long_reader = Avro::IO::DatumReader.new(Avro::Schema.parse('"long"'))
    assert_equal value_to_read, long_reader.read(decoder)
  end
end
223
+
224
# Writes two ints, skips the first with BinaryDecoder#skip_int, and checks
# that the next read yields the second.
def test_skip_int
  BINARY_INT_ENCODINGS.each do |value_to_skip, _hex_encoding|
    value_to_read = 6253

    sink = StringIO.new
    binary_encoder = Avro::IO::BinaryEncoder.new(sink)
    int_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"int"'))
    [value_to_skip, value_to_read].each { |v| int_writer.write(v, binary_encoder) }

    decoder = Avro::IO::BinaryDecoder.new(StringIO.new(sink.string))
    decoder.skip_int

    int_reader = Avro::IO::DatumReader.new(Avro::Schema.parse('"int"'))
    assert_equal value_to_read, int_reader.read(decoder)
  end
end
244
+
245
# For several values written under an int/string/null union, skips the first
# datum via DatumReader#skip_data and checks the following int is read intact.
def test_skip_union
  skipped_values = ["hello", -1, 32, nil]
  skipped_values.each do |value_to_skip|
    value_to_read = 6253
    schema = Avro::Schema.parse('["int", "string", "null"]')

    sink = StringIO.new
    binary_encoder = Avro::IO::BinaryEncoder.new(sink)
    union_writer = Avro::IO::DatumWriter.new(schema)
    union_writer.write(value_to_skip, binary_encoder)
    union_writer.write(value_to_read, binary_encoder)

    decoder = Avro::IO::BinaryDecoder.new(StringIO.new(sink.string))
    union_reader = Avro::IO::DatumReader.new(schema)
    union_reader.skip_data(schema, decoder)

    assert_equal value_to_read, union_reader.read(decoder)
  end
end
265
+
266
+
267
# Writes 219 with each numeric schema and reads it back with every schema
# later in the int -> long -> float -> double list, expecting the value to
# survive the promotion.
#
# Each writer/reader pair is asserted on its own, with the expected value
# first as assert_equal requires, so a failure names the offending pair
# instead of reporting an aggregate mismatch count.
def test_schema_promotion
  promotable_schemas = ['"int"', '"long"', '"float"', '"double"']
  datum_to_write = 219
  promotable_schemas.each_with_index do |ws, i|
    writers_schema = Avro::Schema.parse(ws)
    promotable_schemas[(i + 1)..-1].each do |rs|
      readers_schema = Avro::Schema.parse(rs)
      # write_datum also returns the encoder and datum writer; only the
      # buffer is needed here.
      writer, = write_datum(datum_to_write, writers_schema)
      datum_read = read_datum(writer, writers_schema, readers_schema)
      assert_equal(datum_to_write, datum_read, "promoting #{ws} to #{rs}")
    end
  end
end
284
+ private
285
+
286
# Runs the generic schema checks on +schema_json+, then verifies field-default
# resolution: data written with an empty record "Foo" is read with a "Foo"
# that has one field "f" of the given type and default, and "f" must come
# back equal to +default_value+.
#
# schema_json   - JSON text of the field's type.
# default_json  - JSON text of the field's default.
# default_value - Ruby value expected for the field after reading.
def check_default(schema_json, default_json, default_value)
  check(schema_json)

  # Schema with no fields, passed as the first DatumReader argument.
  fieldless = Avro::Schema.parse('{"type": "record", "name": "Foo", "fields": []}')

  # Schema carrying the defaulted field, passed as the second argument.
  with_default = Avro::Schema.parse(<<EOS)
{"type": "record",
"name": "Foo",
"fields": [{"name": "f", "type": #{schema_json}, "default": #{default_json}}]}
EOS

  # Nothing is encoded, so the decoder wraps an empty buffer.
  empty_input = Avro::IO::BinaryDecoder.new(StringIO.new)
  record = Avro::IO::DatumReader.new(fieldless, with_default).read(empty_input)
  assert_equal default_value, record["f"]
end
302
+
303
# Generic battery run against a schema given as JSON text +str+: JSON round
# trip, equality of two parses, hashing, random-data serialization and a
# data-file write/read cycle.
def check(str)
  schema = Avro::Schema.parse(str)

  # Serializing the parsed schema must give JSON equivalent to the input.
  # NB: I don't think we should do this. Why enforce ordering?
  rendered = schema.to_s
  assert_equal(MultiJson.load(str), MultiJson.load(rendered))

  # Two parses of the same text must compare equal.
  reparsed = Avro::Schema.parse(str)
  assert_equal(schema, reparsed)

  # Hashing must terminate (no infinite recursion).
  schema.hash

  # Round-trip nine random datums through the binary encoding.
  generator = RandomData.new(schema)
  9.times { checkser(schema, generator) }

  # Finally exercise the data-file writer and reader.
  check_datafile(schema)
end
327
+
328
# Takes the next datum from +randomdata+, checks it validates against +schm+,
# binary-encodes it, decodes it again and asserts the result equals the input.
def checkser(schm, randomdata)
  datum = randomdata.next
  assert validate(schm, datum)

  sink = StringIO.new("", "w")
  binary_encoder = Avro::IO::BinaryEncoder.new(sink)
  Avro::IO::DatumWriter.new(schm).write(datum, binary_encoder)

  binary_decoder = Avro::IO::BinaryDecoder.new(StringIO.new(sink.string))
  round_tripped = datum_reader(schm).read(binary_decoder)
  assert_equal(datum, round_tripped) # FIXME check on assertdata conditional
end
339
+
340
# Writes ten seeded random records for +schm+ to DATAFILE, reads the file
# back, and asserts every record matches a generator re-created with the
# same seed and that exactly ten records were read.
#
# Both files are opened in block form so the handles are released even when
# an assertion raises part-way through. The read side uses 'rb': the file is
# binary and is only read. Records are counted explicitly so an empty file
# fails the count assertion instead of raising NoMethodError on nil.
def check_datafile(schm)
  seed = 0
  count = 10

  random_data = RandomData.new(schm, seed)
  File.open(DATAFILE, 'wb') do |f|
    dw = Avro::DataFile::Writer.new(f, datum_writer(schm), schm)
    count.times { dw << random_data.next }
    dw.close
  end

  # Fresh generator with the same seed, to compare against what was written.
  random_data = RandomData.new(schm, seed)
  records_read = 0
  File.open(DATAFILE, 'rb') do |f|
    dr = Avro::DataFile::Reader.new(f, datum_reader(schm))
    dr.each do |data|
      # FIXME assertdata conditional
      assert_equal(random_data.next, data)
      records_read += 1
    end
    dr.close
  end
  assert_equal count, records_read
end
366
+
367
# Delegates to Avro::Schema.validate for +datum+ against +schm+ and returns
# its result.
def validate(schm, datum)
  Avro::Schema.validate schm, datum
end
370
+
371
# Builds a new Avro::IO::DatumWriter for +schm+.
def datum_writer(schm)
  writer_class = Avro::IO::DatumWriter
  writer_class.new(schm)
end
374
+
375
# Builds a new Avro::IO::DatumReader for +schm+.
def datum_reader(schm)
  reader_class = Avro::IO::DatumReader
  reader_class.new(schm)
end
378
+
379
# Binary-encodes +datum+ with +writers_schema+ into a fresh in-memory buffer.
#
# Returns a three-element Array: the StringIO buffer, the BinaryEncoder and
# the DatumWriter that were used.
def write_datum(datum, writers_schema)
  buffer = StringIO.new
  binary_encoder = Avro::IO::BinaryEncoder.new(buffer)
  schema_writer = Avro::IO::DatumWriter.new(writers_schema)
  schema_writer.write(datum, binary_encoder)
  [buffer, binary_encoder, schema_writer]
end
386
+
387
# Decodes one datum from the bytes held by +buffer+ (anything responding to
# #string), using +writers_schema+ and, optionally, +readers_schema+.
#
# Returns the decoded datum.
def read_datum(buffer, writers_schema, readers_schema=nil)
  binary_decoder = Avro::IO::BinaryDecoder.new(StringIO.new(buffer.string))
  Avro::IO::DatumReader.new(writers_schema, readers_schema).read(binary_decoder)
end
393
+ end
@@ -0,0 +1,199 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'test_help'
18
+
19
+ class TestProtocol < Test::Unit::TestCase
20
+
21
# Pairs a protocol's JSON text with a display name, a validity flag and a
# comment, for use in the table of example protocols.
class ExampleProtocol
  attr_reader :protocol_string, :valid, :name
  attr_accessor :comment

  # protocol_string - JSON source of the protocol.
  # name            - display name; defaults to protocol_string. The example
  #                   table passes a boolean in this position, so true/false
  #                   is taken as the validity flag (exposed through #valid)
  #                   and the name falls back to protocol_string.
  # comment         - free-form note (default: '').
  def initialize(protocol_string, name=nil, comment='')
    @protocol_string = protocol_string
    @valid = nil
    case name
    when true, false
      @valid = name
      name = nil
    end
    @name = name || protocol_string # default to protocol_string for name
    @comment = comment
  end
end
30
+ #
31
+ # Example Protocols
32
+ #
33
+
34
# Protocol definitions that the tests in this class parse and round-trip.
# Frozen so that no test can add to or drop from the shared list.
# NOTE(review): the second argument `true` lands in ExampleProtocol#initialize's
# `name` position; it looks like it is meant as a validity flag.
EXAMPLES = [
  # HelloWorld: one record type, one error type, one message.
  ExampleProtocol.new(<<-EOS, true),
{
"namespace": "com.acme",
"protocol": "HelloWorld",

"types": [
{"name": "Greeting", "type": "record", "fields": [
{"name": "message", "type": "string"}]},
{"name": "Curse", "type": "error", "fields": [
{"name": "message", "type": "string"}]}
],

"messages": {
"hello": {
"request": [{"name": "greeting", "type": "Greeting" }],
"response": "Greeting",
"errors": ["Curse"]
}
}
}
EOS

  # Simple: enum, fixed, record and error types with five messages.
  ExampleProtocol.new(<<-EOS, true),
{"namespace": "org.apache.avro.test",
"protocol": "Simple",

"types": [
{"name": "Kind", "type": "enum", "symbols": ["FOO","BAR","BAZ"]},

{"name": "MD5", "type": "fixed", "size": 16},

{"name": "TestRecord", "type": "record",
"fields": [
{"name": "name", "type": "string", "order": "ignore"},
{"name": "kind", "type": "Kind", "order": "descending"},
{"name": "hash", "type": "MD5"}
]
},

{"name": "TestError", "type": "error", "fields": [
{"name": "message", "type": "string"}
]
}

],

"messages": {

"hello": {
"request": [{"name": "greeting", "type": "string"}],
"response": "string"
},

"echo": {
"request": [{"name": "record", "type": "TestRecord"}],
"response": "TestRecord"
},

"add": {
"request": [{"name": "arg1", "type": "int"}, {"name": "arg2", "type": "int"}],
"response": "int"
},

"echoBytes": {
"request": [{"name": "data", "type": "bytes"}],
"response": "bytes"
},

"error": {
"request": [],
"response": "null",
"errors": ["TestError"]
}
}

}
EOS
  # TestNamespace: types that use fully qualified names and a per-type namespace.
  ExampleProtocol.new(<<-EOS, true),
{"namespace": "org.apache.avro.test.namespace",
"protocol": "TestNamespace",

"types": [
{"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
{"name": "TestRecord", "type": "record",
"fields": [ {"name": "hash", "type": "org.apache.avro.test.util.MD5"} ]
},
{"name": "TestError", "namespace": "org.apache.avro.test.errors",
"type": "error", "fields": [ {"name": "message", "type": "string"} ]
}
],

"messages": {
"echo": {
"request": [{"name": "record", "type": "TestRecord"}],
"response": "TestRecord"
},

"error": {
"request": [],
"response": "null",
"errors": ["org.apache.avro.test.errors.TestError"]
}

}

}
EOS
  # BulkData: no declared types, two bytes-based messages.
  ExampleProtocol.new(<<-EOS, true)
{"namespace": "org.apache.avro.test",
"protocol": "BulkData",

"types": [],

"messages": {

"read": {
"request": [],
"response": "bytes"
},

"write": {
"request": [ {"name": "data", "type": "bytes"} ],
"response": "null"
}

}

}
EOS
].freeze
165
+
166
+ Protocol = Avro::Protocol
167
# Every example protocol must parse without raising.
def test_parse
  EXAMPLES.each do |example|
    source = example.protocol_string
    assert_nothing_raised("should be valid: #{source}") do
      Protocol.parse(source)
    end
  end
end
174
+
175
# The string form of a parsed protocol must itself parse without raising.
def test_valid_cast_to_string_after_parse
  EXAMPLES.each do |example|
    source = example.protocol_string
    assert_nothing_raised("round tripped okay #{source}") do
      Protocol.parse(Protocol.parse(source).to_s)
    end
  end
end
183
+
184
# A protocol re-parsed from its own string form must equal the original.
def test_equivalence_after_round_trip
  EXAMPLES.map(&:protocol_string).each do |source|
    original = Protocol.parse(source)
    assert_equal original, Protocol.parse(original.to_s)
  end
end
192
+
193
# Every type declared by the first example protocol must carry the
# protocol's namespace, 'com.acme'.
def test_namespaces
  protocol = Protocol.parse(EXAMPLES.first.protocol_string)
  protocol.types.each do |type|
    # Expected value first, so a failure message labels the values correctly.
    assert_equal 'com.acme', type.namespace
  end
end
199
+ end