avro 1.8.1 → 1.10.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,3 @@
1
- #!/usr/bin/env ruby
2
1
  # Licensed to the Apache Software Foundation (ASF) under one
3
2
  # or more contributor license agreements. See the NOTICE file
4
3
  # distributed with this work for additional information
@@ -7,7 +6,7 @@
7
6
  # "License"); you may not use this file except in compliance
8
7
  # with the License. You may obtain a copy of the License at
9
8
  #
10
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
11
10
  #
12
11
  # Unless required by applicable law or agreed to in writing, software
13
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -27,15 +26,17 @@ class RandomData
27
26
  end
28
27
 
29
28
  def nextdata(schm, d=0)
29
+ return logical_nextdata(schm, d=0) unless schm.type_adapter.eql?(Avro::LogicalTypes::Identity)
30
+
30
31
  case schm.type_sym
31
32
  when :boolean
32
33
  rand > 0.5
33
34
  when :string
34
35
  randstr()
35
36
  when :int
36
- rand(Avro::Schema::INT_MAX_VALUE - Avro::Schema::INT_MIN_VALUE) + Avro::Schema::INT_MIN_VALUE
37
+ rand_int
37
38
  when :long
38
- rand(Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) + Avro::Schema::LONG_MIN_VALUE
39
+ rand_long
39
40
  when :float
40
41
  (-1024 + 2048 * rand).round.to_f
41
42
  when :double
@@ -79,6 +80,17 @@ class RandomData
79
80
  end
80
81
  end
81
82
 
83
+ def logical_nextdata(schm, _d=0)
84
+ case schm.logical_type
85
+ when 'date'
86
+ Avro::LogicalTypes::IntDate.decode(rand_int)
87
+ when 'timestamp-micros'
88
+ Avro::LogicalTypes::TimestampMicros.decode(rand_long)
89
+ when 'timestamp-millis'
90
+ Avro::LogicalTypes::TimestampMillis.decode(rand_long)
91
+ end
92
+ end
93
+
82
94
  CHARPOOL = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789'
83
95
  BYTEPOOL = '12345abcd'
84
96
 
@@ -87,4 +99,12 @@ class RandomData
87
99
  rand(length+1).times { str << chars[rand(chars.size)] }
88
100
  str
89
101
  end
102
+
103
+ def rand_int
104
+ rand(Avro::Schema::INT_MAX_VALUE - Avro::Schema::INT_MIN_VALUE) + Avro::Schema::INT_MIN_VALUE
105
+ end
106
+
107
+ def rand_long
108
+ rand(Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) + Avro::Schema::LONG_MIN_VALUE
109
+ end
90
110
  end
@@ -7,7 +7,7 @@
7
7
  # "License"); you may not use this file except in compliance
8
8
  # with the License. You may obtain a copy of the License at
9
9
  #
10
- # http://www.apache.org/licenses/LICENSE-2.0
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
11
  #
12
12
  # Unless required by applicable law or agreed to in writing, software
13
13
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -7,7 +7,7 @@
7
7
  # "License"); you may not use this file except in compliance
8
8
  # with the License. You may obtain a copy of the License at
9
9
  #
10
- # http://www.apache.org/licenses/LICENSE-2.0
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
11
  #
12
12
  # Unless required by applicable law or agreed to in writing, software
13
13
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -7,7 +7,7 @@
7
7
  # "License"); you may not use this file except in compliance
8
8
  # with the License. You may obtain a copy of the License at
9
9
  #
10
- # http://www.apache.org/licenses/LICENSE-2.0
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
11
  #
12
12
  # Unless required by applicable law or agreed to in writing, software
13
13
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -7,7 +7,7 @@
7
7
  # "License"); you may not use this file except in compliance
8
8
  # with the License. You may obtain a copy of the License at
9
9
  #
10
- # http://www.apache.org/licenses/LICENSE-2.0
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
11
  #
12
12
  # Unless required by applicable law or agreed to in writing, software
13
13
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -20,13 +20,13 @@ require 'test_help'
20
20
  class TestDataFile < Test::Unit::TestCase
21
21
  HERE = File.expand_path File.dirname(__FILE__)
22
22
  def setup
23
- if File.exists?(HERE + '/data.avr')
23
+ if File.exist?(HERE + '/data.avr')
24
24
  File.unlink(HERE + '/data.avr')
25
25
  end
26
26
  end
27
27
 
28
28
  def teardown
29
- if File.exists?(HERE + '/data.avr')
29
+ if File.exist?(HERE + '/data.avr')
30
30
  File.unlink(HERE + '/data.avr')
31
31
  end
32
32
  end
@@ -38,7 +38,7 @@ class TestDataFile < Test::Unit::TestCase
38
38
  "fields" : [
39
39
  {"name": "username", "type": "string"},
40
40
  {"name": "age", "type": "int"},
41
- {"name": "verified", "type": "boolean", "default": "false"}
41
+ {"name": "verified", "type": "boolean", "default": false}
42
42
  ]}
43
43
  JSON
44
44
 
@@ -180,6 +180,19 @@ JSON
180
180
  assert_equal records, ['a' * 10_000]
181
181
  end
182
182
 
183
+ def test_zstandard
184
+ Avro::DataFile.open('data.avr', 'w', '"string"', :zstandard) do |writer|
185
+ writer << 'a' * 10_000
186
+ end
187
+ assert(File.size('data.avr') < 600)
188
+
189
+ records = []
190
+ Avro::DataFile.open('data.avr') do |reader|
191
+ reader.each {|record| records << record }
192
+ end
193
+ assert_equal records, ['a' * 10_000]
194
+ end
195
+
183
196
  def test_append_to_deflated_file
184
197
  schema = Avro::Schema.parse('"string"')
185
198
  writer = Avro::IO::DatumWriter.new(schema)
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -34,4 +34,23 @@ class TestFingerprints < Test::Unit::TestCase
34
34
  assert_equal 28572620203319713300323544804233350633246234624932075150020181448463213378117,
35
35
  schema.sha256_fingerprint
36
36
  end
37
+
38
+ def test_crc_64_avro_fingerprint
39
+ schema = Avro::Schema.parse <<-SCHEMA
40
+ { "type": "int" }
41
+ SCHEMA
42
+
43
+ assert_equal 8247732601305521295, # hex: 0x7275d51a3f395c8f
44
+ schema.crc_64_avro_fingerprint
45
+ end
46
+
47
+ # This definitely belongs somewhere else
48
+ def test_single_object_encoding_header
49
+ schema = Avro::Schema.parse <<-SCHEMA
50
+ { "type": "int" }
51
+ SCHEMA
52
+
53
+ assert_equal ["c3", "01", "72", "75", "d5", "1a", "3f", "39", "5c", "8f"].map{|e| e.to_i(16) },
54
+ schema.single_object_encoding_header
55
+ end
37
56
  end
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -84,6 +84,20 @@ EOS
84
84
  check_default(record_schema, '{"f": 11}', {"f" => 11})
85
85
  end
86
86
 
87
+ def test_record_with_logical_type
88
+ record_schema = <<EOS
89
+ {"type": "record",
90
+ "name": "Test",
91
+ "fields": [{"name": "ts",
92
+ "type": {"type": "long",
93
+ "logicalType": "timestamp-micros"}},
94
+ {"name": "ts2",
95
+ "type": {"type": "long",
96
+ "logicalType": "timestamp-millis"}}]}
97
+ EOS
98
+ check(record_schema)
99
+ end
100
+
87
101
  def test_error
88
102
  error_schema = <<EOS
89
103
  {"type": "error",
@@ -101,6 +115,13 @@ EOS
101
115
  check_default(enum_schema, '"B"', "B")
102
116
  end
103
117
 
118
+ def test_enum_with_default
119
+ enum_schema = '{"type": "enum", "name": "Test", "symbols": ["A", "B"], "default": "A"}'
120
+ check(enum_schema)
121
+ # Field default is used for missing field.
122
+ check_default(enum_schema, '"B"', "B")
123
+ end
124
+
104
125
  def test_recursive
105
126
  recursive_schema = <<EOS
106
127
  {"type": "record",
@@ -115,6 +136,7 @@ EOS
115
136
  def test_union
116
137
  union_schema = <<EOS
117
138
  ["string",
139
+ {"type": "int", "logicalType": "date"},
118
140
  "null",
119
141
  "long",
120
142
  {"type": "record",
@@ -146,10 +168,42 @@ EOS
146
168
  check_default(fixed_schema, '"a"', "a")
147
169
  end
148
170
 
171
+ def test_record_variable_key_types
172
+ datum = { sym: "foo", "str"=>"bar"}
173
+ ret_val = { "sym"=> "foo", "str"=>"bar"}
174
+ schema = Schema.parse('{"type":"record", "name":"rec", "fields":[{"name":"sym", "type":"string"}, {"name":"str", "type":"string"}]}')
175
+
176
+ writer, _encoder, _datum_writer = write_datum(datum, schema)
177
+
178
+ ret_datum = read_datum(writer, schema)
179
+ assert_equal ret_datum, ret_val
180
+ end
181
+
182
+ def test_record_with_nil
183
+ schema = Avro::Schema.parse('{"type":"record", "name":"rec", "fields":[{"type":"int", "name":"i"}]}')
184
+ assert_raise(Avro::IO::AvroTypeError) do
185
+ write_datum(nil, schema)
186
+ end
187
+ end
188
+
189
+ def test_array_with_nil
190
+ schema = Avro::Schema.parse('{"type":"array", "items":"int"}')
191
+ assert_raise(Avro::IO::AvroTypeError) do
192
+ write_datum(nil, schema)
193
+ end
194
+ end
195
+
196
+ def test_map_with_nil
197
+ schema = Avro::Schema.parse('{"type":"map", "values":"long"}')
198
+ assert_raise(Avro::IO::AvroTypeError) do
199
+ write_datum(nil, schema)
200
+ end
201
+ end
202
+
149
203
  def test_enum_with_duplicate
150
204
  str = '{"type": "enum", "name": "Test","symbols" : ["AA", "AA"]}'
151
- assert_raises(Avro::SchemaParseError) do
152
- schema = Avro::Schema.parse str
205
+ assert_raises(Avro::SchemaParseError.new('Duplicate symbol: ["AA", "AA"]')) do
206
+ Avro::Schema.parse str
153
207
  end
154
208
  end
155
209
 
@@ -256,7 +310,7 @@ EOS
256
310
  end
257
311
 
258
312
  def test_skip_long
259
- for value_to_skip, hex_encoding in BINARY_INT_ENCODINGS
313
+ for value_to_skip, _hex_encoding in BINARY_INT_ENCODINGS
260
314
  value_to_read = 6253
261
315
 
262
316
  # write some data in binary to string buffer
@@ -281,7 +335,7 @@ EOS
281
335
  end
282
336
 
283
337
  def test_skip_int
284
- for value_to_skip, hex_encoding in BINARY_INT_ENCODINGS
338
+ for value_to_skip, _hex_encoding in BINARY_INT_ENCODINGS
285
339
  value_to_read = 6253
286
340
 
287
341
  writer = StringIO.new
@@ -331,7 +385,7 @@ EOS
331
385
  datum_to_write = 219
332
386
  for rs in promotable_schemas[(i + 1)..-1]
333
387
  readers_schema = Avro::Schema.parse(rs)
334
- writer, enc, dw = write_datum(datum_to_write, writers_schema)
388
+ writer, _enc, _dw = write_datum(datum_to_write, writers_schema)
335
389
  datum_read = read_datum(writer, writers_schema, readers_schema)
336
390
  if datum_read != datum_to_write
337
391
  incorrect += 1
@@ -340,8 +394,131 @@ EOS
340
394
  assert_equal(incorrect, 0)
341
395
  end
342
396
  end
397
+
398
+ def test_interchangeable_schemas
399
+ interchangeable_schemas = ['"string"', '"bytes"']
400
+ incorrect = 0
401
+ interchangeable_schemas.each_with_index do |ws, i|
402
+ writers_schema = Avro::Schema.parse(ws)
403
+ datum_to_write = 'foo'
404
+ readers_schema = Avro::Schema.parse(interchangeable_schemas[i == 0 ? 1 : 0])
405
+ writer, * = write_datum(datum_to_write, writers_schema)
406
+ datum_read = read_datum(writer, writers_schema, readers_schema)
407
+ if datum_read != datum_to_write
408
+ incorrect += 1
409
+ end
410
+ end
411
+ assert_equal(incorrect, 0)
412
+ end
413
+
414
+ def test_unknown_enum_symbol
415
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
416
+ {
417
+ "type": "enum",
418
+ "name": "test",
419
+ "symbols": ["B", "C"]
420
+ }
421
+ SCHEMA
422
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
423
+ {
424
+ "type": "enum",
425
+ "name": "test",
426
+ "symbols": ["A", "B"]
427
+ }
428
+ SCHEMA
429
+ datum_to_write = "C"
430
+ writer, * = write_datum(datum_to_write, writers_schema)
431
+ datum_read = read_datum(writer, writers_schema, readers_schema)
432
+ # Ruby implementation did not follow the spec and returns the writer's symbol here
433
+ assert_equal(datum_read, datum_to_write)
434
+ end
435
+
436
+ def test_unknown_enum_symbol_with_enum_default
437
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
438
+ {
439
+ "type": "enum",
440
+ "name": "test",
441
+ "symbols": ["B", "C"]
442
+ }
443
+ SCHEMA
444
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
445
+ {
446
+ "type": "enum",
447
+ "name": "test",
448
+ "symbols": ["A", "B", "UNKNOWN"],
449
+ "default": "UNKNOWN"
450
+ }
451
+ SCHEMA
452
+ datum_to_write = "C"
453
+ writer, * = write_datum(datum_to_write, writers_schema)
454
+ datum_read = read_datum(writer, writers_schema, readers_schema)
455
+ assert_equal(datum_read, "UNKNOWN")
456
+ end
457
+
458
+ def test_array_schema_promotion
459
+ writers_schema = Avro::Schema.parse('{"type":"array", "items":"int"}')
460
+ readers_schema = Avro::Schema.parse('{"type":"array", "items":"long"}')
461
+ datum_to_write = [1, 2]
462
+ writer, * = write_datum(datum_to_write, writers_schema)
463
+ datum_read = read_datum(writer, writers_schema, readers_schema)
464
+ assert_equal(datum_read, datum_to_write)
465
+ end
466
+
467
+ def test_map_schema_promotion
468
+ writers_schema = Avro::Schema.parse('{"type":"map", "values":"int"}')
469
+ readers_schema = Avro::Schema.parse('{"type":"map", "values":"long"}')
470
+ datum_to_write = { 'foo' => 1, 'bar' => 2 }
471
+ writer, * = write_datum(datum_to_write, writers_schema)
472
+ datum_read = read_datum(writer, writers_schema, readers_schema)
473
+ assert_equal(datum_read, datum_to_write)
474
+ end
475
+
476
+ def test_aliased
477
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
478
+ {"type":"record", "name":"Rec1", "fields":[
479
+ {"name":"field1", "type":"int"}
480
+ ]}
481
+ SCHEMA
482
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
483
+ {"type":"record", "name":"Rec2", "aliases":["Rec1"], "fields":[
484
+ {"name":"field2", "aliases":["field1"], "type":"int"}
485
+ ]}
486
+ SCHEMA
487
+ writer, * = write_datum({ 'field1' => 1 }, writers_schema)
488
+ datum_read = read_datum(writer, writers_schema, readers_schema)
489
+ assert_equal(datum_read, { 'field2' => 1 })
490
+ end
491
+
492
+ def test_snappy_backward_compat
493
+ # a snappy-compressed block payload without the checksum
494
+ # this has no back-references, just one literal so the last 9
495
+ # bytes are the uncompressed payload.
496
+ old_snappy_bytes = "\x09\x20\x02\x06\x02\x0a\x67\x72\x65\x65\x6e"
497
+ uncompressed_bytes = "\x02\x06\x02\x0a\x67\x72\x65\x65\x6e"
498
+ snappy = Avro::DataFile::SnappyCodec.new
499
+ assert_equal(uncompressed_bytes, snappy.decompress(old_snappy_bytes))
500
+ end
501
+
343
502
  private
344
503
 
504
+ def check_no_default(schema_json)
505
+ actual_schema = '{"type": "record", "name": "Foo", "fields": []}'
506
+ actual = Avro::Schema.parse(actual_schema)
507
+
508
+ expected_schema = <<EOS
509
+ {"type": "record",
510
+ "name": "Foo",
511
+ "fields": [{"name": "f", "type": #{schema_json}}]}
512
+ EOS
513
+ expected = Avro::Schema.parse(expected_schema)
514
+
515
+ reader = Avro::IO::DatumReader.new(actual, expected)
516
+ assert_raise Avro::AvroError do
517
+ value = reader.read(Avro::IO::BinaryDecoder.new(StringIO.new))
518
+ assert_not_equal(value, :no_default) # should never return this
519
+ end
520
+ end
521
+
345
522
  def check_default(schema_json, default_json, default_value)
346
523
  actual_schema = '{"type": "record", "name": "Foo", "fields": []}'
347
524
  actual = Avro::Schema.parse(actual_schema)
@@ -381,11 +558,14 @@ EOS
381
558
 
382
559
  # test writing of data to file
383
560
  check_datafile(schema)
561
+
562
+ # check that AvroError is raised when there is no default
563
+ check_no_default(str)
384
564
  end
385
565
 
386
566
  def checkser(schm, randomdata)
387
567
  datum = randomdata.next
388
- assert validate(schm, datum)
568
+ assert validate(schm, datum), 'datum is not valid for schema'
389
569
  w = Avro::IO::DatumWriter.new(schm)
390
570
  writer = StringIO.new "", "w"
391
571
  w.write(datum, Avro::IO::BinaryEncoder.new(writer))