avro 1.9.1 → 1.10.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -28,10 +28,8 @@ module Avro
28
28
  end
29
29
 
30
30
  # Perform a basic check that a datum written with the writers_schema could
31
- # be read using the readers_schema. This check only includes matching the types,
32
- # including schema promotion, and matching the full name for named types.
33
- # Aliases for named types are not supported here, and the ruby implementation
34
- # of Avro in general does not include support for aliases.
31
+ # be read using the readers_schema. This check includes matching the types,
32
+ # including schema promotion, and matching the full name (including aliases) for named types.
35
33
  def self.match_schemas(writers_schema, readers_schema)
36
34
  w_type = writers_schema.type_sym
37
35
  r_type = readers_schema.type_sym
@@ -46,16 +44,16 @@ module Avro
46
44
 
47
45
  case r_type
48
46
  when :record
49
- return writers_schema.fullname == readers_schema.fullname
47
+ return readers_schema.match_fullname?(writers_schema.fullname)
50
48
  when :error
51
- return writers_schema.fullname == readers_schema.fullname
49
+ return readers_schema.match_fullname?(writers_schema.fullname)
52
50
  when :request
53
51
  return true
54
52
  when :fixed
55
- return writers_schema.fullname == readers_schema.fullname &&
53
+ return readers_schema.match_fullname?(writers_schema.fullname) &&
56
54
  writers_schema.size == readers_schema.size
57
55
  when :enum
58
- return writers_schema.fullname == readers_schema.fullname
56
+ return readers_schema.match_fullname?(writers_schema.fullname)
59
57
  when :map
60
58
  return match_schemas(writers_schema.values, readers_schema.values)
61
59
  when :array
@@ -118,8 +116,8 @@ module Avro
118
116
  when :union
119
117
  match_union_schemas(writers_schema, readers_schema)
120
118
  when :enum
121
- # reader's symbols must contain all writer's symbols
122
- (writers_schema.symbols - readers_schema.symbols).empty?
119
+ # reader's symbols must contain all writer's symbols or reader has default
120
+ (writers_schema.symbols - readers_schema.symbols).empty? || !readers_schema.default.nil?
123
121
  else
124
122
  if writers_schema.type_sym == :union && writers_schema.schemas.size == 1
125
123
  full_match_schemas(writers_schema.schemas.first, readers_schema)
@@ -148,7 +146,14 @@ module Avro
148
146
  if writer_fields_hash.key?(field.name)
149
147
  return false unless full_match_schemas(writer_fields_hash[field.name].type, field.type)
150
148
  else
151
- return false unless field.default?
149
+ names = writer_fields_hash.keys & field.alias_names
150
+ if names.size > 1
151
+ return false
152
+ elsif names.size == 1
153
+ return false unless full_match_schemas(writer_fields_hash[names.first].type, field.type)
154
+ else
155
+ return false unless field.default?
156
+ end
152
157
  end
153
158
  end
154
159
 
@@ -44,13 +44,18 @@ class CaseFinder
44
44
  private
45
45
 
46
46
  def scan_case
47
- if id = @scanner.scan(/\/\/ \d+\n/)
47
+ if (id = @scanner.scan(/\/\/ \d+\n/))
48
48
  while @scanner.skip(/\/\/ .*\n/); end
49
49
 
50
50
  input = scan_input
51
51
  canonical = scan_canonical
52
52
  fingerprint = scan_fingerprint
53
-
53
+ if not fingerprint and @cases
54
+ fingerprint = @cases[-1].fingerprint
55
+ end
56
+ if fingerprint
57
+ fingerprint = fingerprint.to_i & 0xFFFF_FFFF_FFFF_FFFF
58
+ end
54
59
  Case.new(id, input, canonical, fingerprint)
55
60
  else
56
61
  @scanner.skip(/.*\n/)
@@ -61,7 +66,7 @@ class CaseFinder
61
66
  def scan_item(name)
62
67
  if @scanner.scan(/<<#{name}\n/)
63
68
  lines = []
64
- while line = @scanner.scan(/.+\n/)
69
+ while (line = @scanner.scan(/.+\n/))
65
70
  break if line.chomp == name
66
71
  lines << line
67
72
  end
@@ -1,4 +1,3 @@
1
- #!/usr/bin/env ruby
2
1
  # Licensed to the Apache Software Foundation (ASF) under one
3
2
  # or more contributor license agreements. See the NOTICE file
4
3
  # distributed with this work for additional information
@@ -85,8 +84,10 @@ class RandomData
85
84
  case schm.logical_type
86
85
  when 'date'
87
86
  Avro::LogicalTypes::IntDate.decode(rand_int)
88
- when 'timestamp-millis', 'timestamp-micros'
87
+ when 'timestamp-micros'
89
88
  Avro::LogicalTypes::TimestampMicros.decode(rand_long)
89
+ when 'timestamp-millis'
90
+ Avro::LogicalTypes::TimestampMillis.decode(rand_long)
90
91
  end
91
92
  end
92
93
 
File without changes
File without changes
File without changes
File without changes
@@ -180,6 +180,19 @@ JSON
180
180
  assert_equal records, ['a' * 10_000]
181
181
  end
182
182
 
183
+ def test_zstandard
184
+ Avro::DataFile.open('data.avr', 'w', '"string"', :zstandard) do |writer|
185
+ writer << 'a' * 10_000
186
+ end
187
+ assert(File.size('data.avr') < 600)
188
+
189
+ records = []
190
+ Avro::DataFile.open('data.avr') do |reader|
191
+ reader.each {|record| records << record }
192
+ end
193
+ assert_equal records, ['a' * 10_000]
194
+ end
195
+
183
196
  def test_append_to_deflated_file
184
197
  schema = Avro::Schema.parse('"string"')
185
198
  writer = Avro::IO::DatumWriter.new(schema)
@@ -34,4 +34,23 @@ class TestFingerprints < Test::Unit::TestCase
34
34
  assert_equal 28572620203319713300323544804233350633246234624932075150020181448463213378117,
35
35
  schema.sha256_fingerprint
36
36
  end
37
+
38
+ def test_crc_64_avro_fingerprint
39
+ schema = Avro::Schema.parse <<-SCHEMA
40
+ { "type": "int" }
41
+ SCHEMA
42
+
43
+ assert_equal 8247732601305521295, # hex: 0x7275d51a3f395c8f
44
+ schema.crc_64_avro_fingerprint
45
+ end
46
+
47
+ # This definitely belongs somewhere else
48
+ def test_single_object_encoding_header
49
+ schema = Avro::Schema.parse <<-SCHEMA
50
+ { "type": "int" }
51
+ SCHEMA
52
+
53
+ assert_equal ["c3", "01", "72", "75", "d5", "1a", "3f", "39", "5c", "8f"].map{|e| e.to_i(16) },
54
+ schema.single_object_encoding_header
55
+ end
37
56
  end
@@ -90,7 +90,10 @@ EOS
90
90
  "name": "Test",
91
91
  "fields": [{"name": "ts",
92
92
  "type": {"type": "long",
93
- "logicalType": "timestamp-micros"}}]}
93
+ "logicalType": "timestamp-micros"}},
94
+ {"name": "ts2",
95
+ "type": {"type": "long",
96
+ "logicalType": "timestamp-millis"}}]}
94
97
  EOS
95
98
  check(record_schema)
96
99
  end
@@ -112,6 +115,13 @@ EOS
112
115
  check_default(enum_schema, '"B"', "B")
113
116
  end
114
117
 
118
+ def test_enum_with_default
119
+ enum_schema = '{"type": "enum", "name": "Test", "symbols": ["A", "B"], "default": "A"}'
120
+ check(enum_schema)
121
+ # Field default is used for missing field.
122
+ check_default(enum_schema, '"B"', "B")
123
+ end
124
+
115
125
  def test_recursive
116
126
  recursive_schema = <<EOS
117
127
  {"type": "record",
@@ -158,6 +168,17 @@ EOS
158
168
  check_default(fixed_schema, '"a"', "a")
159
169
  end
160
170
 
171
+ def test_record_variable_key_types
172
+ datum = { sym: "foo", "str"=>"bar"}
173
+ ret_val = { "sym"=> "foo", "str"=>"bar"}
174
+ schema = Schema.parse('{"type":"record", "name":"rec", "fields":[{"name":"sym", "type":"string"}, {"name":"str", "type":"string"}]}')
175
+
176
+ writer, _encoder, _datum_writer = write_datum(datum, schema)
177
+
178
+ ret_datum = read_datum(writer, schema)
179
+ assert_equal ret_datum, ret_val
180
+ end
181
+
161
182
  def test_record_with_nil
162
183
  schema = Avro::Schema.parse('{"type":"record", "name":"rec", "fields":[{"type":"int", "name":"i"}]}')
163
184
  assert_raise(Avro::IO::AvroTypeError) do
@@ -390,6 +411,50 @@ EOS
390
411
  assert_equal(incorrect, 0)
391
412
  end
392
413
 
414
+ def test_unknown_enum_symbol
415
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
416
+ {
417
+ "type": "enum",
418
+ "name": "test",
419
+ "symbols": ["B", "C"]
420
+ }
421
+ SCHEMA
422
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
423
+ {
424
+ "type": "enum",
425
+ "name": "test",
426
+ "symbols": ["A", "B"]
427
+ }
428
+ SCHEMA
429
+ datum_to_write = "C"
430
+ writer, * = write_datum(datum_to_write, writers_schema)
431
+ datum_read = read_datum(writer, writers_schema, readers_schema)
432
+ # Ruby implementation did not follow the spec and returns the writer's symbol here
433
+ assert_equal(datum_read, datum_to_write)
434
+ end
435
+
436
+ def test_unknown_enum_symbol_with_enum_default
437
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
438
+ {
439
+ "type": "enum",
440
+ "name": "test",
441
+ "symbols": ["B", "C"]
442
+ }
443
+ SCHEMA
444
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
445
+ {
446
+ "type": "enum",
447
+ "name": "test",
448
+ "symbols": ["A", "B", "UNKNOWN"],
449
+ "default": "UNKNOWN"
450
+ }
451
+ SCHEMA
452
+ datum_to_write = "C"
453
+ writer, * = write_datum(datum_to_write, writers_schema)
454
+ datum_read = read_datum(writer, writers_schema, readers_schema)
455
+ assert_equal(datum_read, "UNKNOWN")
456
+ end
457
+
393
458
  def test_array_schema_promotion
394
459
  writers_schema = Avro::Schema.parse('{"type":"array", "items":"int"}')
395
460
  readers_schema = Avro::Schema.parse('{"type":"array", "items":"long"}')
@@ -408,6 +473,22 @@ EOS
408
473
  assert_equal(datum_read, datum_to_write)
409
474
  end
410
475
 
476
+ def test_aliased
477
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
478
+ {"type":"record", "name":"Rec1", "fields":[
479
+ {"name":"field1", "type":"int"}
480
+ ]}
481
+ SCHEMA
482
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
483
+ {"type":"record", "name":"Rec2", "aliases":["Rec1"], "fields":[
484
+ {"name":"field2", "aliases":["field1"], "type":"int"}
485
+ ]}
486
+ SCHEMA
487
+ writer, * = write_datum({ 'field1' => 1 }, writers_schema)
488
+ datum_read = read_datum(writer, writers_schema, readers_schema)
489
+ assert_equal(datum_read, { 'field2' => 1 })
490
+ end
491
+
411
492
  def test_snappy_backward_compat
412
493
  # a snappy-compressed block payload without the checksum
413
494
  # this has no back-references, just one literal so the last 9
@@ -151,6 +151,20 @@ class TestSchema < Test::Unit::TestCase
151
151
  }
152
152
  end
153
153
 
154
+ def test_to_avro_includes_aliases
155
+ hash = {
156
+ 'type' => 'record',
157
+ 'name' => 'test_record',
158
+ 'aliases' => %w(alt_record),
159
+ 'fields' => [
160
+ { 'name' => 'f', 'type' => { 'type' => 'fixed', 'size' => 2, 'name' => 'test_fixed', 'aliases' => %w(alt_fixed) } },
161
+ { 'name' => 'e', 'type' => { 'type' => 'enum', 'symbols' => %w(A B), 'name' => 'test_enum', 'aliases' => %w(alt_enum) } }
162
+ ]
163
+ }
164
+ schema = hash_to_schema(hash)
165
+ assert_equal(schema.to_avro, hash)
166
+ end
167
+
154
168
  def test_unknown_named_type
155
169
  error = assert_raise Avro::UnknownSchemaError do
156
170
  Avro::Schema.parse <<-SCHEMA
@@ -163,6 +177,42 @@ class TestSchema < Test::Unit::TestCase
163
177
  assert_equal '"MissingType" is not a schema we know about.', error.message
164
178
  end
165
179
 
180
+ def test_invalid_name
181
+ error = assert_raise Avro::SchemaParseError do
182
+ Avro::Schema.parse <<-SCHEMA
183
+ {"type": "record", "name": "my-invalid-name", "fields": [
184
+ {"name": "id", "type": "int"}
185
+ ]}
186
+ SCHEMA
187
+ end
188
+
189
+ assert_equal "Name my-invalid-name is invalid for type record!", error.message
190
+ end
191
+
192
+ def test_invalid_name_with_two_periods
193
+ error = assert_raise Avro::SchemaParseError do
194
+ Avro::Schema.parse <<-SCHEMA
195
+ {"type": "record", "name": "my..invalid.name", "fields": [
196
+ {"name": "id", "type": "int"}
197
+ ]}
198
+ SCHEMA
199
+ end
200
+
201
+ assert_equal "Name my..invalid.name is invalid for type record!", error.message
202
+ end
203
+
204
+ def test_invalid_name_with_validation_disabled
205
+ Avro.disable_schema_name_validation = true
206
+ assert_nothing_raised do
207
+ Avro::Schema.parse <<-SCHEMA
208
+ {"type": "record", "name": "my-invalid-name", "fields": [
209
+ {"name": "id", "type": "int"}
210
+ ]}
211
+ SCHEMA
212
+ end
213
+ Avro.disable_schema_name_validation = false
214
+ end
215
+
166
216
  def test_to_avro_handles_falsey_defaults
167
217
  schema = Avro::Schema.parse <<-SCHEMA
168
218
  {"type": "record", "name": "Record", "namespace": "my.name.space",
@@ -278,6 +328,40 @@ class TestSchema < Test::Unit::TestCase
278
328
  assert_equal enum_schema_hash, enum_schema_json.to_avro
279
329
  end
280
330
 
331
+ def test_enum_default_attribute
332
+ enum_schema = Avro::Schema.parse <<-SCHEMA
333
+ {
334
+ "type": "enum",
335
+ "name": "fruit",
336
+ "default": "apples",
337
+ "symbols": ["apples", "oranges"]
338
+ }
339
+ SCHEMA
340
+
341
+ enum_schema_hash = {
342
+ 'type' => 'enum',
343
+ 'name' => 'fruit',
344
+ 'default' => 'apples',
345
+ 'symbols' => %w(apples oranges)
346
+ }
347
+
348
+ assert_equal(enum_schema.default, "apples")
349
+ assert_equal(enum_schema_hash, enum_schema.to_avro)
350
+ end
351
+
352
+ def test_validate_enum_default
353
+ exception = assert_raise(Avro::SchemaParseError) do
354
+ hash_to_schema(
355
+ type: 'enum',
356
+ name: 'fruit',
357
+ default: 'bananas',
358
+ symbols: %w(apples oranges)
359
+ )
360
+ end
361
+ assert_equal("Default 'bananas' is not a valid symbol for enum fruit",
362
+ exception.to_s)
363
+ end
364
+
281
365
  def test_empty_record
282
366
  schema = Avro::Schema.parse('{"type":"record", "name":"Empty"}')
283
367
  assert_empty(schema.fields)
@@ -455,5 +539,189 @@ class TestSchema < Test::Unit::TestCase
455
539
  end
456
540
  assert_equal('Error validating default for veggies: at . expected type null, got string with value "apple"',
457
541
  exception.to_s)
542
+ end
543
+
544
+ def test_bytes_decimal_to_include_precision_scale
545
+ schema = Avro::Schema.parse <<-SCHEMA
546
+ {
547
+ "type": "bytes",
548
+ "logicalType": "decimal",
549
+ "precision": 9,
550
+ "scale": 2
551
+ }
552
+ SCHEMA
553
+
554
+ schema_hash =
555
+ {
556
+ 'type' => 'bytes',
557
+ 'logicalType' => 'decimal',
558
+ 'precision' => 9,
559
+ 'scale' => 2
560
+ }
561
+
562
+ assert_equal schema_hash, schema.to_avro
563
+ end
564
+
565
+ def test_bytes_decimal_to_without_precision_scale
566
+ schema = Avro::Schema.parse <<-SCHEMA
567
+ {
568
+ "type": "bytes",
569
+ "logicalType": "decimal"
570
+ }
571
+ SCHEMA
572
+
573
+ schema_hash =
574
+ {
575
+ 'type' => 'bytes',
576
+ 'logicalType' => 'decimal'
577
+ }
578
+
579
+ assert_equal schema_hash, schema.to_avro
580
+ end
581
+
582
+ def test_bytes_schema
583
+ schema = Avro::Schema.parse <<-SCHEMA
584
+ {
585
+ "type": "bytes"
586
+ }
587
+ SCHEMA
588
+
589
+ schema_str = 'bytes'
590
+ assert_equal schema_str, schema.to_avro
591
+ end
592
+
593
+ def test_validate_duplicate_symbols
594
+ exception = assert_raise(Avro::SchemaParseError) do
595
+ hash_to_schema(
596
+ type: 'enum',
597
+ name: 'name',
598
+ symbols: ['erica', 'erica']
599
+ )
600
+ end
601
+ assert_equal(
602
+ 'Duplicate symbol: ["erica", "erica"]',
603
+ exception.to_s
604
+ )
605
+ end
606
+
607
+ def test_validate_enum_symbols
608
+ exception = assert_raise(Avro::SchemaParseError) do
609
+ hash_to_schema(
610
+ type: 'enum',
611
+ name: 'things',
612
+ symbols: ['good_symbol', '_GOOD_SYMBOL_2', '8ad_symbol', 'also-bad-symbol', '>=', '$']
613
+ )
614
+ end
615
+
616
+ assert_equal(
617
+ "Invalid symbols for things: 8ad_symbol, also-bad-symbol, >=, $ don't match #{Avro::Schema::EnumSchema::SYMBOL_REGEX.inspect}",
618
+ exception.to_s
619
+ )
620
+ end
621
+
622
+ def test_enum_symbol_validation_disabled_via_env
623
+ Avro.disable_enum_symbol_validation = nil
624
+ ENV['AVRO_DISABLE_ENUM_SYMBOL_VALIDATION'] = '1'
625
+
626
+ hash_to_schema(
627
+ type: 'enum',
628
+ name: 'things',
629
+ symbols: ['good_symbol', '_GOOD_SYMBOL_2', '8ad_symbol', 'also-bad-symbol', '>=', '$'],
630
+ )
631
+ ensure
632
+ ENV.delete('AVRO_DISABLE_ENUM_SYMBOL_VALIDATION')
633
+ Avro.disable_enum_symbol_validation = nil
634
+ end
635
+
636
+ def test_enum_symbol_validation_disabled_via_class_method
637
+ Avro.disable_enum_symbol_validation = true
638
+
639
+ hash_to_schema(
640
+ type: 'enum',
641
+ name: 'things',
642
+ symbols: ['good_symbol', '_GOOD_SYMBOL_2', '8ad_symbol', 'also-bad-symbol', '>=', '$'],
643
+ )
644
+ ensure
645
+ Avro.disable_enum_symbol_validation = nil
646
+ end
647
+
648
+ def test_validate_field_aliases
649
+ exception = assert_raise(Avro::SchemaParseError) do
650
+ hash_to_schema(
651
+ type: 'record',
652
+ name: 'fruits',
653
+ fields: [
654
+ { name: 'banana', type: 'string', aliases: 'banane' }
655
+ ]
656
+ )
657
+ end
658
+
659
+ assert_match(/Invalid aliases value "banane" for "string" banana/, exception.to_s)
660
+ end
661
+
662
+ def test_validate_same_alias_multiple_fields
663
+ exception = assert_raise(Avro::SchemaParseError) do
664
+ hash_to_schema(
665
+ type: 'record',
666
+ name: 'fruits',
667
+ fields: [
668
+ { name: 'banana', type: 'string', aliases: %w(yellow) },
669
+ { name: 'lemo', type: 'string', aliases: %w(yellow) }
670
+ ]
671
+ )
672
+ end
673
+
674
+ assert_match('Alias ["yellow"] already in use', exception.to_s)
675
+ end
676
+
677
+ def test_validate_repeated_aliases
678
+ assert_nothing_raised do
679
+ hash_to_schema(
680
+ type: 'record',
681
+ name: 'fruits',
682
+ fields: [
683
+ { name: 'banana', type: 'string', aliases: %w(yellow yellow) },
684
+ ]
685
+ )
686
+ end
687
+ end
688
+
689
+ def test_validate_record_aliases
690
+ exception = assert_raise(Avro::SchemaParseError) do
691
+ hash_to_schema(
692
+ type: 'record',
693
+ name: 'fruits',
694
+ aliases: ["foods", 2],
695
+ fields: []
696
+ )
697
+ end
698
+
699
+ assert_match(/Invalid aliases value \["foods", 2\] for record fruits/, exception.to_s)
700
+ end
701
+
702
+ def test_validate_enum_aliases
703
+ exception = assert_raise(Avro::SchemaParseError) do
704
+ hash_to_schema(
705
+ type: 'enum',
706
+ name: 'vowels',
707
+ aliases: [1, 2],
708
+ symbols: %w(A E I O U)
709
+ )
458
710
  end
711
+
712
+ assert_match(/Invalid aliases value \[1, 2\] for enum vowels/, exception.to_s)
713
+ end
714
+
715
+ def test_validate_fixed_aliases
716
+ exception = assert_raise(Avro::SchemaParseError) do
717
+ hash_to_schema(
718
+ type: 'fixed',
719
+ name: 'uuid',
720
+ size: 36,
721
+ aliases: "unique_id"
722
+ )
723
+ end
724
+
725
+ assert_match(/Invalid aliases value "unique_id" for fixed uuid/, exception.to_s)
726
+ end
459
727
  end