avro 1.9.1 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,10 +28,8 @@ module Avro
28
28
  end
29
29
 
30
30
  # Perform a basic check that a datum written with the writers_schema could
31
- # be read using the readers_schema. This check only includes matching the types,
32
- # including schema promotion, and matching the full name for named types.
33
- # Aliases for named types are not supported here, and the ruby implementation
34
- # of Avro in general does not include support for aliases.
31
+ # be read using the readers_schema. This check includes matching the types,
32
+ # including schema promotion, and matching the full name (including aliases) for named types.
35
33
  def self.match_schemas(writers_schema, readers_schema)
36
34
  w_type = writers_schema.type_sym
37
35
  r_type = readers_schema.type_sym
@@ -46,16 +44,16 @@ module Avro
46
44
 
47
45
  case r_type
48
46
  when :record
49
- return writers_schema.fullname == readers_schema.fullname
47
+ return readers_schema.match_fullname?(writers_schema.fullname)
50
48
  when :error
51
- return writers_schema.fullname == readers_schema.fullname
49
+ return readers_schema.match_fullname?(writers_schema.fullname)
52
50
  when :request
53
51
  return true
54
52
  when :fixed
55
- return writers_schema.fullname == readers_schema.fullname &&
53
+ return readers_schema.match_fullname?(writers_schema.fullname) &&
56
54
  writers_schema.size == readers_schema.size
57
55
  when :enum
58
- return writers_schema.fullname == readers_schema.fullname
56
+ return readers_schema.match_fullname?(writers_schema.fullname)
59
57
  when :map
60
58
  return match_schemas(writers_schema.values, readers_schema.values)
61
59
  when :array
@@ -118,8 +116,8 @@ module Avro
118
116
  when :union
119
117
  match_union_schemas(writers_schema, readers_schema)
120
118
  when :enum
121
- # reader's symbols must contain all writer's symbols
122
- (writers_schema.symbols - readers_schema.symbols).empty?
119
+ # reader's symbols must contain all writer's symbols or reader has default
120
+ (writers_schema.symbols - readers_schema.symbols).empty? || !readers_schema.default.nil?
123
121
  else
124
122
  if writers_schema.type_sym == :union && writers_schema.schemas.size == 1
125
123
  full_match_schemas(writers_schema.schemas.first, readers_schema)
@@ -148,7 +146,14 @@ module Avro
148
146
  if writer_fields_hash.key?(field.name)
149
147
  return false unless full_match_schemas(writer_fields_hash[field.name].type, field.type)
150
148
  else
151
- return false unless field.default?
149
+ names = writer_fields_hash.keys & field.alias_names
150
+ if names.size > 1
151
+ return false
152
+ elsif names.size == 1
153
+ return false unless full_match_schemas(writer_fields_hash[names.first].type, field.type)
154
+ else
155
+ return false unless field.default?
156
+ end
152
157
  end
153
158
  end
154
159
 
@@ -44,13 +44,18 @@ class CaseFinder
44
44
  private
45
45
 
46
46
  def scan_case
47
- if id = @scanner.scan(/\/\/ \d+\n/)
47
+ if (id = @scanner.scan(/\/\/ \d+\n/))
48
48
  while @scanner.skip(/\/\/ .*\n/); end
49
49
 
50
50
  input = scan_input
51
51
  canonical = scan_canonical
52
52
  fingerprint = scan_fingerprint
53
-
53
+ if not fingerprint and @cases
54
+ fingerprint = @cases[-1].fingerprint
55
+ end
56
+ if fingerprint
57
+ fingerprint = fingerprint.to_i & 0xFFFF_FFFF_FFFF_FFFF
58
+ end
54
59
  Case.new(id, input, canonical, fingerprint)
55
60
  else
56
61
  @scanner.skip(/.*\n/)
@@ -61,7 +66,7 @@ class CaseFinder
61
66
  def scan_item(name)
62
67
  if @scanner.scan(/<<#{name}\n/)
63
68
  lines = []
64
- while line = @scanner.scan(/.+\n/)
69
+ while (line = @scanner.scan(/.+\n/))
65
70
  break if line.chomp == name
66
71
  lines << line
67
72
  end
@@ -1,4 +1,3 @@
1
- #!/usr/bin/env ruby
2
1
  # Licensed to the Apache Software Foundation (ASF) under one
3
2
  # or more contributor license agreements. See the NOTICE file
4
3
  # distributed with this work for additional information
@@ -85,8 +84,10 @@ class RandomData
85
84
  case schm.logical_type
86
85
  when 'date'
87
86
  Avro::LogicalTypes::IntDate.decode(rand_int)
88
- when 'timestamp-millis', 'timestamp-micros'
87
+ when 'timestamp-micros'
89
88
  Avro::LogicalTypes::TimestampMicros.decode(rand_long)
89
+ when 'timestamp-millis'
90
+ Avro::LogicalTypes::TimestampMillis.decode(rand_long)
90
91
  end
91
92
  end
92
93
 
File without changes
File without changes
File without changes
File without changes
@@ -180,6 +180,19 @@ JSON
180
180
  assert_equal records, ['a' * 10_000]
181
181
  end
182
182
 
183
+ def test_zstandard
184
+ Avro::DataFile.open('data.avr', 'w', '"string"', :zstandard) do |writer|
185
+ writer << 'a' * 10_000
186
+ end
187
+ assert(File.size('data.avr') < 600)
188
+
189
+ records = []
190
+ Avro::DataFile.open('data.avr') do |reader|
191
+ reader.each {|record| records << record }
192
+ end
193
+ assert_equal records, ['a' * 10_000]
194
+ end
195
+
183
196
  def test_append_to_deflated_file
184
197
  schema = Avro::Schema.parse('"string"')
185
198
  writer = Avro::IO::DatumWriter.new(schema)
@@ -34,4 +34,23 @@ class TestFingerprints < Test::Unit::TestCase
34
34
  assert_equal 28572620203319713300323544804233350633246234624932075150020181448463213378117,
35
35
  schema.sha256_fingerprint
36
36
  end
37
+
38
+ def test_crc_64_avro_fingerprint
39
+ schema = Avro::Schema.parse <<-SCHEMA
40
+ { "type": "int" }
41
+ SCHEMA
42
+
43
+ assert_equal 8247732601305521295, # hex: 0x7275d51a3f395c8f
44
+ schema.crc_64_avro_fingerprint
45
+ end
46
+
47
+ # This definitely belongs somewhere else
48
+ def test_single_object_encoding_header
49
+ schema = Avro::Schema.parse <<-SCHEMA
50
+ { "type": "int" }
51
+ SCHEMA
52
+
53
+ assert_equal ["c3", "01", "72", "75", "d5", "1a", "3f", "39", "5c", "8f"].map{|e| e.to_i(16) },
54
+ schema.single_object_encoding_header
55
+ end
37
56
  end
@@ -90,7 +90,10 @@ EOS
90
90
  "name": "Test",
91
91
  "fields": [{"name": "ts",
92
92
  "type": {"type": "long",
93
- "logicalType": "timestamp-micros"}}]}
93
+ "logicalType": "timestamp-micros"}},
94
+ {"name": "ts2",
95
+ "type": {"type": "long",
96
+ "logicalType": "timestamp-millis"}}]}
94
97
  EOS
95
98
  check(record_schema)
96
99
  end
@@ -112,6 +115,13 @@ EOS
112
115
  check_default(enum_schema, '"B"', "B")
113
116
  end
114
117
 
118
+ def test_enum_with_default
119
+ enum_schema = '{"type": "enum", "name": "Test", "symbols": ["A", "B"], "default": "A"}'
120
+ check(enum_schema)
121
+ # Field default is used for missing field.
122
+ check_default(enum_schema, '"B"', "B")
123
+ end
124
+
115
125
  def test_recursive
116
126
  recursive_schema = <<EOS
117
127
  {"type": "record",
@@ -158,6 +168,17 @@ EOS
158
168
  check_default(fixed_schema, '"a"', "a")
159
169
  end
160
170
 
171
+ def test_record_variable_key_types
172
+ datum = { sym: "foo", "str"=>"bar"}
173
+ ret_val = { "sym"=> "foo", "str"=>"bar"}
174
+ schema = Schema.parse('{"type":"record", "name":"rec", "fields":[{"name":"sym", "type":"string"}, {"name":"str", "type":"string"}]}')
175
+
176
+ writer, _encoder, _datum_writer = write_datum(datum, schema)
177
+
178
+ ret_datum = read_datum(writer, schema)
179
+ assert_equal ret_datum, ret_val
180
+ end
181
+
161
182
  def test_record_with_nil
162
183
  schema = Avro::Schema.parse('{"type":"record", "name":"rec", "fields":[{"type":"int", "name":"i"}]}')
163
184
  assert_raise(Avro::IO::AvroTypeError) do
@@ -390,6 +411,50 @@ EOS
390
411
  assert_equal(incorrect, 0)
391
412
  end
392
413
 
414
+ def test_unknown_enum_symbol
415
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
416
+ {
417
+ "type": "enum",
418
+ "name": "test",
419
+ "symbols": ["B", "C"]
420
+ }
421
+ SCHEMA
422
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
423
+ {
424
+ "type": "enum",
425
+ "name": "test",
426
+ "symbols": ["A", "B"]
427
+ }
428
+ SCHEMA
429
+ datum_to_write = "C"
430
+ writer, * = write_datum(datum_to_write, writers_schema)
431
+ datum_read = read_datum(writer, writers_schema, readers_schema)
432
+ # Ruby implementation did not follow the spec and returns the writer's symbol here
433
+ assert_equal(datum_read, datum_to_write)
434
+ end
435
+
436
+ def test_unknown_enum_symbol_with_enum_default
437
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
438
+ {
439
+ "type": "enum",
440
+ "name": "test",
441
+ "symbols": ["B", "C"]
442
+ }
443
+ SCHEMA
444
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
445
+ {
446
+ "type": "enum",
447
+ "name": "test",
448
+ "symbols": ["A", "B", "UNKNOWN"],
449
+ "default": "UNKNOWN"
450
+ }
451
+ SCHEMA
452
+ datum_to_write = "C"
453
+ writer, * = write_datum(datum_to_write, writers_schema)
454
+ datum_read = read_datum(writer, writers_schema, readers_schema)
455
+ assert_equal(datum_read, "UNKNOWN")
456
+ end
457
+
393
458
  def test_array_schema_promotion
394
459
  writers_schema = Avro::Schema.parse('{"type":"array", "items":"int"}')
395
460
  readers_schema = Avro::Schema.parse('{"type":"array", "items":"long"}')
@@ -408,6 +473,22 @@ EOS
408
473
  assert_equal(datum_read, datum_to_write)
409
474
  end
410
475
 
476
+ def test_aliased
477
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
478
+ {"type":"record", "name":"Rec1", "fields":[
479
+ {"name":"field1", "type":"int"}
480
+ ]}
481
+ SCHEMA
482
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
483
+ {"type":"record", "name":"Rec2", "aliases":["Rec1"], "fields":[
484
+ {"name":"field2", "aliases":["field1"], "type":"int"}
485
+ ]}
486
+ SCHEMA
487
+ writer, * = write_datum({ 'field1' => 1 }, writers_schema)
488
+ datum_read = read_datum(writer, writers_schema, readers_schema)
489
+ assert_equal(datum_read, { 'field2' => 1 })
490
+ end
491
+
411
492
  def test_snappy_backward_compat
412
493
  # a snappy-compressed block payload without the checksum
413
494
  # this has no back-references, just one literal so the last 9
@@ -151,6 +151,20 @@ class TestSchema < Test::Unit::TestCase
151
151
  }
152
152
  end
153
153
 
154
+ def test_to_avro_includes_aliases
155
+ hash = {
156
+ 'type' => 'record',
157
+ 'name' => 'test_record',
158
+ 'aliases' => %w(alt_record),
159
+ 'fields' => [
160
+ { 'name' => 'f', 'type' => { 'type' => 'fixed', 'size' => 2, 'name' => 'test_fixed', 'aliases' => %w(alt_fixed) } },
161
+ { 'name' => 'e', 'type' => { 'type' => 'enum', 'symbols' => %w(A B), 'name' => 'test_enum', 'aliases' => %w(alt_enum) } }
162
+ ]
163
+ }
164
+ schema = hash_to_schema(hash)
165
+ assert_equal(schema.to_avro, hash)
166
+ end
167
+
154
168
  def test_unknown_named_type
155
169
  error = assert_raise Avro::UnknownSchemaError do
156
170
  Avro::Schema.parse <<-SCHEMA
@@ -163,6 +177,42 @@ class TestSchema < Test::Unit::TestCase
163
177
  assert_equal '"MissingType" is not a schema we know about.', error.message
164
178
  end
165
179
 
180
+ def test_invalid_name
181
+ error = assert_raise Avro::SchemaParseError do
182
+ Avro::Schema.parse <<-SCHEMA
183
+ {"type": "record", "name": "my-invalid-name", "fields": [
184
+ {"name": "id", "type": "int"}
185
+ ]}
186
+ SCHEMA
187
+ end
188
+
189
+ assert_equal "Name my-invalid-name is invalid for type record!", error.message
190
+ end
191
+
192
+ def test_invalid_name_with_two_periods
193
+ error = assert_raise Avro::SchemaParseError do
194
+ Avro::Schema.parse <<-SCHEMA
195
+ {"type": "record", "name": "my..invalid.name", "fields": [
196
+ {"name": "id", "type": "int"}
197
+ ]}
198
+ SCHEMA
199
+ end
200
+
201
+ assert_equal "Name my..invalid.name is invalid for type record!", error.message
202
+ end
203
+
204
+ def test_invalid_name_with_validation_disabled
205
+ Avro.disable_schema_name_validation = true
206
+ assert_nothing_raised do
207
+ Avro::Schema.parse <<-SCHEMA
208
+ {"type": "record", "name": "my-invalid-name", "fields": [
209
+ {"name": "id", "type": "int"}
210
+ ]}
211
+ SCHEMA
212
+ end
213
+ Avro.disable_schema_name_validation = false
214
+ end
215
+
166
216
  def test_to_avro_handles_falsey_defaults
167
217
  schema = Avro::Schema.parse <<-SCHEMA
168
218
  {"type": "record", "name": "Record", "namespace": "my.name.space",
@@ -278,6 +328,40 @@ class TestSchema < Test::Unit::TestCase
278
328
  assert_equal enum_schema_hash, enum_schema_json.to_avro
279
329
  end
280
330
 
331
+ def test_enum_default_attribute
332
+ enum_schema = Avro::Schema.parse <<-SCHEMA
333
+ {
334
+ "type": "enum",
335
+ "name": "fruit",
336
+ "default": "apples",
337
+ "symbols": ["apples", "oranges"]
338
+ }
339
+ SCHEMA
340
+
341
+ enum_schema_hash = {
342
+ 'type' => 'enum',
343
+ 'name' => 'fruit',
344
+ 'default' => 'apples',
345
+ 'symbols' => %w(apples oranges)
346
+ }
347
+
348
+ assert_equal(enum_schema.default, "apples")
349
+ assert_equal(enum_schema_hash, enum_schema.to_avro)
350
+ end
351
+
352
+ def test_validate_enum_default
353
+ exception = assert_raise(Avro::SchemaParseError) do
354
+ hash_to_schema(
355
+ type: 'enum',
356
+ name: 'fruit',
357
+ default: 'bananas',
358
+ symbols: %w(apples oranges)
359
+ )
360
+ end
361
+ assert_equal("Default 'bananas' is not a valid symbol for enum fruit",
362
+ exception.to_s)
363
+ end
364
+
281
365
  def test_empty_record
282
366
  schema = Avro::Schema.parse('{"type":"record", "name":"Empty"}')
283
367
  assert_empty(schema.fields)
@@ -455,5 +539,189 @@ class TestSchema < Test::Unit::TestCase
455
539
  end
456
540
  assert_equal('Error validating default for veggies: at . expected type null, got string with value "apple"',
457
541
  exception.to_s)
542
+ end
543
+
544
+ def test_bytes_decimal_to_include_precision_scale
545
+ schema = Avro::Schema.parse <<-SCHEMA
546
+ {
547
+ "type": "bytes",
548
+ "logicalType": "decimal",
549
+ "precision": 9,
550
+ "scale": 2
551
+ }
552
+ SCHEMA
553
+
554
+ schema_hash =
555
+ {
556
+ 'type' => 'bytes',
557
+ 'logicalType' => 'decimal',
558
+ 'precision' => 9,
559
+ 'scale' => 2
560
+ }
561
+
562
+ assert_equal schema_hash, schema.to_avro
563
+ end
564
+
565
+ def test_bytes_decimal_to_without_precision_scale
566
+ schema = Avro::Schema.parse <<-SCHEMA
567
+ {
568
+ "type": "bytes",
569
+ "logicalType": "decimal"
570
+ }
571
+ SCHEMA
572
+
573
+ schema_hash =
574
+ {
575
+ 'type' => 'bytes',
576
+ 'logicalType' => 'decimal'
577
+ }
578
+
579
+ assert_equal schema_hash, schema.to_avro
580
+ end
581
+
582
+ def test_bytes_schema
583
+ schema = Avro::Schema.parse <<-SCHEMA
584
+ {
585
+ "type": "bytes"
586
+ }
587
+ SCHEMA
588
+
589
+ schema_str = 'bytes'
590
+ assert_equal schema_str, schema.to_avro
591
+ end
592
+
593
+ def test_validate_duplicate_symbols
594
+ exception = assert_raise(Avro::SchemaParseError) do
595
+ hash_to_schema(
596
+ type: 'enum',
597
+ name: 'name',
598
+ symbols: ['erica', 'erica']
599
+ )
600
+ end
601
+ assert_equal(
602
+ 'Duplicate symbol: ["erica", "erica"]',
603
+ exception.to_s
604
+ )
605
+ end
606
+
607
+ def test_validate_enum_symbols
608
+ exception = assert_raise(Avro::SchemaParseError) do
609
+ hash_to_schema(
610
+ type: 'enum',
611
+ name: 'things',
612
+ symbols: ['good_symbol', '_GOOD_SYMBOL_2', '8ad_symbol', 'also-bad-symbol', '>=', '$']
613
+ )
614
+ end
615
+
616
+ assert_equal(
617
+ "Invalid symbols for things: 8ad_symbol, also-bad-symbol, >=, $ don't match #{Avro::Schema::EnumSchema::SYMBOL_REGEX.inspect}",
618
+ exception.to_s
619
+ )
620
+ end
621
+
622
+ def test_enum_symbol_validation_disabled_via_env
623
+ Avro.disable_enum_symbol_validation = nil
624
+ ENV['AVRO_DISABLE_ENUM_SYMBOL_VALIDATION'] = '1'
625
+
626
+ hash_to_schema(
627
+ type: 'enum',
628
+ name: 'things',
629
+ symbols: ['good_symbol', '_GOOD_SYMBOL_2', '8ad_symbol', 'also-bad-symbol', '>=', '$'],
630
+ )
631
+ ensure
632
+ ENV.delete('AVRO_DISABLE_ENUM_SYMBOL_VALIDATION')
633
+ Avro.disable_enum_symbol_validation = nil
634
+ end
635
+
636
+ def test_enum_symbol_validation_disabled_via_class_method
637
+ Avro.disable_enum_symbol_validation = true
638
+
639
+ hash_to_schema(
640
+ type: 'enum',
641
+ name: 'things',
642
+ symbols: ['good_symbol', '_GOOD_SYMBOL_2', '8ad_symbol', 'also-bad-symbol', '>=', '$'],
643
+ )
644
+ ensure
645
+ Avro.disable_enum_symbol_validation = nil
646
+ end
647
+
648
+ def test_validate_field_aliases
649
+ exception = assert_raise(Avro::SchemaParseError) do
650
+ hash_to_schema(
651
+ type: 'record',
652
+ name: 'fruits',
653
+ fields: [
654
+ { name: 'banana', type: 'string', aliases: 'banane' }
655
+ ]
656
+ )
657
+ end
658
+
659
+ assert_match(/Invalid aliases value "banane" for "string" banana/, exception.to_s)
660
+ end
661
+
662
+ def test_validate_same_alias_multiple_fields
663
+ exception = assert_raise(Avro::SchemaParseError) do
664
+ hash_to_schema(
665
+ type: 'record',
666
+ name: 'fruits',
667
+ fields: [
668
+ { name: 'banana', type: 'string', aliases: %w(yellow) },
669
+ { name: 'lemo', type: 'string', aliases: %w(yellow) }
670
+ ]
671
+ )
672
+ end
673
+
674
+ assert_match('Alias ["yellow"] already in use', exception.to_s)
675
+ end
676
+
677
+ def test_validate_repeated_aliases
678
+ assert_nothing_raised do
679
+ hash_to_schema(
680
+ type: 'record',
681
+ name: 'fruits',
682
+ fields: [
683
+ { name: 'banana', type: 'string', aliases: %w(yellow yellow) },
684
+ ]
685
+ )
686
+ end
687
+ end
688
+
689
+ def test_validate_record_aliases
690
+ exception = assert_raise(Avro::SchemaParseError) do
691
+ hash_to_schema(
692
+ type: 'record',
693
+ name: 'fruits',
694
+ aliases: ["foods", 2],
695
+ fields: []
696
+ )
697
+ end
698
+
699
+ assert_match(/Invalid aliases value \["foods", 2\] for record fruits/, exception.to_s)
700
+ end
701
+
702
+ def test_validate_enum_aliases
703
+ exception = assert_raise(Avro::SchemaParseError) do
704
+ hash_to_schema(
705
+ type: 'enum',
706
+ name: 'vowels',
707
+ aliases: [1, 2],
708
+ symbols: %w(A E I O U)
709
+ )
458
710
  end
711
+
712
+ assert_match(/Invalid aliases value \[1, 2\] for enum vowels/, exception.to_s)
713
+ end
714
+
715
+ def test_validate_fixed_aliases
716
+ exception = assert_raise(Avro::SchemaParseError) do
717
+ hash_to_schema(
718
+ type: 'fixed',
719
+ name: 'uuid',
720
+ size: 36,
721
+ aliases: "unique_id"
722
+ )
723
+ end
724
+
725
+ assert_match(/Invalid aliases value "unique_id" for fixed uuid/, exception.to_s)
726
+ end
459
727
  end