avro 1.9.0 → 1.10.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -22,16 +22,18 @@ module Avro
22
22
  LONG_RANGE = Schema::LONG_MIN_VALUE..Schema::LONG_MAX_VALUE
23
23
  COMPLEX_TYPES = [:array, :error, :map, :record, :request].freeze
24
24
  BOOLEAN_VALUES = [true, false].freeze
25
+ DEFAULT_VALIDATION_OPTIONS = { recursive: true, encoded: false, fail_on_extra_fields: false }.freeze
26
+ RECURSIVE_SIMPLE_VALIDATION_OPTIONS = { encoded: true }.freeze
27
+ RUBY_CLASS_TO_AVRO_TYPE = {
28
+ NilClass => 'null'.freeze,
29
+ String => 'string'.freeze,
30
+ Float => 'float'.freeze,
31
+ Hash => 'record'.freeze
32
+ }.freeze
25
33
 
26
34
  class Result
27
- attr_reader :errors
28
-
29
- def initialize
30
- @errors = []
31
- end
32
-
33
35
  def <<(error)
34
- @errors << error
36
+ errors << error
35
37
  end
36
38
 
37
39
  def add_error(path, message)
@@ -39,11 +41,16 @@ module Avro
39
41
  end
40
42
 
41
43
  def failure?
42
- @errors.any?
44
+ defined?(@errors) && errors.any?
43
45
  end
44
46
 
45
47
  def to_s
46
- errors.join("\n")
48
+ failure? ? errors.join("\n") : ''
49
+ end
50
+
51
+ def errors
52
+ # Use less memory for success results by lazily creating the errors array
53
+ @errors ||= []
47
54
  end
48
55
  end
49
56
 
@@ -63,12 +70,9 @@ module Avro
63
70
  TypeMismatchError = Class.new(ValidationError)
64
71
 
65
72
  class << self
66
- def validate!(expected_schema, logical_datum, options = { recursive: true, encoded: false, fail_on_extra_fields: false })
67
- options ||= {}
68
- options[:recursive] = true unless options.key?(:recursive)
69
-
73
+ def validate!(expected_schema, logical_datum, options = DEFAULT_VALIDATION_OPTIONS)
70
74
  result = Result.new
71
- if options[:recursive]
75
+ if options.fetch(:recursive, true)
72
76
  validate_recursive(expected_schema, logical_datum, ROOT_IDENTIFIER, result, options)
73
77
  else
74
78
  validate_simple(expected_schema, logical_datum, ROOT_IDENTIFIER, result, options)
@@ -79,10 +83,10 @@ module Avro
79
83
 
80
84
  private
81
85
 
82
- def validate_recursive(expected_schema, logical_datum, path, result, options = {})
86
+ def validate_recursive(expected_schema, logical_datum, path, result, options)
83
87
  datum = resolve_datum(expected_schema, logical_datum, options[:encoded])
84
88
 
85
- validate_simple(expected_schema, datum, path, result, encoded: true)
89
+ validate_simple(expected_schema, datum, path, result, RECURSIVE_SIMPLE_VALIDATION_OPTIONS)
86
90
 
87
91
  case expected_schema.type_sym
88
92
  when :array
@@ -95,7 +99,8 @@ module Avro
95
99
  fail TypeMismatchError unless datum.is_a?(Hash)
96
100
  expected_schema.fields.each do |field|
97
101
  deeper_path = deeper_path_for_hash(field.name, path)
98
- validate_recursive(field.type, datum[field.name], deeper_path, result, options)
102
+ nested_value = datum.key?(field.name) ? datum[field.name] : datum[field.name.to_sym]
103
+ validate_recursive(field.type, nested_value, deeper_path, result, options)
99
104
  end
100
105
  if options[:fail_on_extra_fields]
101
106
  datum_fields = datum.keys.map(&:to_s)
@@ -109,7 +114,7 @@ module Avro
109
114
  result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
110
115
  end
111
116
 
112
- def validate_simple(expected_schema, logical_datum, path, result, options = {})
117
+ def validate_simple(expected_schema, logical_datum, path, result, options)
113
118
  datum = resolve_datum(expected_schema, logical_datum, options[:encoded])
114
119
  validate_type(expected_schema)
115
120
 
@@ -163,14 +168,14 @@ module Avro
163
168
  "expected enum with values #{symbols}, got #{actual_value_message(datum)}"
164
169
  end
165
170
 
166
- def validate_array(expected_schema, datum, path, result, options = {})
171
+ def validate_array(expected_schema, datum, path, result, options)
167
172
  fail TypeMismatchError unless datum.is_a?(Array)
168
173
  datum.each_with_index do |d, i|
169
- validate_recursive(expected_schema.items, d, path + "[#{i}]", result, options)
174
+ validate_recursive(expected_schema.items, d, "#{path}[#{i}]", result, options)
170
175
  end
171
176
  end
172
177
 
173
- def validate_map(expected_schema, datum, path, result, options = {})
178
+ def validate_map(expected_schema, datum, path, result, options)
174
179
  fail TypeMismatchError unless datum.is_a?(Hash)
175
180
  datum.keys.each do |k|
176
181
  result.add_error(path, "unexpected key type '#{ruby_to_avro_type(k.class)}' in map") unless k.is_a?(String)
@@ -181,7 +186,7 @@ module Avro
181
186
  end
182
187
  end
183
188
 
184
- def validate_union(expected_schema, datum, path, result, options = {})
189
+ def validate_union(expected_schema, datum, path, result, options)
185
190
  if expected_schema.schemas.size == 1
186
191
  validate_recursive(expected_schema.schemas.first, datum, path, result, options)
187
192
  return
@@ -201,6 +206,9 @@ module Avro
201
206
 
202
207
  def first_compatible_type(datum, expected_schema, path, failures, options = {})
203
208
  expected_schema.schemas.find do |schema|
209
+ # Avoid expensive validation if we're just validating a nil
210
+ next datum.nil? if schema.type_sym == :null
211
+
204
212
  result = Result.new
205
213
  validate_recursive(schema, datum, path, result, options)
206
214
  failures << { type: schema.type_sym, result: result } if result.failure?
@@ -209,7 +217,9 @@ module Avro
209
217
  end
210
218
 
211
219
  def deeper_path_for_hash(sub_key, path)
212
- "#{path}#{PATH_SEPARATOR}#{sub_key}".squeeze(PATH_SEPARATOR)
220
+ deeper_path = "#{path}#{PATH_SEPARATOR}#{sub_key}"
221
+ deeper_path.squeeze!(PATH_SEPARATOR)
222
+ deeper_path
213
223
  end
214
224
 
215
225
  def actual_value_message(value)
@@ -226,16 +236,11 @@ module Avro
226
236
  end
227
237
 
228
238
  def ruby_to_avro_type(ruby_class)
229
- {
230
- NilClass => 'null',
231
- String => 'string',
232
- Float => 'float',
233
- Hash => 'record'
234
- }.fetch(ruby_class, ruby_class)
239
+ RUBY_CLASS_TO_AVRO_TYPE.fetch(ruby_class, ruby_class)
235
240
  end
236
241
 
237
242
  def ruby_integer_to_avro_type(value)
238
- INT_RANGE.cover?(value) ? 'int' : 'long'
243
+ INT_RANGE.cover?(value) ? 'int'.freeze : 'long'.freeze
239
244
  end
240
245
  end
241
246
  end
data/test/case_finder.rb CHANGED
@@ -7,7 +7,7 @@
7
7
  # "License"); you may not use this file except in compliance
8
8
  # with the License. You may obtain a copy of the License at
9
9
  #
10
- # http://www.apache.org/licenses/LICENSE-2.0
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
11
  #
12
12
  # Unless required by applicable law or agreed to in writing,
13
13
  # software distributed under the License is distributed on an
@@ -44,13 +44,18 @@ class CaseFinder
44
44
  private
45
45
 
46
46
  def scan_case
47
- if id = @scanner.scan(/\/\/ \d+\n/)
47
+ if (id = @scanner.scan(/\/\/ \d+\n/))
48
48
  while @scanner.skip(/\/\/ .*\n/); end
49
49
 
50
50
  input = scan_input
51
51
  canonical = scan_canonical
52
52
  fingerprint = scan_fingerprint
53
-
53
+ if not fingerprint and @cases
54
+ fingerprint = @cases[-1].fingerprint
55
+ end
56
+ if fingerprint
57
+ fingerprint = fingerprint.to_i & 0xFFFF_FFFF_FFFF_FFFF
58
+ end
54
59
  Case.new(id, input, canonical, fingerprint)
55
60
  else
56
61
  @scanner.skip(/.*\n/)
@@ -61,7 +66,7 @@ class CaseFinder
61
66
  def scan_item(name)
62
67
  if @scanner.scan(/<<#{name}\n/)
63
68
  lines = []
64
- while line = @scanner.scan(/.+\n/)
69
+ while (line = @scanner.scan(/.+\n/))
65
70
  break if line.chomp == name
66
71
  lines << line
67
72
  end
data/test/random_data.rb CHANGED
@@ -1,4 +1,3 @@
1
- #!/usr/bin/env ruby
2
1
  # Licensed to the Apache Software Foundation (ASF) under one
3
2
  # or more contributor license agreements. See the NOTICE file
4
3
  # distributed with this work for additional information
@@ -7,7 +6,7 @@
7
6
  # "License"); you may not use this file except in compliance
8
7
  # with the License. You may obtain a copy of the License at
9
8
  #
10
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
11
10
  #
12
11
  # Unless required by applicable law or agreed to in writing, software
13
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -85,8 +84,10 @@ class RandomData
85
84
  case schm.logical_type
86
85
  when 'date'
87
86
  Avro::LogicalTypes::IntDate.decode(rand_int)
88
- when 'timestamp-millis', 'timestamp-micros'
87
+ when 'timestamp-micros'
89
88
  Avro::LogicalTypes::TimestampMicros.decode(rand_long)
89
+ when 'timestamp-millis'
90
+ Avro::LogicalTypes::TimestampMillis.decode(rand_long)
90
91
  end
91
92
  end
92
93
 
@@ -7,7 +7,7 @@
7
7
  # "License"); you may not use this file except in compliance
8
8
  # with the License. You may obtain a copy of the License at
9
9
  #
10
- # http://www.apache.org/licenses/LICENSE-2.0
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
11
  #
12
12
  # Unless required by applicable law or agreed to in writing, software
13
13
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -7,7 +7,7 @@
7
7
  # "License"); you may not use this file except in compliance
8
8
  # with the License. You may obtain a copy of the License at
9
9
  #
10
- # http://www.apache.org/licenses/LICENSE-2.0
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
11
  #
12
12
  # Unless required by applicable law or agreed to in writing, software
13
13
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -7,7 +7,7 @@
7
7
  # "License"); you may not use this file except in compliance
8
8
  # with the License. You may obtain a copy of the License at
9
9
  #
10
- # http://www.apache.org/licenses/LICENSE-2.0
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
11
  #
12
12
  # Unless required by applicable law or agreed to in writing, software
13
13
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -7,7 +7,7 @@
7
7
  # "License"); you may not use this file except in compliance
8
8
  # with the License. You may obtain a copy of the License at
9
9
  #
10
- # http://www.apache.org/licenses/LICENSE-2.0
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
11
  #
12
12
  # Unless required by applicable law or agreed to in writing, software
13
13
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -180,6 +180,19 @@ JSON
180
180
  assert_equal records, ['a' * 10_000]
181
181
  end
182
182
 
183
+ def test_zstandard
184
+ Avro::DataFile.open('data.avr', 'w', '"string"', :zstandard) do |writer|
185
+ writer << 'a' * 10_000
186
+ end
187
+ assert(File.size('data.avr') < 600)
188
+
189
+ records = []
190
+ Avro::DataFile.open('data.avr') do |reader|
191
+ reader.each {|record| records << record }
192
+ end
193
+ assert_equal records, ['a' * 10_000]
194
+ end
195
+
183
196
  def test_append_to_deflated_file
184
197
  schema = Avro::Schema.parse('"string"')
185
198
  writer = Avro::IO::DatumWriter.new(schema)
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -34,4 +34,23 @@ class TestFingerprints < Test::Unit::TestCase
34
34
  assert_equal 28572620203319713300323544804233350633246234624932075150020181448463213378117,
35
35
  schema.sha256_fingerprint
36
36
  end
37
+
38
+ def test_crc_64_avro_fingerprint
39
+ schema = Avro::Schema.parse <<-SCHEMA
40
+ { "type": "int" }
41
+ SCHEMA
42
+
43
+ assert_equal 8247732601305521295, # hex: 0x7275d51a3f395c8f
44
+ schema.crc_64_avro_fingerprint
45
+ end
46
+
47
+ # This definitely belongs somewhere else
48
+ def test_single_object_encoding_header
49
+ schema = Avro::Schema.parse <<-SCHEMA
50
+ { "type": "int" }
51
+ SCHEMA
52
+
53
+ assert_equal ["c3", "01", "8f", "5c", "39", "3f", "1a", "D5", "75", "72"].map{|e| e.to_i(16) },
54
+ schema.single_object_encoding_header
55
+ end
37
56
  end
data/test/test_help.rb CHANGED
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
data/test/test_io.rb CHANGED
@@ -6,7 +6,7 @@
6
6
  # "License"); you may not use this file except in compliance
7
7
  # with the License. You may obtain a copy of the License at
8
8
  #
9
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
10
  #
11
11
  # Unless required by applicable law or agreed to in writing, software
12
12
  # distributed under the License is distributed on an "AS IS" BASIS,
@@ -90,7 +90,10 @@ EOS
90
90
  "name": "Test",
91
91
  "fields": [{"name": "ts",
92
92
  "type": {"type": "long",
93
- "logicalType": "timestamp-micros"}}]}
93
+ "logicalType": "timestamp-micros"}},
94
+ {"name": "ts2",
95
+ "type": {"type": "long",
96
+ "logicalType": "timestamp-millis"}}]}
94
97
  EOS
95
98
  check(record_schema)
96
99
  end
@@ -112,6 +115,13 @@ EOS
112
115
  check_default(enum_schema, '"B"', "B")
113
116
  end
114
117
 
118
+ def test_enum_with_default
119
+ enum_schema = '{"type": "enum", "name": "Test", "symbols": ["A", "B"], "default": "A"}'
120
+ check(enum_schema)
121
+ # Field default is used for missing field.
122
+ check_default(enum_schema, '"B"', "B")
123
+ end
124
+
115
125
  def test_recursive
116
126
  recursive_schema = <<EOS
117
127
  {"type": "record",
@@ -158,6 +168,17 @@ EOS
158
168
  check_default(fixed_schema, '"a"', "a")
159
169
  end
160
170
 
171
+ def test_record_variable_key_types
172
+ datum = { sym: "foo", "str"=>"bar"}
173
+ ret_val = { "sym"=> "foo", "str"=>"bar"}
174
+ schema = Schema.parse('{"type":"record", "name":"rec", "fields":[{"name":"sym", "type":"string"}, {"name":"str", "type":"string"}]}')
175
+
176
+ writer, _encoder, _datum_writer = write_datum(datum, schema)
177
+
178
+ ret_datum = read_datum(writer, schema)
179
+ assert_equal ret_datum, ret_val
180
+ end
181
+
161
182
  def test_record_with_nil
162
183
  schema = Avro::Schema.parse('{"type":"record", "name":"rec", "fields":[{"type":"int", "name":"i"}]}')
163
184
  assert_raise(Avro::IO::AvroTypeError) do
@@ -390,6 +411,50 @@ EOS
390
411
  assert_equal(incorrect, 0)
391
412
  end
392
413
 
414
+ def test_unknown_enum_symbol
415
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
416
+ {
417
+ "type": "enum",
418
+ "name": "test",
419
+ "symbols": ["B", "C"]
420
+ }
421
+ SCHEMA
422
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
423
+ {
424
+ "type": "enum",
425
+ "name": "test",
426
+ "symbols": ["A", "B"]
427
+ }
428
+ SCHEMA
429
+ datum_to_write = "C"
430
+ writer, * = write_datum(datum_to_write, writers_schema)
431
+ datum_read = read_datum(writer, writers_schema, readers_schema)
432
+ # Ruby implementation did not follow the spec and returns the writer's symbol here
433
+ assert_equal(datum_read, datum_to_write)
434
+ end
435
+
436
+ def test_unknown_enum_symbol_with_enum_default
437
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
438
+ {
439
+ "type": "enum",
440
+ "name": "test",
441
+ "symbols": ["B", "C"]
442
+ }
443
+ SCHEMA
444
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
445
+ {
446
+ "type": "enum",
447
+ "name": "test",
448
+ "symbols": ["A", "B", "UNKNOWN"],
449
+ "default": "UNKNOWN"
450
+ }
451
+ SCHEMA
452
+ datum_to_write = "C"
453
+ writer, * = write_datum(datum_to_write, writers_schema)
454
+ datum_read = read_datum(writer, writers_schema, readers_schema)
455
+ assert_equal(datum_read, "UNKNOWN")
456
+ end
457
+
393
458
  def test_array_schema_promotion
394
459
  writers_schema = Avro::Schema.parse('{"type":"array", "items":"int"}')
395
460
  readers_schema = Avro::Schema.parse('{"type":"array", "items":"long"}')
@@ -408,6 +473,22 @@ EOS
408
473
  assert_equal(datum_read, datum_to_write)
409
474
  end
410
475
 
476
+ def test_aliased
477
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
478
+ {"type":"record", "name":"Rec1", "fields":[
479
+ {"name":"field1", "type":"int"}
480
+ ]}
481
+ SCHEMA
482
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
483
+ {"type":"record", "name":"Rec2", "aliases":["Rec1"], "fields":[
484
+ {"name":"field2", "aliases":["field1"], "type":"int"}
485
+ ]}
486
+ SCHEMA
487
+ writer, * = write_datum({ 'field1' => 1 }, writers_schema)
488
+ datum_read = read_datum(writer, writers_schema, readers_schema)
489
+ assert_equal(datum_read, { 'field2' => 1 })
490
+ end
491
+
411
492
  def test_snappy_backward_compat
412
493
  # a snappy-compressed block payload without the checksum
413
494
  # this has no back-references, just one literal so the last 9