avro 1.9.2 → 1.10.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5cff0cff918530cc4b15352758c668a08cb78aa0
4
- data.tar.gz: 2e67c0829395100b1f98b58956057d35968e453b
3
+ metadata.gz: eab84afb7b30d99dee71a7e360dba2a36957500b
4
+ data.tar.gz: 8211aea9ad1e00a4f94bb3edc2aeaf9ab06edffa
5
5
  SHA512:
6
- metadata.gz: fbb6b03c4b320ca6be7db6114367e1e93b711d4e9b3c903b54ca56c32197d2d6aae5bc48b5ff0fffc6204139a715e8bb786c14286235dc01456002738c0f49a6
7
- data.tar.gz: a9cde162a3f19c3c8a873e8ced87970aa4e963d9c0ec5ea8b8cd110c8bef90013e94c31f3274448003bc9c3857b4ff357ddb80af4cc7a168a211a384557195fa
6
+ metadata.gz: c51889575e3cd689f7288bdd7a0d8250a8147a8eaf568805f726991f9f115c28f8ca9fa396262c687021859cbd9a26c5f196b7f73a174a2da9dd7f22764fe8d8
7
+ data.tar.gz: 0d936920e8b8ea8edd70583164bd519ac46108de4ab9f7d359d6edb7e8d1941eb3bad06ed8462cdc6bac2d3152c16faba49833d115cc6240e26d3cf51f746c08
data/Rakefile CHANGED
@@ -23,8 +23,8 @@ Echoe.new('avro', VERSION) do |p|
23
23
  p.summary = "Apache Avro for Ruby"
24
24
  p.description = "Avro is a data serialization and RPC format"
25
25
  p.url = "https://avro.apache.org/"
26
- p.runtime_dependencies = %w[multi_json]
27
- p.licenses = ["Apache License 2.0 (Apache-2.0)"]
26
+ p.runtime_dependencies = ["multi_json ~>1"]
27
+ p.licenses = ["Apache-2.0"]
28
28
  end
29
29
 
30
30
  t = Rake::TestTask.new(:interop)
@@ -38,17 +38,13 @@ task :generate_interop do
38
38
 
39
39
  schema = Avro::Schema.parse(File.read(SCHEMAS + '/interop.avsc'))
40
40
  r = RandomData.new(schema, ENV['SEED'])
41
- f = File.open(BUILD + '/interop/data/ruby.avro', 'w')
42
- writer = Avro::DataFile::Writer.new(f, Avro::IO::DatumWriter.new(schema), schema)
43
- begin
44
- writer << r.next
45
- writer << r.next
46
- ensure
47
- writer.close
48
- end
49
-
50
- Avro::DataFile.open(BUILD + '/interop/data/ruby_deflate.avro', 'w', schema.to_s, :deflate) do |writer|
51
- 20.times { writer << r.next }
41
+ Avro::DataFile.codecs.each do |name, codec|
42
+ next unless codec
43
+ filename = name == 'null' ? 'ruby.avro' : "ruby_#{name}.avro"
44
+ path = File.join(BUILD, 'interop/data', filename)
45
+ Avro::DataFile.open(path, 'w', schema.to_s, name) do |writer|
46
+ writer << r.next
47
+ end
52
48
  end
53
49
  end
54
50
 
@@ -1,35 +1,35 @@
1
1
  # -*- encoding: utf-8 -*-
2
- # stub: avro 1.9.2 ruby lib
2
+ # stub: avro 1.10.0 ruby lib
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "avro".freeze
6
- s.version = "1.9.2"
6
+ s.version = "1.10.0"
7
7
 
8
8
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2".freeze) if s.respond_to? :required_rubygems_version=
9
9
  s.require_paths = ["lib".freeze]
10
10
  s.authors = ["Apache Software Foundation".freeze]
11
- s.date = "2020-02-07"
11
+ s.date = "2020-06-22"
12
12
  s.description = "Avro is a data serialization and RPC format".freeze
13
13
  s.email = "dev@avro.apache.org".freeze
14
14
  s.extra_rdoc_files = ["CHANGELOG".freeze, "LICENSE".freeze, "lib/avro.rb".freeze, "lib/avro/data_file.rb".freeze, "lib/avro/io.rb".freeze, "lib/avro/ipc.rb".freeze, "lib/avro/logical_types.rb".freeze, "lib/avro/protocol.rb".freeze, "lib/avro/schema.rb".freeze, "lib/avro/schema_compatibility.rb".freeze, "lib/avro/schema_normalization.rb".freeze, "lib/avro/schema_validator.rb".freeze]
15
15
  s.files = ["CHANGELOG".freeze, "LICENSE".freeze, "Manifest".freeze, "NOTICE".freeze, "Rakefile".freeze, "avro.gemspec".freeze, "interop/test_interop.rb".freeze, "lib/avro.rb".freeze, "lib/avro/data_file.rb".freeze, "lib/avro/io.rb".freeze, "lib/avro/ipc.rb".freeze, "lib/avro/logical_types.rb".freeze, "lib/avro/protocol.rb".freeze, "lib/avro/schema.rb".freeze, "lib/avro/schema_compatibility.rb".freeze, "lib/avro/schema_normalization.rb".freeze, "lib/avro/schema_validator.rb".freeze, "test/case_finder.rb".freeze, "test/random_data.rb".freeze, "test/sample_ipc_client.rb".freeze, "test/sample_ipc_http_client.rb".freeze, "test/sample_ipc_http_server.rb".freeze, "test/sample_ipc_server.rb".freeze, "test/test_datafile.rb".freeze, "test/test_fingerprints.rb".freeze, "test/test_help.rb".freeze, "test/test_io.rb".freeze, "test/test_logical_types.rb".freeze, "test/test_protocol.rb".freeze, "test/test_schema.rb".freeze, "test/test_schema_compatibility.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_schema_validator.rb".freeze, "test/test_socket_transport.rb".freeze, "test/tool.rb".freeze]
16
16
  s.homepage = "https://avro.apache.org/".freeze
17
- s.licenses = ["Apache License 2.0 (Apache-2.0)".freeze]
17
+ s.licenses = ["Apache-2.0".freeze]
18
18
  s.rdoc_options = ["--line-numbers".freeze, "--title".freeze, "Avro".freeze]
19
19
  s.rubyforge_project = "avro".freeze
20
20
  s.rubygems_version = "2.5.2.1".freeze
21
21
  s.summary = "Apache Avro for Ruby".freeze
22
- s.test_files = ["test/test_schema.rb".freeze, "test/test_socket_transport.rb".freeze, "test/test_io.rb".freeze, "test/test_logical_types.rb".freeze, "test/test_help.rb".freeze, "test/test_datafile.rb".freeze, "test/test_protocol.rb".freeze, "test/test_schema_validator.rb".freeze, "test/test_schema_compatibility.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_fingerprints.rb".freeze]
22
+ s.test_files = ["test/test_help.rb".freeze, "test/test_io.rb".freeze, "test/test_socket_transport.rb".freeze, "test/test_fingerprints.rb".freeze, "test/test_schema.rb".freeze, "test/test_schema_compatibility.rb".freeze, "test/test_schema_validator.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_protocol.rb".freeze, "test/test_datafile.rb".freeze, "test/test_logical_types.rb".freeze]
23
23
 
24
24
  if s.respond_to? :specification_version then
25
25
  s.specification_version = 4
26
26
 
27
27
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
28
- s.add_runtime_dependency(%q<multi_json>.freeze, [">= 0"])
28
+ s.add_runtime_dependency(%q<multi_json>.freeze, ["~> 1"])
29
29
  else
30
- s.add_dependency(%q<multi_json>.freeze, [">= 0"])
30
+ s.add_dependency(%q<multi_json>.freeze, ["~> 1"])
31
31
  end
32
32
  else
33
- s.add_dependency(%q<multi_json>.freeze, [">= 0"])
33
+ s.add_dependency(%q<multi_json>.freeze, ["~> 1"])
34
34
  end
35
35
  end
@@ -19,7 +19,7 @@ require 'rubygems'
19
19
  require 'test/unit'
20
20
  require 'avro'
21
21
 
22
- CODECS_TO_VALIDATE = ['deflate'] # The 'null' codec is implicitly included
22
+ CODECS_TO_VALIDATE = ['deflate', 'snappy', 'zstandard'] # The 'null' codec is implicitly included
23
23
 
24
24
  class TestInterop < Test::Unit::TestCase
25
25
  HERE = File.expand_path(File.dirname(__FILE__))
@@ -27,12 +27,14 @@ class TestInterop < Test::Unit::TestCase
27
27
  SCHEMAS = SHARE + '/test/schemas'
28
28
 
29
29
  files = Dir[HERE + '/../../../build/interop/data/*.avro'].select do |fn|
30
- sep, codec = File.basename(fn, 'avro').rpartition('_')[1, 2]
30
+ sep, codec = File.basename(fn, '.avro').rpartition('_')[1, 2]
31
31
  sep.empty? || CODECS_TO_VALIDATE.include?(codec)
32
32
  end
33
+ puts "The following files will be tested:"
34
+ puts files
33
35
 
34
36
  files.each do |fn|
35
- define_method("test_read_#{File.basename(fn, 'avro')}") do
37
+ define_method("test_read_#{File.basename(fn, '.avro')}") do
36
38
  projection = Avro::Schema.parse(File.read(SCHEMAS+'/interop.avsc'))
37
39
 
38
40
  File.open(fn) do |f|
@@ -28,18 +28,30 @@ module Avro
28
28
 
29
29
  class AvroTypeError < Avro::AvroError
30
30
  def initialize(schm=nil, datum=nil, msg=nil)
31
- msg ||= "Not a #{schm.to_s}: #{datum}"
31
+ msg ||= "Not a #{schm}: #{datum}"
32
32
  super(msg)
33
33
  end
34
34
  end
35
35
 
36
36
  class << self
37
+ attr_writer :disable_enum_symbol_validation
37
38
  attr_writer :disable_field_default_validation
39
+ attr_writer :disable_schema_name_validation
40
+
41
+ def disable_enum_symbol_validation
42
+ @disable_enum_symbol_validation ||=
43
+ ENV.fetch('AVRO_DISABLE_ENUM_SYMBOL_VALIDATION', '') != ''
44
+ end
38
45
 
39
46
  def disable_field_default_validation
40
47
  @disable_field_default_validation ||=
41
48
  ENV.fetch('AVRO_DISABLE_FIELD_DEFAULT_VALIDATION', '') != ''
42
49
  end
50
+
51
+ def disable_schema_name_validation
52
+ @disable_schema_name_validation ||=
53
+ ENV.fetch('AVRO_DISABLE_SCHEMA_NAME_VALIDATION', '') != ''
54
+ end
43
55
  end
44
56
  end
45
57
 
@@ -372,9 +372,32 @@ module Avro
372
372
  end
373
373
  end
374
374
 
375
+ class ZstandardCodec
376
+ def codec_name; 'zstandard'; end
377
+
378
+ def decompress(data)
379
+ load_zstandard!
380
+ Zstd.decompress(data)
381
+ end
382
+
383
+ def compress(data)
384
+ load_zstandard!
385
+ Zstd.compress(data)
386
+ end
387
+
388
+ private
389
+
390
+ def load_zstandard!
391
+ require 'zstd-ruby' unless defined?(Zstd)
392
+ rescue LoadError
393
+ raise LoadError, "Zstandard compression is not available, please install the `zstd-ruby` gem."
394
+ end
395
+ end
396
+
375
397
  DataFile.register_codec NullCodec
376
398
  DataFile.register_codec DeflateCodec
377
399
  DataFile.register_codec SnappyCodec
400
+ DataFile.register_codec ZstandardCodec
378
401
 
379
402
  # TODO this constant won't be updated if you register another codec.
380
403
  # Deprecated in favor of Avro::DataFile::codecs
@@ -172,7 +172,7 @@ module Avro
172
172
  end
173
173
 
174
174
  # null is written as zero bytes
175
- def write_null(datum)
175
+ def write_null(_datum)
176
176
  nil
177
177
  end
178
178
 
@@ -292,7 +292,7 @@ module Avro
292
292
  readers_schema.type_adapter.decode(datum)
293
293
  end
294
294
 
295
- def read_fixed(writers_schema, readers_schema, decoder)
295
+ def read_fixed(writers_schema, _readers_schema, decoder)
296
296
  decoder.read(writers_schema.size)
297
297
  end
298
298
 
@@ -300,12 +300,12 @@ module Avro
300
300
  index_of_symbol = decoder.read_int
301
301
  read_symbol = writers_schema.symbols[index_of_symbol]
302
302
 
303
- # TODO(jmhodges): figure out what unset means for resolution
304
- # schema resolution
305
- unless readers_schema.symbols.include?(read_symbol)
306
- # 'unset' here
303
+ if !readers_schema.symbols.include?(read_symbol) && readers_schema.default
304
+ read_symbol = readers_schema.default
307
305
  end
308
306
 
307
+ # This implementation deviates from the spec by always returning
308
+ # a symbol.
309
309
  read_symbol
310
310
  end
311
311
 
@@ -359,26 +359,28 @@ module Avro
359
359
  readers_fields_hash = readers_schema.fields_hash
360
360
  read_record = {}
361
361
  writers_schema.fields.each do |field|
362
- if readers_field = readers_fields_hash[field.name]
362
+ readers_field = readers_fields_hash[field.name]
363
+ if readers_field
363
364
  field_val = read_data(field.type, readers_field.type, decoder)
364
365
  read_record[field.name] = field_val
366
+ elsif readers_schema.fields_by_alias.key?(field.name)
367
+ readers_field = readers_schema.fields_by_alias[field.name]
368
+ field_val = read_data(field.type, readers_field.type, decoder)
369
+ read_record[readers_field.name] = field_val
365
370
  else
366
371
  skip_data(field.type, decoder)
367
372
  end
368
373
  end
369
374
 
370
375
  # fill in the default values
371
- if readers_fields_hash.size > read_record.size
372
- writers_fields_hash = writers_schema.fields_hash
373
- readers_fields_hash.each do |field_name, field|
374
- unless writers_fields_hash.has_key? field_name
375
- if field.default?
376
- field_val = read_default_value(field.type, field.default)
377
- read_record[field.name] = field_val
378
- else
379
- raise AvroError, "Missing data for #{field.type} with no default"
380
- end
381
- end
376
+ readers_fields_hash.each do |field_name, field|
377
+ next if read_record.key?(field_name)
378
+
379
+ if field.default?
380
+ field_val = read_default_value(field.type, field.default)
381
+ read_record[field.name] = field_val
382
+ else
383
+ raise AvroError, "Missing data for #{field.type} with no default"
382
384
  end
383
385
  end
384
386
 
@@ -468,7 +470,7 @@ module Avro
468
470
  decoder.skip(writers_schema.size)
469
471
  end
470
472
 
471
- def skip_enum(writers_schema, decoder)
473
+ def skip_enum(_writers_schema, decoder)
472
474
  decoder.skip_int
473
475
  end
474
476
 
@@ -545,7 +547,7 @@ module Avro
545
547
  end
546
548
  end
547
549
 
548
- def write_fixed(writers_schema, datum, encoder)
550
+ def write_fixed(_writers_schema, datum, encoder)
549
551
  encoder.write(datum)
550
552
  end
551
553
 
@@ -591,7 +593,7 @@ module Avro
591
593
  def write_record(writers_schema, datum, encoder)
592
594
  raise AvroTypeError.new(writers_schema, datum) unless datum.is_a?(Hash)
593
595
  writers_schema.fields.each do |field|
594
- write_data(field.type, datum[field.name], encoder)
596
+ write_data(field.type, datum.key?(field.name) ? datum[field.name] : datum[field.name.to_sym], encoder)
595
597
  end
596
598
  end
597
599
  end # DatumWriter
@@ -278,7 +278,7 @@ module Avro::IPC
278
278
  response = call(local_message, request)
279
279
  rescue AvroRemoteError => e
280
280
  error = e
281
- rescue Exception => e
281
+ rescue Exception => e # rubocop:disable Lint/RescueException
282
282
  error = AvroRemoteError.new(e.to_s)
283
283
  end
284
284
 
@@ -350,7 +350,7 @@ module Avro::IPC
350
350
  remote_protocol
351
351
  end
352
352
 
353
- def call(local_message, request)
353
+ def call(_local_message, _request)
354
354
  # Actual work done by server: cf. handler in thrift.
355
355
  raise NotImplementedError
356
356
  end
@@ -29,6 +29,8 @@ module Avro
29
29
  NAMED_TYPES_SYM = Set.new(NAMED_TYPES.map(&:to_sym))
30
30
  VALID_TYPES_SYM = Set.new(VALID_TYPES.map(&:to_sym))
31
31
 
32
+ NAME_REGEX = /^([A-Za-z_][A-Za-z0-9_]*)(\.([A-Za-z_][A-Za-z0-9_]*))*$/
33
+
32
34
  INT_MIN_VALUE = -(1 << 31)
33
35
  INT_MAX_VALUE = (1 << 31) - 1
34
36
  LONG_MIN_VALUE = -(1 << 63)
@@ -53,23 +55,34 @@ module Avro
53
55
 
54
56
  type_sym = type.to_sym
55
57
  if PRIMITIVE_TYPES_SYM.include?(type_sym)
56
- return PrimitiveSchema.new(type_sym, logical_type)
57
-
58
+ case type_sym
59
+ when :bytes
60
+ precision = json_obj['precision']
61
+ scale = json_obj['scale']
62
+ return BytesSchema.new(type_sym, logical_type, precision, scale)
63
+ else
64
+ return PrimitiveSchema.new(type_sym, logical_type)
65
+ end
58
66
  elsif NAMED_TYPES_SYM.include? type_sym
59
67
  name = json_obj['name']
68
+ if !Avro.disable_schema_name_validation && name !~ NAME_REGEX
69
+ raise SchemaParseError, "Name #{name} is invalid for type #{type}!"
70
+ end
60
71
  namespace = json_obj.include?('namespace') ? json_obj['namespace'] : default_namespace
72
+ aliases = json_obj['aliases']
61
73
  case type_sym
62
74
  when :fixed
63
75
  size = json_obj['size']
64
- return FixedSchema.new(name, namespace, size, names, logical_type)
76
+ return FixedSchema.new(name, namespace, size, names, logical_type, aliases)
65
77
  when :enum
66
78
  symbols = json_obj['symbols']
67
79
  doc = json_obj['doc']
68
- return EnumSchema.new(name, namespace, symbols, names, doc)
80
+ default = json_obj['default']
81
+ return EnumSchema.new(name, namespace, symbols, names, doc, default, aliases)
69
82
  when :record, :error
70
83
  fields = json_obj['fields']
71
84
  doc = json_obj['doc']
72
- return RecordSchema.new(name, namespace, fields, names, type_sym, doc)
85
+ return RecordSchema.new(name, namespace, fields, names, type_sym, doc, aliases)
73
86
  else
74
87
  raise SchemaParseError.new("Unknown named type: #{type}")
75
88
  end
@@ -131,6 +144,49 @@ module Avro
131
144
  Digest::SHA256.hexdigest(parsing_form).to_i(16)
132
145
  end
133
146
 
147
+ CRC_EMPTY = 0xc15d213aa4d7a795
148
+
149
+ # The java library caches this value after initialized, so this pattern
150
+ # mimics that.
151
+ @@fp_table = nil
152
+ def initFPTable
153
+ @@fp_table = Array.new(256)
154
+ 256.times do |i|
155
+ fp = i
156
+ 8.times do
157
+ fp = (fp >> 1) ^ ( CRC_EMPTY & -( fp & 1 ) )
158
+ end
159
+ @@fp_table[i] = fp
160
+ end
161
+ end
162
+
163
+ def crc_64_avro_fingerprint
164
+ parsing_form = Avro::SchemaNormalization.to_parsing_form(self)
165
+ data_bytes = parsing_form.unpack("C*")
166
+
167
+ initFPTable unless @@fp_table
168
+
169
+ fp = CRC_EMPTY
170
+ data_bytes.each do |b|
171
+ fp = (fp >> 8) ^ @@fp_table[ (fp ^ b) & 0xff ]
172
+ end
173
+ fp
174
+ end
175
+
176
+ SINGLE_OBJECT_MAGIC_NUMBER = [0xC3, 0x01]
177
+ def single_object_encoding_header
178
+ [SINGLE_OBJECT_MAGIC_NUMBER, single_object_schema_fingerprint].flatten
179
+ end
180
+ def single_object_schema_fingerprint
181
+ working = crc_64_avro_fingerprint
182
+ bytes = Array.new(8)
183
+ 8.times do |i|
184
+ bytes[7 - i] = (working & 0xff)
185
+ working = working >> 8
186
+ end
187
+ bytes
188
+ end
189
+
134
190
  def read?(writers_schema)
135
191
  SchemaCompatibility.can_read?(writers_schema, self)
136
192
  end
@@ -143,11 +199,11 @@ module Avro
143
199
  SchemaCompatibility.mutual_read?(other_schema, self)
144
200
  end
145
201
 
146
- def ==(other, seen=nil)
202
+ def ==(other, _seen=nil)
147
203
  other.is_a?(Schema) && type_sym == other.type_sym
148
204
  end
149
205
 
150
- def hash(seen=nil)
206
+ def hash(_seen=nil)
151
207
  type_sym.hash
152
208
  end
153
209
 
@@ -165,7 +221,7 @@ module Avro
165
221
  end
166
222
  end
167
223
 
168
- def to_avro(names=nil)
224
+ def to_avro(_names=nil)
169
225
  props = {'type' => type}
170
226
  props['logicalType'] = logical_type if logical_type
171
227
  props
@@ -175,14 +231,26 @@ module Avro
175
231
  MultiJson.dump to_avro
176
232
  end
177
233
 
234
+ def validate_aliases!
235
+ unless aliases.nil? ||
236
+ (aliases.is_a?(Array) && aliases.all? { |a| a.is_a?(String) })
237
+
238
+ raise Avro::SchemaParseError,
239
+ "Invalid aliases value #{aliases.inspect} for #{type} #{name}. Must be an array of strings."
240
+ end
241
+ end
242
+ private :validate_aliases!
243
+
178
244
  class NamedSchema < Schema
179
- attr_reader :name, :namespace
245
+ attr_reader :name, :namespace, :aliases
180
246
 
181
- def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil)
247
+ def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil, aliases=nil)
182
248
  super(type, logical_type)
183
249
  @name, @namespace = Name.extract_namespace(name, namespace)
184
- @doc = doc
185
- names = Name.add_name(names, self)
250
+ @doc = doc
251
+ @aliases = aliases
252
+ validate_aliases! if aliases
253
+ Name.add_name(names, self)
186
254
  end
187
255
 
188
256
  def to_avro(names=Set.new)
@@ -192,33 +260,53 @@ module Avro
192
260
  end
193
261
  props = {'name' => @name}
194
262
  props.merge!('namespace' => @namespace) if @namespace
195
- props.merge!('doc' => @doc) if @doc
263
+ props['namespace'] = @namespace if @namespace
264
+ props['doc'] = @doc if @doc
265
+ props['aliases'] = aliases if aliases && aliases.any?
196
266
  super.merge props
197
267
  end
198
268
 
199
269
  def fullname
200
270
  @fullname ||= Name.make_fullname(@name, @namespace)
201
271
  end
272
+
273
+ def fullname_aliases
274
+ @fullname_aliases ||= if aliases
275
+ aliases.map { |a| Name.make_fullname(a, namespace) }
276
+ else
277
+ []
278
+ end
279
+ end
280
+
281
+ def match_fullname?(name)
282
+ name == fullname || fullname_aliases.include?(name)
283
+ end
202
284
  end
203
285
 
204
286
  class RecordSchema < NamedSchema
205
287
  attr_reader :fields, :doc
206
288
 
207
289
  def self.make_field_objects(field_data, names, namespace=nil)
208
- field_objects, field_names = [], Set.new
209
- field_data.each_with_index do |field, i|
290
+ field_objects, field_names, alias_names = [], Set.new, Set.new
291
+ field_data.each do |field|
210
292
  if field.respond_to?(:[]) # TODO(jmhodges) wtffffff
211
293
  type = field['type']
212
294
  name = field['name']
213
295
  default = field.key?('default') ? field['default'] : :no_default
214
296
  order = field['order']
215
297
  doc = field['doc']
216
- new_field = Field.new(type, name, default, order, names, namespace, doc)
298
+ aliases = field['aliases']
299
+ new_field = Field.new(type, name, default, order, names, namespace, doc, aliases)
217
300
  # make sure field name has not been used yet
218
301
  if field_names.include?(new_field.name)
219
302
  raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
220
303
  end
221
304
  field_names << new_field.name
305
+ # make sure alias has not be been used yet
306
+ if new_field.aliases && alias_names.intersect?(new_field.aliases.to_set)
307
+ raise SchemaParseError, "Alias #{(alias_names & new_field.aliases).to_a} already in use"
308
+ end
309
+ alias_names.merge(new_field.aliases) if new_field.aliases
222
310
  else
223
311
  raise SchemaParseError, "Not a valid field: #{field}"
224
312
  end
@@ -227,14 +315,14 @@ module Avro
227
315
  field_objects
228
316
  end
229
317
 
230
- def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil)
318
+ def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil, aliases=nil)
231
319
  if schema_type == :request || schema_type == 'request'
232
320
  @type_sym = schema_type.to_sym
233
321
  @namespace = namespace
234
322
  @name = nil
235
323
  @doc = nil
236
324
  else
237
- super(schema_type, name, namespace, names, doc)
325
+ super(schema_type, name, namespace, names, doc, nil, aliases)
238
326
  end
239
327
  @fields = if fields
240
328
  RecordSchema.make_field_objects(fields, names, self.namespace)
@@ -247,6 +335,16 @@ module Avro
247
335
  @fields_hash ||= fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
248
336
  end
249
337
 
338
+ def fields_by_alias
339
+ @fields_by_alias ||= fields.each_with_object({}) do |field, hash|
340
+ if field.aliases
341
+ field.aliases.each do |a|
342
+ hash[a] = field
343
+ end
344
+ end
345
+ end
346
+ end
347
+
250
348
  def to_avro(names=Set.new)
251
349
  hsh = super
252
350
  return hsh unless hsh.is_a?(Hash)
@@ -313,20 +411,41 @@ module Avro
313
411
  end
314
412
 
315
413
  class EnumSchema < NamedSchema
316
- attr_reader :symbols, :doc
414
+ SYMBOL_REGEX = /^[A-Za-z_][A-Za-z0-9_]*$/
415
+
416
+ attr_reader :symbols, :doc, :default
317
417
 
318
- def initialize(name, space, symbols, names=nil, doc=nil)
418
+ def initialize(name, space, symbols, names=nil, doc=nil, default=nil, aliases=nil)
319
419
  if symbols.uniq.length < symbols.length
320
420
  fail_msg = "Duplicate symbol: #{symbols}"
321
421
  raise Avro::SchemaParseError, fail_msg
322
422
  end
323
- super(:enum, name, space, names, doc)
423
+
424
+ if !Avro.disable_enum_symbol_validation
425
+ invalid_symbols = symbols.select { |symbol| symbol !~ SYMBOL_REGEX }
426
+
427
+ if invalid_symbols.any?
428
+ raise SchemaParseError,
429
+ "Invalid symbols for #{name}: #{invalid_symbols.join(', ')} don't match #{SYMBOL_REGEX.inspect}"
430
+ end
431
+ end
432
+
433
+ if default && !symbols.include?(default)
434
+ raise Avro::SchemaParseError, "Default '#{default}' is not a valid symbol for enum #{name}"
435
+ end
436
+
437
+ super(:enum, name, space, names, doc, nil, aliases)
438
+ @default = default
324
439
  @symbols = symbols
325
440
  end
326
441
 
327
- def to_avro(names=Set.new)
442
+ def to_avro(_names=Set.new)
328
443
  avro = super
329
- avro.is_a?(Hash) ? avro.merge('symbols' => symbols) : avro
444
+ if avro.is_a?(Hash)
445
+ avro['symbols'] = symbols
446
+ avro['default'] = default if default
447
+ end
448
+ avro
330
449
  end
331
450
  end
332
451
 
@@ -348,14 +467,32 @@ module Avro
348
467
  end
349
468
  end
350
469
 
470
+ class BytesSchema < PrimitiveSchema
471
+ attr_reader :precision, :scale
472
+ def initialize(type, logical_type=nil, precision=nil, scale=nil)
473
+ super(type.to_sym, logical_type)
474
+ @precision = precision
475
+ @scale = scale
476
+ end
477
+
478
+ def to_avro(names=nil)
479
+ avro = super
480
+ return avro if avro.is_a?(String)
481
+
482
+ avro['precision'] = precision if precision
483
+ avro['scale'] = scale if scale
484
+ avro
485
+ end
486
+ end
487
+
351
488
  class FixedSchema < NamedSchema
352
489
  attr_reader :size
353
- def initialize(name, space, size, names=nil, logical_type=nil)
490
+ def initialize(name, space, size, names=nil, logical_type=nil, aliases=nil)
354
491
  # Ensure valid cto args
355
492
  unless size.is_a?(Integer)
356
493
  raise AvroError, 'Fixed Schema requires a valid integer for size property.'
357
494
  end
358
- super(:fixed, name, space, names, nil, logical_type)
495
+ super(:fixed, name, space, names, nil, logical_type, aliases)
359
496
  @size = size
360
497
  end
361
498
 
@@ -366,14 +503,16 @@ module Avro
366
503
  end
367
504
 
368
505
  class Field < Schema
369
- attr_reader :type, :name, :default, :order, :doc
506
+ attr_reader :type, :name, :default, :order, :doc, :aliases
370
507
 
371
- def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil)
508
+ def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil, aliases=nil)
372
509
  @type = subparse(type, names, namespace)
373
510
  @name = name
374
511
  @default = default
375
512
  @order = order
376
513
  @doc = doc
514
+ @aliases = aliases
515
+ validate_aliases! if aliases
377
516
  validate_default! if default? && !Avro.disable_field_default_validation
378
517
  end
379
518
 
@@ -389,6 +528,10 @@ module Avro
389
528
  end
390
529
  end
391
530
 
531
+ def alias_names
532
+ @alias_names ||= Array(aliases)
533
+ end
534
+
392
535
  private
393
536
 
394
537
  def validate_default!