avro 1.9.1 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 96c7660634a5f7e85c04d6895dc94ae20a2ea9c8
4
- data.tar.gz: 4e68260b9070cc565aa152988d9809a3f7671772
3
+ metadata.gz: eab84afb7b30d99dee71a7e360dba2a36957500b
4
+ data.tar.gz: 8211aea9ad1e00a4f94bb3edc2aeaf9ab06edffa
5
5
  SHA512:
6
- metadata.gz: '064323997a22d0341e21d93bda5855a27d3859e8f247a85032854198e00330a278912f12cf6037fb374e1e322b009bd902fdff112ededb78b994f6978d6db020'
7
- data.tar.gz: ecdec673bb86ca8d8571198eebf0bada327f71134ddc176dbc327bdea27be31b62838f37a5952edaf33398f21591677268f4b15bee28176cac88b839b8df44b6
6
+ metadata.gz: c51889575e3cd689f7288bdd7a0d8250a8147a8eaf568805f726991f9f115c28f8ca9fa396262c687021859cbd9a26c5f196b7f73a174a2da9dd7f22764fe8d8
7
+ data.tar.gz: 0d936920e8b8ea8edd70583164bd519ac46108de4ab9f7d359d6edb7e8d1941eb3bad06ed8462cdc6bac2d3152c16faba49833d115cc6240e26d3cf51f746c08
data/Rakefile CHANGED
@@ -23,8 +23,8 @@ Echoe.new('avro', VERSION) do |p|
23
23
  p.summary = "Apache Avro for Ruby"
24
24
  p.description = "Avro is a data serialization and RPC format"
25
25
  p.url = "https://avro.apache.org/"
26
- p.runtime_dependencies = %w[multi_json]
27
- p.licenses = ["Apache License 2.0 (Apache-2.0)"]
26
+ p.runtime_dependencies = ["multi_json ~>1"]
27
+ p.licenses = ["Apache-2.0"]
28
28
  end
29
29
 
30
30
  t = Rake::TestTask.new(:interop)
@@ -38,17 +38,13 @@ task :generate_interop do
38
38
 
39
39
  schema = Avro::Schema.parse(File.read(SCHEMAS + '/interop.avsc'))
40
40
  r = RandomData.new(schema, ENV['SEED'])
41
- f = File.open(BUILD + '/interop/data/ruby.avro', 'w')
42
- writer = Avro::DataFile::Writer.new(f, Avro::IO::DatumWriter.new(schema), schema)
43
- begin
44
- writer << r.next
45
- writer << r.next
46
- ensure
47
- writer.close
48
- end
49
-
50
- Avro::DataFile.open(BUILD + '/interop/data/ruby_deflate.avro', 'w', schema.to_s, :deflate) do |writer|
51
- 20.times { writer << r.next }
41
+ Avro::DataFile.codecs.each do |name, codec|
42
+ next unless codec
43
+ filename = name == 'null' ? 'ruby.avro' : "ruby_#{name}.avro"
44
+ path = File.join(BUILD, 'interop/data', filename)
45
+ Avro::DataFile.open(path, 'w', schema.to_s, name) do |writer|
46
+ writer << r.next
47
+ end
52
48
  end
53
49
  end
54
50
 
@@ -1,35 +1,35 @@
1
1
  # -*- encoding: utf-8 -*-
2
- # stub: avro 1.9.1 ruby lib
2
+ # stub: avro 1.10.0 ruby lib
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "avro".freeze
6
- s.version = "1.9.1"
6
+ s.version = "1.10.0"
7
7
 
8
8
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2".freeze) if s.respond_to? :required_rubygems_version=
9
9
  s.require_paths = ["lib".freeze]
10
10
  s.authors = ["Apache Software Foundation".freeze]
11
- s.date = "2019-08-28"
11
+ s.date = "2020-06-22"
12
12
  s.description = "Avro is a data serialization and RPC format".freeze
13
13
  s.email = "dev@avro.apache.org".freeze
14
14
  s.extra_rdoc_files = ["CHANGELOG".freeze, "LICENSE".freeze, "lib/avro.rb".freeze, "lib/avro/data_file.rb".freeze, "lib/avro/io.rb".freeze, "lib/avro/ipc.rb".freeze, "lib/avro/logical_types.rb".freeze, "lib/avro/protocol.rb".freeze, "lib/avro/schema.rb".freeze, "lib/avro/schema_compatibility.rb".freeze, "lib/avro/schema_normalization.rb".freeze, "lib/avro/schema_validator.rb".freeze]
15
15
  s.files = ["CHANGELOG".freeze, "LICENSE".freeze, "Manifest".freeze, "NOTICE".freeze, "Rakefile".freeze, "avro.gemspec".freeze, "interop/test_interop.rb".freeze, "lib/avro.rb".freeze, "lib/avro/data_file.rb".freeze, "lib/avro/io.rb".freeze, "lib/avro/ipc.rb".freeze, "lib/avro/logical_types.rb".freeze, "lib/avro/protocol.rb".freeze, "lib/avro/schema.rb".freeze, "lib/avro/schema_compatibility.rb".freeze, "lib/avro/schema_normalization.rb".freeze, "lib/avro/schema_validator.rb".freeze, "test/case_finder.rb".freeze, "test/random_data.rb".freeze, "test/sample_ipc_client.rb".freeze, "test/sample_ipc_http_client.rb".freeze, "test/sample_ipc_http_server.rb".freeze, "test/sample_ipc_server.rb".freeze, "test/test_datafile.rb".freeze, "test/test_fingerprints.rb".freeze, "test/test_help.rb".freeze, "test/test_io.rb".freeze, "test/test_logical_types.rb".freeze, "test/test_protocol.rb".freeze, "test/test_schema.rb".freeze, "test/test_schema_compatibility.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_schema_validator.rb".freeze, "test/test_socket_transport.rb".freeze, "test/tool.rb".freeze]
16
16
  s.homepage = "https://avro.apache.org/".freeze
17
- s.licenses = ["Apache License 2.0 (Apache-2.0)".freeze]
17
+ s.licenses = ["Apache-2.0".freeze]
18
18
  s.rdoc_options = ["--line-numbers".freeze, "--title".freeze, "Avro".freeze]
19
19
  s.rubyforge_project = "avro".freeze
20
20
  s.rubygems_version = "2.5.2.1".freeze
21
21
  s.summary = "Apache Avro for Ruby".freeze
22
- s.test_files = ["test/test_schema_validator.rb".freeze, "test/test_help.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_datafile.rb".freeze, "test/test_schema.rb".freeze, "test/test_io.rb".freeze, "test/test_socket_transport.rb".freeze, "test/test_schema_compatibility.rb".freeze, "test/test_logical_types.rb".freeze, "test/test_fingerprints.rb".freeze, "test/test_protocol.rb".freeze]
22
+ s.test_files = ["test/test_help.rb".freeze, "test/test_io.rb".freeze, "test/test_socket_transport.rb".freeze, "test/test_fingerprints.rb".freeze, "test/test_schema.rb".freeze, "test/test_schema_compatibility.rb".freeze, "test/test_schema_validator.rb".freeze, "test/test_schema_normalization.rb".freeze, "test/test_protocol.rb".freeze, "test/test_datafile.rb".freeze, "test/test_logical_types.rb".freeze]
23
23
 
24
24
  if s.respond_to? :specification_version then
25
25
  s.specification_version = 4
26
26
 
27
27
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
28
- s.add_runtime_dependency(%q<multi_json>.freeze, [">= 0"])
28
+ s.add_runtime_dependency(%q<multi_json>.freeze, ["~> 1"])
29
29
  else
30
- s.add_dependency(%q<multi_json>.freeze, [">= 0"])
30
+ s.add_dependency(%q<multi_json>.freeze, ["~> 1"])
31
31
  end
32
32
  else
33
- s.add_dependency(%q<multi_json>.freeze, [">= 0"])
33
+ s.add_dependency(%q<multi_json>.freeze, ["~> 1"])
34
34
  end
35
35
  end
@@ -19,12 +19,22 @@ require 'rubygems'
19
19
  require 'test/unit'
20
20
  require 'avro'
21
21
 
22
+ CODECS_TO_VALIDATE = ['deflate', 'snappy', 'zstandard'] # The 'null' codec is implicitly included
23
+
22
24
  class TestInterop < Test::Unit::TestCase
23
25
  HERE = File.expand_path(File.dirname(__FILE__))
24
26
  SHARE = HERE + '/../../../share'
25
27
  SCHEMAS = SHARE + '/test/schemas'
26
- Dir[HERE + '/../../../build/interop/data/*'].each do |fn|
27
- define_method("test_read_#{File.basename(fn, 'avro')}") do
28
+
29
+ files = Dir[HERE + '/../../../build/interop/data/*.avro'].select do |fn|
30
+ sep, codec = File.basename(fn, '.avro').rpartition('_')[1, 2]
31
+ sep.empty? || CODECS_TO_VALIDATE.include?(codec)
32
+ end
33
+ puts "The following files will be tested:"
34
+ puts files
35
+
36
+ files.each do |fn|
37
+ define_method("test_read_#{File.basename(fn, '.avro')}") do
28
38
  projection = Avro::Schema.parse(File.read(SCHEMAS+'/interop.avsc'))
29
39
 
30
40
  File.open(fn) do |f|
@@ -28,18 +28,30 @@ module Avro
28
28
 
29
29
  class AvroTypeError < Avro::AvroError
30
30
  def initialize(schm=nil, datum=nil, msg=nil)
31
- msg ||= "Not a #{schm.to_s}: #{datum}"
31
+ msg ||= "Not a #{schm}: #{datum}"
32
32
  super(msg)
33
33
  end
34
34
  end
35
35
 
36
36
  class << self
37
+ attr_writer :disable_enum_symbol_validation
37
38
  attr_writer :disable_field_default_validation
39
+ attr_writer :disable_schema_name_validation
40
+
41
+ def disable_enum_symbol_validation
42
+ @disable_enum_symbol_validation ||=
43
+ ENV.fetch('AVRO_DISABLE_ENUM_SYMBOL_VALIDATION', '') != ''
44
+ end
38
45
 
39
46
  def disable_field_default_validation
40
47
  @disable_field_default_validation ||=
41
48
  ENV.fetch('AVRO_DISABLE_FIELD_DEFAULT_VALIDATION', '') != ''
42
49
  end
50
+
51
+ def disable_schema_name_validation
52
+ @disable_schema_name_validation ||=
53
+ ENV.fetch('AVRO_DISABLE_SCHEMA_NAME_VALIDATION', '') != ''
54
+ end
43
55
  end
44
56
  end
45
57
 
@@ -372,9 +372,32 @@ module Avro
372
372
  end
373
373
  end
374
374
 
375
+ class ZstandardCodec
376
+ def codec_name; 'zstandard'; end
377
+
378
+ def decompress(data)
379
+ load_zstandard!
380
+ Zstd.decompress(data)
381
+ end
382
+
383
+ def compress(data)
384
+ load_zstandard!
385
+ Zstd.compress(data)
386
+ end
387
+
388
+ private
389
+
390
+ def load_zstandard!
391
+ require 'zstd-ruby' unless defined?(Zstd)
392
+ rescue LoadError
393
+ raise LoadError, "Zstandard compression is not available, please install the `zstd-ruby` gem."
394
+ end
395
+ end
396
+
375
397
  DataFile.register_codec NullCodec
376
398
  DataFile.register_codec DeflateCodec
377
399
  DataFile.register_codec SnappyCodec
400
+ DataFile.register_codec ZstandardCodec
378
401
 
379
402
  # TODO this constant won't be updated if you register another codec.
380
403
  # Deprecated in favor of Avro::DataFile::codecs
@@ -172,7 +172,7 @@ module Avro
172
172
  end
173
173
 
174
174
  # null is written as zero bytes
175
- def write_null(datum)
175
+ def write_null(_datum)
176
176
  nil
177
177
  end
178
178
 
@@ -292,7 +292,7 @@ module Avro
292
292
  readers_schema.type_adapter.decode(datum)
293
293
  end
294
294
 
295
- def read_fixed(writers_schema, readers_schema, decoder)
295
+ def read_fixed(writers_schema, _readers_schema, decoder)
296
296
  decoder.read(writers_schema.size)
297
297
  end
298
298
 
@@ -300,12 +300,12 @@ module Avro
300
300
  index_of_symbol = decoder.read_int
301
301
  read_symbol = writers_schema.symbols[index_of_symbol]
302
302
 
303
- # TODO(jmhodges): figure out what unset means for resolution
304
- # schema resolution
305
- unless readers_schema.symbols.include?(read_symbol)
306
- # 'unset' here
303
+ if !readers_schema.symbols.include?(read_symbol) && readers_schema.default
304
+ read_symbol = readers_schema.default
307
305
  end
308
306
 
307
+ # This implementation deviates from the spec by always returning
308
+ # a symbol.
309
309
  read_symbol
310
310
  end
311
311
 
@@ -359,26 +359,28 @@ module Avro
359
359
  readers_fields_hash = readers_schema.fields_hash
360
360
  read_record = {}
361
361
  writers_schema.fields.each do |field|
362
- if readers_field = readers_fields_hash[field.name]
362
+ readers_field = readers_fields_hash[field.name]
363
+ if readers_field
363
364
  field_val = read_data(field.type, readers_field.type, decoder)
364
365
  read_record[field.name] = field_val
366
+ elsif readers_schema.fields_by_alias.key?(field.name)
367
+ readers_field = readers_schema.fields_by_alias[field.name]
368
+ field_val = read_data(field.type, readers_field.type, decoder)
369
+ read_record[readers_field.name] = field_val
365
370
  else
366
371
  skip_data(field.type, decoder)
367
372
  end
368
373
  end
369
374
 
370
375
  # fill in the default values
371
- if readers_fields_hash.size > read_record.size
372
- writers_fields_hash = writers_schema.fields_hash
373
- readers_fields_hash.each do |field_name, field|
374
- unless writers_fields_hash.has_key? field_name
375
- if field.default?
376
- field_val = read_default_value(field.type, field.default)
377
- read_record[field.name] = field_val
378
- else
379
- raise AvroError, "Missing data for #{field.type} with no default"
380
- end
381
- end
376
+ readers_fields_hash.each do |field_name, field|
377
+ next if read_record.key?(field_name)
378
+
379
+ if field.default?
380
+ field_val = read_default_value(field.type, field.default)
381
+ read_record[field.name] = field_val
382
+ else
383
+ raise AvroError, "Missing data for #{field.type} with no default"
382
384
  end
383
385
  end
384
386
 
@@ -468,7 +470,7 @@ module Avro
468
470
  decoder.skip(writers_schema.size)
469
471
  end
470
472
 
471
- def skip_enum(writers_schema, decoder)
473
+ def skip_enum(_writers_schema, decoder)
472
474
  decoder.skip_int
473
475
  end
474
476
 
@@ -545,7 +547,7 @@ module Avro
545
547
  end
546
548
  end
547
549
 
548
- def write_fixed(writers_schema, datum, encoder)
550
+ def write_fixed(_writers_schema, datum, encoder)
549
551
  encoder.write(datum)
550
552
  end
551
553
 
@@ -591,7 +593,7 @@ module Avro
591
593
  def write_record(writers_schema, datum, encoder)
592
594
  raise AvroTypeError.new(writers_schema, datum) unless datum.is_a?(Hash)
593
595
  writers_schema.fields.each do |field|
594
- write_data(field.type, datum[field.name], encoder)
596
+ write_data(field.type, datum.key?(field.name) ? datum[field.name] : datum[field.name.to_sym], encoder)
595
597
  end
596
598
  end
597
599
  end # DatumWriter
@@ -278,7 +278,7 @@ module Avro::IPC
278
278
  response = call(local_message, request)
279
279
  rescue AvroRemoteError => e
280
280
  error = e
281
- rescue Exception => e
281
+ rescue Exception => e # rubocop:disable Lint/RescueException
282
282
  error = AvroRemoteError.new(e.to_s)
283
283
  end
284
284
 
@@ -350,7 +350,7 @@ module Avro::IPC
350
350
  remote_protocol
351
351
  end
352
352
 
353
- def call(local_message, request)
353
+ def call(_local_message, _request)
354
354
  # Actual work done by server: cf. handler in thrift.
355
355
  raise NotImplementedError
356
356
  end
@@ -29,6 +29,8 @@ module Avro
29
29
  NAMED_TYPES_SYM = Set.new(NAMED_TYPES.map(&:to_sym))
30
30
  VALID_TYPES_SYM = Set.new(VALID_TYPES.map(&:to_sym))
31
31
 
32
+ NAME_REGEX = /^([A-Za-z_][A-Za-z0-9_]*)(\.([A-Za-z_][A-Za-z0-9_]*))*$/
33
+
32
34
  INT_MIN_VALUE = -(1 << 31)
33
35
  INT_MAX_VALUE = (1 << 31) - 1
34
36
  LONG_MIN_VALUE = -(1 << 63)
@@ -53,23 +55,34 @@ module Avro
53
55
 
54
56
  type_sym = type.to_sym
55
57
  if PRIMITIVE_TYPES_SYM.include?(type_sym)
56
- return PrimitiveSchema.new(type_sym, logical_type)
57
-
58
+ case type_sym
59
+ when :bytes
60
+ precision = json_obj['precision']
61
+ scale = json_obj['scale']
62
+ return BytesSchema.new(type_sym, logical_type, precision, scale)
63
+ else
64
+ return PrimitiveSchema.new(type_sym, logical_type)
65
+ end
58
66
  elsif NAMED_TYPES_SYM.include? type_sym
59
67
  name = json_obj['name']
68
+ if !Avro.disable_schema_name_validation && name !~ NAME_REGEX
69
+ raise SchemaParseError, "Name #{name} is invalid for type #{type}!"
70
+ end
60
71
  namespace = json_obj.include?('namespace') ? json_obj['namespace'] : default_namespace
72
+ aliases = json_obj['aliases']
61
73
  case type_sym
62
74
  when :fixed
63
75
  size = json_obj['size']
64
- return FixedSchema.new(name, namespace, size, names, logical_type)
76
+ return FixedSchema.new(name, namespace, size, names, logical_type, aliases)
65
77
  when :enum
66
78
  symbols = json_obj['symbols']
67
79
  doc = json_obj['doc']
68
- return EnumSchema.new(name, namespace, symbols, names, doc)
80
+ default = json_obj['default']
81
+ return EnumSchema.new(name, namespace, symbols, names, doc, default, aliases)
69
82
  when :record, :error
70
83
  fields = json_obj['fields']
71
84
  doc = json_obj['doc']
72
- return RecordSchema.new(name, namespace, fields, names, type_sym, doc)
85
+ return RecordSchema.new(name, namespace, fields, names, type_sym, doc, aliases)
73
86
  else
74
87
  raise SchemaParseError.new("Unknown named type: #{type}")
75
88
  end
@@ -131,6 +144,49 @@ module Avro
131
144
  Digest::SHA256.hexdigest(parsing_form).to_i(16)
132
145
  end
133
146
 
147
+ CRC_EMPTY = 0xc15d213aa4d7a795
148
+
149
+ # The Java library caches this value once it has been initialized, so this pattern
150
+ # mimics that.
151
+ @@fp_table = nil
152
+ def initFPTable
153
+ @@fp_table = Array.new(256)
154
+ 256.times do |i|
155
+ fp = i
156
+ 8.times do
157
+ fp = (fp >> 1) ^ ( CRC_EMPTY & -( fp & 1 ) )
158
+ end
159
+ @@fp_table[i] = fp
160
+ end
161
+ end
162
+
163
+ def crc_64_avro_fingerprint
164
+ parsing_form = Avro::SchemaNormalization.to_parsing_form(self)
165
+ data_bytes = parsing_form.unpack("C*")
166
+
167
+ initFPTable unless @@fp_table
168
+
169
+ fp = CRC_EMPTY
170
+ data_bytes.each do |b|
171
+ fp = (fp >> 8) ^ @@fp_table[ (fp ^ b) & 0xff ]
172
+ end
173
+ fp
174
+ end
175
+
176
+ SINGLE_OBJECT_MAGIC_NUMBER = [0xC3, 0x01]
177
+ def single_object_encoding_header
178
+ [SINGLE_OBJECT_MAGIC_NUMBER, single_object_schema_fingerprint].flatten
179
+ end
180
+ def single_object_schema_fingerprint
181
+ working = crc_64_avro_fingerprint
182
+ bytes = Array.new(8)
183
+ 8.times do |i|
184
+ bytes[7 - i] = (working & 0xff)
185
+ working = working >> 8
186
+ end
187
+ bytes
188
+ end
189
+
134
190
  def read?(writers_schema)
135
191
  SchemaCompatibility.can_read?(writers_schema, self)
136
192
  end
@@ -143,11 +199,11 @@ module Avro
143
199
  SchemaCompatibility.mutual_read?(other_schema, self)
144
200
  end
145
201
 
146
- def ==(other, seen=nil)
202
+ def ==(other, _seen=nil)
147
203
  other.is_a?(Schema) && type_sym == other.type_sym
148
204
  end
149
205
 
150
- def hash(seen=nil)
206
+ def hash(_seen=nil)
151
207
  type_sym.hash
152
208
  end
153
209
 
@@ -165,7 +221,7 @@ module Avro
165
221
  end
166
222
  end
167
223
 
168
- def to_avro(names=nil)
224
+ def to_avro(_names=nil)
169
225
  props = {'type' => type}
170
226
  props['logicalType'] = logical_type if logical_type
171
227
  props
@@ -175,14 +231,26 @@ module Avro
175
231
  MultiJson.dump to_avro
176
232
  end
177
233
 
234
+ def validate_aliases!
235
+ unless aliases.nil? ||
236
+ (aliases.is_a?(Array) && aliases.all? { |a| a.is_a?(String) })
237
+
238
+ raise Avro::SchemaParseError,
239
+ "Invalid aliases value #{aliases.inspect} for #{type} #{name}. Must be an array of strings."
240
+ end
241
+ end
242
+ private :validate_aliases!
243
+
178
244
  class NamedSchema < Schema
179
- attr_reader :name, :namespace
245
+ attr_reader :name, :namespace, :aliases
180
246
 
181
- def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil)
247
+ def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil, aliases=nil)
182
248
  super(type, logical_type)
183
249
  @name, @namespace = Name.extract_namespace(name, namespace)
184
- @doc = doc
185
- names = Name.add_name(names, self)
250
+ @doc = doc
251
+ @aliases = aliases
252
+ validate_aliases! if aliases
253
+ Name.add_name(names, self)
186
254
  end
187
255
 
188
256
  def to_avro(names=Set.new)
@@ -192,33 +260,53 @@ module Avro
192
260
  end
193
261
  props = {'name' => @name}
194
262
  props.merge!('namespace' => @namespace) if @namespace
195
- props.merge!('doc' => @doc) if @doc
263
+ props['namespace'] = @namespace if @namespace
264
+ props['doc'] = @doc if @doc
265
+ props['aliases'] = aliases if aliases && aliases.any?
196
266
  super.merge props
197
267
  end
198
268
 
199
269
  def fullname
200
270
  @fullname ||= Name.make_fullname(@name, @namespace)
201
271
  end
272
+
273
+ def fullname_aliases
274
+ @fullname_aliases ||= if aliases
275
+ aliases.map { |a| Name.make_fullname(a, namespace) }
276
+ else
277
+ []
278
+ end
279
+ end
280
+
281
+ def match_fullname?(name)
282
+ name == fullname || fullname_aliases.include?(name)
283
+ end
202
284
  end
203
285
 
204
286
  class RecordSchema < NamedSchema
205
287
  attr_reader :fields, :doc
206
288
 
207
289
  def self.make_field_objects(field_data, names, namespace=nil)
208
- field_objects, field_names = [], Set.new
209
- field_data.each_with_index do |field, i|
290
+ field_objects, field_names, alias_names = [], Set.new, Set.new
291
+ field_data.each do |field|
210
292
  if field.respond_to?(:[]) # TODO(jmhodges) wtffffff
211
293
  type = field['type']
212
294
  name = field['name']
213
295
  default = field.key?('default') ? field['default'] : :no_default
214
296
  order = field['order']
215
297
  doc = field['doc']
216
- new_field = Field.new(type, name, default, order, names, namespace, doc)
298
+ aliases = field['aliases']
299
+ new_field = Field.new(type, name, default, order, names, namespace, doc, aliases)
217
300
  # make sure field name has not been used yet
218
301
  if field_names.include?(new_field.name)
219
302
  raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
220
303
  end
221
304
  field_names << new_field.name
305
+ # make sure alias has not been used yet
306
+ if new_field.aliases && alias_names.intersect?(new_field.aliases.to_set)
307
+ raise SchemaParseError, "Alias #{(alias_names & new_field.aliases).to_a} already in use"
308
+ end
309
+ alias_names.merge(new_field.aliases) if new_field.aliases
222
310
  else
223
311
  raise SchemaParseError, "Not a valid field: #{field}"
224
312
  end
@@ -227,14 +315,14 @@ module Avro
227
315
  field_objects
228
316
  end
229
317
 
230
- def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil)
318
+ def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil, aliases=nil)
231
319
  if schema_type == :request || schema_type == 'request'
232
320
  @type_sym = schema_type.to_sym
233
321
  @namespace = namespace
234
322
  @name = nil
235
323
  @doc = nil
236
324
  else
237
- super(schema_type, name, namespace, names, doc)
325
+ super(schema_type, name, namespace, names, doc, nil, aliases)
238
326
  end
239
327
  @fields = if fields
240
328
  RecordSchema.make_field_objects(fields, names, self.namespace)
@@ -247,6 +335,16 @@ module Avro
247
335
  @fields_hash ||= fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
248
336
  end
249
337
 
338
+ def fields_by_alias
339
+ @fields_by_alias ||= fields.each_with_object({}) do |field, hash|
340
+ if field.aliases
341
+ field.aliases.each do |a|
342
+ hash[a] = field
343
+ end
344
+ end
345
+ end
346
+ end
347
+
250
348
  def to_avro(names=Set.new)
251
349
  hsh = super
252
350
  return hsh unless hsh.is_a?(Hash)
@@ -313,20 +411,41 @@ module Avro
313
411
  end
314
412
 
315
413
  class EnumSchema < NamedSchema
316
- attr_reader :symbols, :doc
414
+ SYMBOL_REGEX = /^[A-Za-z_][A-Za-z0-9_]*$/
415
+
416
+ attr_reader :symbols, :doc, :default
317
417
 
318
- def initialize(name, space, symbols, names=nil, doc=nil)
418
+ def initialize(name, space, symbols, names=nil, doc=nil, default=nil, aliases=nil)
319
419
  if symbols.uniq.length < symbols.length
320
420
  fail_msg = "Duplicate symbol: #{symbols}"
321
421
  raise Avro::SchemaParseError, fail_msg
322
422
  end
323
- super(:enum, name, space, names, doc)
423
+
424
+ if !Avro.disable_enum_symbol_validation
425
+ invalid_symbols = symbols.select { |symbol| symbol !~ SYMBOL_REGEX }
426
+
427
+ if invalid_symbols.any?
428
+ raise SchemaParseError,
429
+ "Invalid symbols for #{name}: #{invalid_symbols.join(', ')} don't match #{SYMBOL_REGEX.inspect}"
430
+ end
431
+ end
432
+
433
+ if default && !symbols.include?(default)
434
+ raise Avro::SchemaParseError, "Default '#{default}' is not a valid symbol for enum #{name}"
435
+ end
436
+
437
+ super(:enum, name, space, names, doc, nil, aliases)
438
+ @default = default
324
439
  @symbols = symbols
325
440
  end
326
441
 
327
- def to_avro(names=Set.new)
442
+ def to_avro(_names=Set.new)
328
443
  avro = super
329
- avro.is_a?(Hash) ? avro.merge('symbols' => symbols) : avro
444
+ if avro.is_a?(Hash)
445
+ avro['symbols'] = symbols
446
+ avro['default'] = default if default
447
+ end
448
+ avro
330
449
  end
331
450
  end
332
451
 
@@ -348,14 +467,32 @@ module Avro
348
467
  end
349
468
  end
350
469
 
470
+ class BytesSchema < PrimitiveSchema
471
+ attr_reader :precision, :scale
472
+ def initialize(type, logical_type=nil, precision=nil, scale=nil)
473
+ super(type.to_sym, logical_type)
474
+ @precision = precision
475
+ @scale = scale
476
+ end
477
+
478
+ def to_avro(names=nil)
479
+ avro = super
480
+ return avro if avro.is_a?(String)
481
+
482
+ avro['precision'] = precision if precision
483
+ avro['scale'] = scale if scale
484
+ avro
485
+ end
486
+ end
487
+
351
488
  class FixedSchema < NamedSchema
352
489
  attr_reader :size
353
- def initialize(name, space, size, names=nil, logical_type=nil)
490
+ def initialize(name, space, size, names=nil, logical_type=nil, aliases=nil)
354
491
  # Ensure valid cto args
355
492
  unless size.is_a?(Integer)
356
493
  raise AvroError, 'Fixed Schema requires a valid integer for size property.'
357
494
  end
358
- super(:fixed, name, space, names, nil, logical_type)
495
+ super(:fixed, name, space, names, nil, logical_type, aliases)
359
496
  @size = size
360
497
  end
361
498
 
@@ -366,14 +503,16 @@ module Avro
366
503
  end
367
504
 
368
505
  class Field < Schema
369
- attr_reader :type, :name, :default, :order, :doc
506
+ attr_reader :type, :name, :default, :order, :doc, :aliases
370
507
 
371
- def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil)
508
+ def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil, aliases=nil)
372
509
  @type = subparse(type, names, namespace)
373
510
  @name = name
374
511
  @default = default
375
512
  @order = order
376
513
  @doc = doc
514
+ @aliases = aliases
515
+ validate_aliases! if aliases
377
516
  validate_default! if default? && !Avro.disable_field_default_validation
378
517
  end
379
518
 
@@ -389,6 +528,10 @@ module Avro
389
528
  end
390
529
  end
391
530
 
531
+ def alias_names
532
+ @alias_names ||= Array(aliases)
533
+ end
534
+
392
535
  private
393
536
 
394
537
  def validate_default!