avro 1.7.4 → 1.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,6 +17,7 @@
17
17
  module Avro
18
18
  class Protocol
19
19
  VALID_TYPE_SCHEMA_TYPES = Set.new(%w[enum record error fixed])
20
+ VALID_TYPE_SCHEMA_TYPES_SYM = Set.new(VALID_TYPE_SCHEMA_TYPES.map(&:to_sym))
20
21
  class ProtocolParseError < Avro::AvroError; end
21
22
 
22
23
  attr_reader :name, :namespace, :types, :messages, :md5
@@ -70,8 +71,8 @@ module Avro
70
71
  types.collect do |type|
71
72
  # FIXME adding type.name to type_names is not defined in the
72
73
  # spec. Possible bug in the python impl and the spec.
73
- type_object = Schema.real_parse(type, type_names)
74
- unless VALID_TYPE_SCHEMA_TYPES.include?(type_object.type)
74
+ type_object = Schema.real_parse(type, type_names, namespace)
75
+ unless VALID_TYPE_SCHEMA_TYPES_SYM.include?(type_object.type_sym)
75
76
  msg = "Type #{type} not an enum, record, fixed or error."
76
77
  raise ProtocolParseError, msg
77
78
  end
@@ -91,47 +92,42 @@ module Avro
91
92
  request = body['request']
92
93
  response = body['response']
93
94
  errors = body['errors']
94
- message_objects[name] = Message.new(name, request, response, errors, names)
95
+ message_objects[name] = Message.new(name, request, response, errors, names, namespace)
95
96
  end
96
97
  message_objects
97
98
  end
98
99
 
99
100
  protected
100
- def to_avro
101
+ def to_avro(names=Set.new)
101
102
  hsh = {'protocol' => name}
102
103
  hsh['namespace'] = namespace if namespace
103
- hsh['types'] = types.map{|t| t.to_avro } if types
104
+ hsh['types'] = types.map{|t| t.to_avro(names) } if types
104
105
 
105
106
  if messages
106
- hsh['messages'] = messages.collect_hash{|k,t| [k, t.to_avro] }
107
+ hsh['messages'] = messages.collect_hash{|k,t| [k, t.to_avro(names)] }
107
108
  end
108
109
 
109
110
  hsh
110
111
  end
111
112
 
112
113
  class Message
113
- attr_reader :name, :response_from_names, :request, :response, :errors
114
- def initialize(name, request, response, errors=nil, names=nil)
115
- @name = name
116
- @response_from_names = false
114
+ attr_reader :name, :request, :response, :errors, :default_namespace
117
115
 
116
+ def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil)
117
+ @name = name
118
+ @default_namespace = default_namespace
118
119
  @request = parse_request(request, names)
119
120
  @response = parse_response(response, names)
120
121
  @errors = parse_errors(errors, names) if errors
121
122
  end
122
123
 
123
- def to_avro
124
- hsh = {'request' => request.to_avro}
125
- if response_from_names
126
- hsh['response'] = response.fullname
127
- else
128
- hsh['response'] = response.to_avro
124
+ def to_avro(names=Set.new)
125
+ {
126
+ 'request' => request.to_avro(names),
127
+ 'response' => response.to_avro(names)
128
+ }.tap do |hash|
129
+ hash['errors'] = errors.to_avro(names) if errors
129
130
  end
130
-
131
- if errors
132
- hsh['errors'] = errors.to_avro
133
- end
134
- hsh
135
131
  end
136
132
 
137
133
  def to_s
@@ -142,23 +138,23 @@ module Avro
142
138
  unless request.is_a?(Array)
143
139
  raise ProtocolParseError, "Request property not an Array: #{request.inspect}"
144
140
  end
145
- Schema::RecordSchema.new(nil, nil, request, names, 'request')
141
+ Schema::RecordSchema.new(nil, default_namespace, request, names, :request)
146
142
  end
147
143
 
148
144
  def parse_response(response, names)
149
- if response.is_a?(String) && names[response]
150
- @response_from_names = true
151
- names[response]
152
- else
153
- Schema.real_parse(response, names)
145
+ if response.is_a?(String) && names
146
+ fullname = Name.make_fullname(response, default_namespace)
147
+ return names[fullname] if names.include?(fullname)
154
148
  end
149
+
150
+ Schema.real_parse(response, names, default_namespace)
155
151
  end
156
152
 
157
153
  def parse_errors(errors, names)
158
154
  unless errors.is_a?(Array)
159
155
  raise ProtocolParseError, "Errors property not an Array: #{errors}"
160
156
  end
161
- Schema.real_parse(errors, names)
157
+ Schema.real_parse(errors, names, default_namespace)
162
158
  end
163
159
  end
164
160
  end
@@ -16,12 +16,17 @@
16
16
 
17
17
  module Avro
18
18
  class Schema
19
- # FIXME turn these into symbols to prevent some gc pressure
19
+ # Sets of strings, for backwards compatibility. See below for sets of symbols,
20
+ # for better performance.
20
21
  PRIMITIVE_TYPES = Set.new(%w[null boolean string bytes int long float double])
21
22
  NAMED_TYPES = Set.new(%w[fixed enum record error])
22
23
 
23
24
  VALID_TYPES = PRIMITIVE_TYPES + NAMED_TYPES + Set.new(%w[array map union request])
24
25
 
26
+ PRIMITIVE_TYPES_SYM = Set.new(PRIMITIVE_TYPES.map(&:to_sym))
27
+ NAMED_TYPES_SYM = Set.new(NAMED_TYPES.map(&:to_sym))
28
+ VALID_TYPES_SYM = Set.new(VALID_TYPES.map(&:to_sym))
29
+
25
30
  INT_MIN_VALUE = -(1 << 31)
26
31
  INT_MAX_VALUE = (1 << 31) - 1
27
32
  LONG_MIN_VALUE = -(1 << 63)
@@ -32,44 +37,52 @@ module Avro
32
37
  end
33
38
 
34
39
  # Build Avro Schema from data parsed out of JSON string.
35
- def self.real_parse(json_obj, names=nil)
40
+ def self.real_parse(json_obj, names=nil, default_namespace=nil)
36
41
  if json_obj.is_a? Hash
37
42
  type = json_obj['type']
38
- if PRIMITIVE_TYPES.include?(type)
39
- return PrimitiveSchema.new(type)
40
- elsif NAMED_TYPES.include? type
43
+ raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil?
44
+
45
+ # Check that the type is valid before calling #to_sym, since symbols are never garbage
46
+ # collected (important to avoid DoS if we're accepting schemas from untrusted clients)
47
+ unless VALID_TYPES.include?(type)
48
+ raise SchemaParseError, "Unknown type: #{type}"
49
+ end
50
+
51
+ type_sym = type.to_sym
52
+ if PRIMITIVE_TYPES_SYM.include?(type_sym)
53
+ return PrimitiveSchema.new(type_sym)
54
+
55
+ elsif NAMED_TYPES_SYM.include? type_sym
41
56
  name = json_obj['name']
42
- namespace = json_obj['namespace']
43
- case type
44
- when 'fixed'
57
+ namespace = json_obj.include?('namespace') ? json_obj['namespace'] : default_namespace
58
+ case type_sym
59
+ when :fixed
45
60
  size = json_obj['size']
46
61
  return FixedSchema.new(name, namespace, size, names)
47
- when 'enum'
62
+ when :enum
48
63
  symbols = json_obj['symbols']
49
64
  return EnumSchema.new(name, namespace, symbols, names)
50
- when 'record', 'error'
65
+ when :record, :error
51
66
  fields = json_obj['fields']
52
- return RecordSchema.new(name, namespace, fields, names, type)
67
+ return RecordSchema.new(name, namespace, fields, names, type_sym)
53
68
  else
54
69
  raise SchemaParseError.new("Unknown named type: #{type}")
55
70
  end
56
- elsif VALID_TYPES.include?(type)
57
- case type
58
- when 'array'
59
- return ArraySchema.new(json_obj['items'], names)
60
- when 'map'
61
- return MapSchema.new(json_obj['values'], names)
71
+
72
+ else
73
+ case type_sym
74
+ when :array
75
+ return ArraySchema.new(json_obj['items'], names, default_namespace)
76
+ when :map
77
+ return MapSchema.new(json_obj['values'], names, default_namespace)
62
78
  else
63
79
  raise SchemaParseError.new("Unknown Valid Type: #{type}")
64
80
  end
65
- elsif type.nil?
66
- raise SchemaParseError.new("No \"type\" property: #{json_obj}")
67
- else
68
- raise SchemaParseError.new("Undefined type: #{type}")
69
81
  end
82
+
70
83
  elsif json_obj.is_a? Array
71
84
  # JSON array (union)
72
- return UnionSchema.new(json_obj, names)
85
+ return UnionSchema.new(json_obj, names, default_namespace)
73
86
  elsif PRIMITIVE_TYPES.include? json_obj
74
87
  return PrimitiveSchema.new(json_obj)
75
88
  else
@@ -80,34 +93,34 @@ module Avro
80
93
 
81
94
  # Determine if a ruby datum is an instance of a schema
82
95
  def self.validate(expected_schema, datum)
83
- case expected_schema.type
84
- when 'null'
96
+ case expected_schema.type_sym
97
+ when :null
85
98
  datum.nil?
86
- when 'boolean'
99
+ when :boolean
87
100
  datum == true || datum == false
88
- when 'string', 'bytes'
101
+ when :string, :bytes
89
102
  datum.is_a? String
90
- when 'int'
103
+ when :int
91
104
  (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) &&
92
105
  (INT_MIN_VALUE <= datum) && (datum <= INT_MAX_VALUE)
93
- when 'long'
106
+ when :long
94
107
  (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) &&
95
108
  (LONG_MIN_VALUE <= datum) && (datum <= LONG_MAX_VALUE)
96
- when 'float', 'double'
109
+ when :float, :double
97
110
  datum.is_a?(Float) || datum.is_a?(Fixnum) || datum.is_a?(Bignum)
98
- when 'fixed'
111
+ when :fixed
99
112
  datum.is_a?(String) && datum.size == expected_schema.size
100
- when 'enum'
113
+ when :enum
101
114
  expected_schema.symbols.include? datum
102
- when 'array'
115
+ when :array
103
116
  datum.is_a?(Array) &&
104
117
  datum.all?{|d| validate(expected_schema.items, d) }
105
- when 'map'
118
+ when :map
106
119
  datum.keys.all?{|k| k.is_a? String } &&
107
120
  datum.values.all?{|v| validate(expected_schema.values, v) }
108
- when 'union'
121
+ when :union
109
122
  expected_schema.schemas.any?{|s| validate(s, datum) }
110
- when 'record', 'error', 'request'
123
+ when :record, :error, :request
111
124
  datum.is_a?(Hash) &&
112
125
  expected_schema.fields.all?{|f| validate(f.type, datum[f.name]) }
113
126
  else
@@ -116,30 +129,39 @@ module Avro
116
129
  end
117
130
 
118
131
  def initialize(type)
119
- @type = type
132
+ @type_sym = type.is_a?(Symbol) ? type : type.to_sym
120
133
  end
121
134
 
122
- def type; @type; end
135
+ attr_reader :type_sym
136
+
137
+ # Returns the type as a string (rather than a symbol), for backwards compatibility.
138
+ # Deprecated in favor of {#type_sym}.
139
+ def type; @type_sym.to_s; end
123
140
 
124
141
  def ==(other, seen=nil)
125
- other.is_a?(Schema) && @type == other.type
142
+ other.is_a?(Schema) && type_sym == other.type_sym
126
143
  end
127
144
 
128
145
  def hash(seen=nil)
129
- @type.hash
146
+ type_sym.hash
130
147
  end
131
148
 
132
- def subparse(json_obj, names=nil)
149
+ def subparse(json_obj, names=nil, namespace=nil)
150
+ if json_obj.is_a?(String) && names
151
+ fullname = Name.make_fullname(json_obj, namespace)
152
+ return names[fullname] if names.include?(fullname)
153
+ end
154
+
133
155
  begin
134
- Schema.real_parse(json_obj, names)
156
+ Schema.real_parse(json_obj, names, namespace)
135
157
  rescue => e
136
158
  raise e if e.is_a? SchemaParseError
137
159
  raise SchemaParseError, "Sub-schema for #{self.class.name} not a valid Avro schema. Bad schema: #{json_obj}"
138
160
  end
139
161
  end
140
162
 
141
- def to_avro
142
- {'type' => @type}
163
+ def to_avro(names=nil)
164
+ {'type' => type}
143
165
  end
144
166
 
145
167
  def to_s
@@ -154,21 +176,25 @@ module Avro
154
176
  names = Name.add_name(names, self)
155
177
  end
156
178
 
157
- def to_avro
179
+ def to_avro(names=Set.new)
180
+ if @name
181
+ return fullname if names.include?(fullname)
182
+ names << fullname
183
+ end
158
184
  props = {'name' => @name}
159
185
  props.merge!('namespace' => @namespace) if @namespace
160
186
  super.merge props
161
187
  end
162
188
 
163
189
  def fullname
164
- Name.make_fullname(@name, @namespace)
190
+ @fullname ||= Name.make_fullname(@name, @namespace)
165
191
  end
166
192
  end
167
193
 
168
194
  class RecordSchema < NamedSchema
169
195
  attr_reader :fields
170
196
 
171
- def self.make_field_objects(field_data, names)
197
+ def self.make_field_objects(field_data, names, namespace=nil)
172
198
  field_objects, field_names = [], Set.new
173
199
  field_data.each_with_index do |field, i|
174
200
  if field.respond_to?(:[]) # TODO(jmhodges) wtffffff
@@ -176,7 +202,7 @@ module Avro
176
202
  name = field['name']
177
203
  default = field['default']
178
204
  order = field['order']
179
- new_field = Field.new(type, name, default, order, names)
205
+ new_field = Field.new(type, name, default, order, names, namespace)
180
206
  # make sure field name has not been used yet
181
207
  if field_names.include?(new_field.name)
182
208
  raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
@@ -190,22 +216,25 @@ module Avro
190
216
  field_objects
191
217
  end
192
218
 
193
- def initialize(name, namespace, fields, names=nil, schema_type='record')
194
- if schema_type == 'request'
195
- @type = schema_type
219
+ def initialize(name, namespace, fields, names=nil, schema_type=:record)
220
+ if schema_type == :request || schema_type == 'request'
221
+ @type_sym = schema_type.to_sym
222
+ @namespace = namespace
196
223
  else
197
224
  super(schema_type, name, namespace, names)
198
225
  end
199
- @fields = RecordSchema.make_field_objects(fields, names)
226
+ @fields = RecordSchema.make_field_objects(fields, names, self.namespace)
200
227
  end
201
228
 
202
229
  def fields_hash
203
- fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
230
+ @fields_hash ||= fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
204
231
  end
205
232
 
206
- def to_avro
207
- hsh = super.merge('fields' => @fields.map {|f| f.to_avro } )
208
- if type == 'request'
233
+ def to_avro(names=Set.new)
234
+ hsh = super
235
+ return hsh unless hsh.is_a?(Hash)
236
+ hsh['fields'] = @fields.map {|f| f.to_avro(names) }
237
+ if type_sym == :request
209
238
  hsh['fields']
210
239
  else
211
240
  hsh
@@ -214,99 +243,57 @@ module Avro
214
243
  end
215
244
 
216
245
  class ArraySchema < Schema
217
- attr_reader :items, :items_schema_from_names
218
- def initialize(items, names=nil)
219
- @items_schema_from_names = false
246
+ attr_reader :items
220
247
 
221
- super('array')
222
-
223
- if items.is_a?(String) && names.has_key?(items)
224
- @items = names[items]
225
- @items_schema_from_names = true
226
- else
227
- @items = subparse(items, names)
228
- end
248
+ def initialize(items, names=nil, default_namespace=nil)
249
+ super(:array)
250
+ @items = subparse(items, names, default_namespace)
229
251
  end
230
252
 
231
- def to_avro
232
- name_or_json = if items_schema_from_names
233
- items.fullname
234
- else
235
- items.to_avro
236
- end
237
- super.merge('items' => name_or_json)
253
+ def to_avro(names=Set.new)
254
+ super.merge('items' => items.to_avro(names))
238
255
  end
239
256
  end
240
257
 
241
258
  class MapSchema < Schema
242
- attr_reader :values, :values_schema_from_names
243
-
244
- def initialize(values, names=nil)
245
- @values_schema_from_names = false
246
- super('map')
247
- if values.is_a?(String) && names.has_key?(values)
248
- values_schema = names[values]
249
- @values_schema_from_names = true
250
- else
251
- values_schema = subparse(values, names)
252
- end
253
- @values = values_schema
259
+ attr_reader :values
260
+
261
+ def initialize(values, names=nil, default_namespace=nil)
262
+ super(:map)
263
+ @values = subparse(values, names, default_namespace)
254
264
  end
255
265
 
256
- def to_avro
257
- to_dump = super
258
- if values_schema_from_names
259
- to_dump['values'] = values
260
- else
261
- to_dump['values'] = values.to_avro
262
- end
263
- to_dump
266
+ def to_avro(names=Set.new)
267
+ super.merge('values' => values.to_avro(names))
264
268
  end
265
269
  end
266
270
 
267
271
  class UnionSchema < Schema
268
- attr_reader :schemas, :schema_from_names_indices
269
- def initialize(schemas, names=nil)
270
- super('union')
272
+ attr_reader :schemas
273
+
274
+ def initialize(schemas, names=nil, default_namespace=nil)
275
+ super(:union)
271
276
 
272
277
  schema_objects = []
273
- @schema_from_names_indices = []
274
278
  schemas.each_with_index do |schema, i|
275
- from_names = false
276
- if schema.is_a?(String) && names.has_key?(schema)
277
- new_schema = names[schema]
278
- from_names = true
279
- else
280
- new_schema = subparse(schema, names)
281
- end
279
+ new_schema = subparse(schema, names, default_namespace)
280
+ ns_type = new_schema.type_sym
282
281
 
283
- ns_type = new_schema.type
284
- if VALID_TYPES.include?(ns_type) &&
285
- !NAMED_TYPES.include?(ns_type) &&
286
- schema_objects.map{|o| o.type }.include?(ns_type)
282
+ if VALID_TYPES_SYM.include?(ns_type) &&
283
+ !NAMED_TYPES_SYM.include?(ns_type) &&
284
+ schema_objects.any?{|o| o.type_sym == ns_type }
287
285
  raise SchemaParseError, "#{ns_type} is already in Union"
288
- elsif ns_type == 'union'
286
+ elsif ns_type == :union
289
287
  raise SchemaParseError, "Unions cannot contain other unions"
290
288
  else
291
289
  schema_objects << new_schema
292
- @schema_from_names_indices << i if from_names
293
290
  end
294
291
  @schemas = schema_objects
295
292
  end
296
293
  end
297
294
 
298
- def to_avro
299
- # FIXME(jmhodges) this from_name pattern is really weird and
300
- # seems code-smelly.
301
- to_dump = []
302
- schemas.each_with_index do |schema, i|
303
- if schema_from_names_indices.include?(i)
304
- to_dump << schema.fullname
305
- else
306
- to_dump << schema.to_avro
307
- end
308
- end
309
- to_dump
295
+ def to_avro(names=Set.new)
296
+ schemas.map {|schema| schema.to_avro(names) }
310
297
  end
311
298
  end
312
299
 
@@ -317,26 +304,29 @@ module Avro
317
304
  fail_msg = 'Duplicate symbol: %s' % symbols
318
305
  raise Avro::SchemaParseError, fail_msg
319
306
  end
320
- super('enum', name, space, names)
307
+ super(:enum, name, space, names)
321
308
  @symbols = symbols
322
309
  end
323
310
 
324
- def to_avro
325
- super.merge('symbols' => symbols)
311
+ def to_avro(names=Set.new)
312
+ avro = super
313
+ avro.is_a?(Hash) ? avro.merge('symbols' => symbols) : avro
326
314
  end
327
315
  end
328
316
 
329
317
  # Valid primitive types are in PRIMITIVE_TYPES.
330
318
  class PrimitiveSchema < Schema
331
319
  def initialize(type)
332
- unless PRIMITIVE_TYPES.include? type
320
+ if PRIMITIVE_TYPES_SYM.include?(type)
321
+ super(type)
322
+ elsif PRIMITIVE_TYPES.include?(type)
323
+ super(type.to_sym)
324
+ else
333
325
  raise AvroError.new("#{type} is not a valid primitive type.")
334
326
  end
335
-
336
- super(type)
337
327
  end
338
328
 
339
- def to_avro
329
+ def to_avro(names=nil)
340
330
  hsh = super
341
331
  hsh.size == 1 ? type : hsh
342
332
  end
@@ -349,40 +339,31 @@ module Avro
349
339
  unless size.is_a?(Fixnum) || size.is_a?(Bignum)
350
340
  raise AvroError, 'Fixed Schema requires a valid integer for size property.'
351
341
  end
352
- super('fixed', name, space, names)
342
+ super(:fixed, name, space, names)
353
343
  @size = size
354
344
  end
355
345
 
356
- def to_avro
357
- super.merge('size' => @size)
346
+ def to_avro(names=Set.new)
347
+ avro = super
348
+ avro.is_a?(Hash) ? avro.merge('size' => size) : avro
358
349
  end
359
350
  end
360
351
 
361
352
  class Field < Schema
362
- attr_reader :type, :name, :default, :order, :type_from_names
363
- def initialize(type, name, default=nil, order=nil, names=nil)
364
- @type_from_names = false
365
- if type.is_a?(String) && names && names.has_key?(type)
366
- type_schema = names[type]
367
- @type_from_names = true
368
- else
369
- type_schema = subparse(type, names)
370
- end
371
- @type = type_schema
353
+ attr_reader :type, :name, :default, :order
354
+
355
+ def initialize(type, name, default=nil, order=nil, names=nil, namespace=nil)
356
+ @type = subparse(type, names, namespace)
372
357
  @name = name
373
358
  @default = default
374
359
  @order = order
375
360
  end
376
361
 
377
- def to_avro
378
- sigh_type = type_from_names ? type.fullname : type.to_avro
379
- hsh = {
380
- 'name' => name,
381
- 'type' => sigh_type
382
- }
383
- hsh['default'] = default if default
384
- hsh['order'] = order if order
385
- hsh
362
+ def to_avro(names=Set.new)
363
+ {'name' => name, 'type' => type.to_avro(names)}.tap do |avro|
364
+ avro['default'] = default if default
365
+ avro['order'] = order if order
366
+ end
386
367
  end
387
368
  end
388
369
  end