avro 1.7.4 → 1.7.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -17,6 +17,7 @@
17
17
  module Avro
18
18
  class Protocol
19
19
  VALID_TYPE_SCHEMA_TYPES = Set.new(%w[enum record error fixed])
20
+ VALID_TYPE_SCHEMA_TYPES_SYM = Set.new(VALID_TYPE_SCHEMA_TYPES.map(&:to_sym))
20
21
  class ProtocolParseError < Avro::AvroError; end
21
22
 
22
23
  attr_reader :name, :namespace, :types, :messages, :md5
@@ -70,8 +71,8 @@ module Avro
70
71
  types.collect do |type|
71
72
  # FIXME adding type.name to type_names is not defined in the
72
73
  # spec. Possible bug in the python impl and the spec.
73
- type_object = Schema.real_parse(type, type_names)
74
- unless VALID_TYPE_SCHEMA_TYPES.include?(type_object.type)
74
+ type_object = Schema.real_parse(type, type_names, namespace)
75
+ unless VALID_TYPE_SCHEMA_TYPES_SYM.include?(type_object.type_sym)
75
76
  msg = "Type #{type} not an enum, record, fixed or error."
76
77
  raise ProtocolParseError, msg
77
78
  end
@@ -91,47 +92,42 @@ module Avro
91
92
  request = body['request']
92
93
  response = body['response']
93
94
  errors = body['errors']
94
- message_objects[name] = Message.new(name, request, response, errors, names)
95
+ message_objects[name] = Message.new(name, request, response, errors, names, namespace)
95
96
  end
96
97
  message_objects
97
98
  end
98
99
 
99
100
  protected
100
- def to_avro
101
+ def to_avro(names=Set.new)
101
102
  hsh = {'protocol' => name}
102
103
  hsh['namespace'] = namespace if namespace
103
- hsh['types'] = types.map{|t| t.to_avro } if types
104
+ hsh['types'] = types.map{|t| t.to_avro(names) } if types
104
105
 
105
106
  if messages
106
- hsh['messages'] = messages.collect_hash{|k,t| [k, t.to_avro] }
107
+ hsh['messages'] = messages.collect_hash{|k,t| [k, t.to_avro(names)] }
107
108
  end
108
109
 
109
110
  hsh
110
111
  end
111
112
 
112
113
  class Message
113
- attr_reader :name, :response_from_names, :request, :response, :errors
114
- def initialize(name, request, response, errors=nil, names=nil)
115
- @name = name
116
- @response_from_names = false
114
+ attr_reader :name, :request, :response, :errors, :default_namespace
117
115
 
116
+ def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil)
117
+ @name = name
118
+ @default_namespace = default_namespace
118
119
  @request = parse_request(request, names)
119
120
  @response = parse_response(response, names)
120
121
  @errors = parse_errors(errors, names) if errors
121
122
  end
122
123
 
123
- def to_avro
124
- hsh = {'request' => request.to_avro}
125
- if response_from_names
126
- hsh['response'] = response.fullname
127
- else
128
- hsh['response'] = response.to_avro
124
+ def to_avro(names=Set.new)
125
+ {
126
+ 'request' => request.to_avro(names),
127
+ 'response' => response.to_avro(names)
128
+ }.tap do |hash|
129
+ hash['errors'] = errors.to_avro(names) if errors
129
130
  end
130
-
131
- if errors
132
- hsh['errors'] = errors.to_avro
133
- end
134
- hsh
135
131
  end
136
132
 
137
133
  def to_s
@@ -142,23 +138,23 @@ module Avro
142
138
  unless request.is_a?(Array)
143
139
  raise ProtocolParseError, "Request property not an Array: #{request.inspect}"
144
140
  end
145
- Schema::RecordSchema.new(nil, nil, request, names, 'request')
141
+ Schema::RecordSchema.new(nil, default_namespace, request, names, :request)
146
142
  end
147
143
 
148
144
  def parse_response(response, names)
149
- if response.is_a?(String) && names[response]
150
- @response_from_names = true
151
- names[response]
152
- else
153
- Schema.real_parse(response, names)
145
+ if response.is_a?(String) && names
146
+ fullname = Name.make_fullname(response, default_namespace)
147
+ return names[fullname] if names.include?(fullname)
154
148
  end
149
+
150
+ Schema.real_parse(response, names, default_namespace)
155
151
  end
156
152
 
157
153
  def parse_errors(errors, names)
158
154
  unless errors.is_a?(Array)
159
155
  raise ProtocolParseError, "Errors property not an Array: #{errors}"
160
156
  end
161
- Schema.real_parse(errors, names)
157
+ Schema.real_parse(errors, names, default_namespace)
162
158
  end
163
159
  end
164
160
  end
@@ -16,12 +16,17 @@
16
16
 
17
17
  module Avro
18
18
  class Schema
19
- # FIXME turn these into symbols to prevent some gc pressure
19
+ # Sets of strings, for backwards compatibility. See below for sets of symbols,
20
+ # for better performance.
20
21
  PRIMITIVE_TYPES = Set.new(%w[null boolean string bytes int long float double])
21
22
  NAMED_TYPES = Set.new(%w[fixed enum record error])
22
23
 
23
24
  VALID_TYPES = PRIMITIVE_TYPES + NAMED_TYPES + Set.new(%w[array map union request])
24
25
 
26
+ PRIMITIVE_TYPES_SYM = Set.new(PRIMITIVE_TYPES.map(&:to_sym))
27
+ NAMED_TYPES_SYM = Set.new(NAMED_TYPES.map(&:to_sym))
28
+ VALID_TYPES_SYM = Set.new(VALID_TYPES.map(&:to_sym))
29
+
25
30
  INT_MIN_VALUE = -(1 << 31)
26
31
  INT_MAX_VALUE = (1 << 31) - 1
27
32
  LONG_MIN_VALUE = -(1 << 63)
@@ -32,44 +37,52 @@ module Avro
32
37
  end
33
38
 
34
39
  # Build Avro Schema from data parsed out of JSON string.
35
- def self.real_parse(json_obj, names=nil)
40
+ def self.real_parse(json_obj, names=nil, default_namespace=nil)
36
41
  if json_obj.is_a? Hash
37
42
  type = json_obj['type']
38
- if PRIMITIVE_TYPES.include?(type)
39
- return PrimitiveSchema.new(type)
40
- elsif NAMED_TYPES.include? type
43
+ raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil?
44
+
45
+ # Check that the type is valid before calling #to_sym, since symbols are never garbage
46
+ # collected (important to avoid DoS if we're accepting schemas from untrusted clients)
47
+ unless VALID_TYPES.include?(type)
48
+ raise SchemaParseError, "Unknown type: #{type}"
49
+ end
50
+
51
+ type_sym = type.to_sym
52
+ if PRIMITIVE_TYPES_SYM.include?(type_sym)
53
+ return PrimitiveSchema.new(type_sym)
54
+
55
+ elsif NAMED_TYPES_SYM.include? type_sym
41
56
  name = json_obj['name']
42
- namespace = json_obj['namespace']
43
- case type
44
- when 'fixed'
57
+ namespace = json_obj.include?('namespace') ? json_obj['namespace'] : default_namespace
58
+ case type_sym
59
+ when :fixed
45
60
  size = json_obj['size']
46
61
  return FixedSchema.new(name, namespace, size, names)
47
- when 'enum'
62
+ when :enum
48
63
  symbols = json_obj['symbols']
49
64
  return EnumSchema.new(name, namespace, symbols, names)
50
- when 'record', 'error'
65
+ when :record, :error
51
66
  fields = json_obj['fields']
52
- return RecordSchema.new(name, namespace, fields, names, type)
67
+ return RecordSchema.new(name, namespace, fields, names, type_sym)
53
68
  else
54
69
  raise SchemaParseError.new("Unknown named type: #{type}")
55
70
  end
56
- elsif VALID_TYPES.include?(type)
57
- case type
58
- when 'array'
59
- return ArraySchema.new(json_obj['items'], names)
60
- when 'map'
61
- return MapSchema.new(json_obj['values'], names)
71
+
72
+ else
73
+ case type_sym
74
+ when :array
75
+ return ArraySchema.new(json_obj['items'], names, default_namespace)
76
+ when :map
77
+ return MapSchema.new(json_obj['values'], names, default_namespace)
62
78
  else
63
79
  raise SchemaParseError.new("Unknown Valid Type: #{type}")
64
80
  end
65
- elsif type.nil?
66
- raise SchemaParseError.new("No \"type\" property: #{json_obj}")
67
- else
68
- raise SchemaParseError.new("Undefined type: #{type}")
69
81
  end
82
+
70
83
  elsif json_obj.is_a? Array
71
84
  # JSON array (union)
72
- return UnionSchema.new(json_obj, names)
85
+ return UnionSchema.new(json_obj, names, default_namespace)
73
86
  elsif PRIMITIVE_TYPES.include? json_obj
74
87
  return PrimitiveSchema.new(json_obj)
75
88
  else
@@ -80,34 +93,34 @@ module Avro
80
93
 
81
94
  # Determine if a ruby datum is an instance of a schema
82
95
  def self.validate(expected_schema, datum)
83
- case expected_schema.type
84
- when 'null'
96
+ case expected_schema.type_sym
97
+ when :null
85
98
  datum.nil?
86
- when 'boolean'
99
+ when :boolean
87
100
  datum == true || datum == false
88
- when 'string', 'bytes'
101
+ when :string, :bytes
89
102
  datum.is_a? String
90
- when 'int'
103
+ when :int
91
104
  (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) &&
92
105
  (INT_MIN_VALUE <= datum) && (datum <= INT_MAX_VALUE)
93
- when 'long'
106
+ when :long
94
107
  (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) &&
95
108
  (LONG_MIN_VALUE <= datum) && (datum <= LONG_MAX_VALUE)
96
- when 'float', 'double'
109
+ when :float, :double
97
110
  datum.is_a?(Float) || datum.is_a?(Fixnum) || datum.is_a?(Bignum)
98
- when 'fixed'
111
+ when :fixed
99
112
  datum.is_a?(String) && datum.size == expected_schema.size
100
- when 'enum'
113
+ when :enum
101
114
  expected_schema.symbols.include? datum
102
- when 'array'
115
+ when :array
103
116
  datum.is_a?(Array) &&
104
117
  datum.all?{|d| validate(expected_schema.items, d) }
105
- when 'map'
118
+ when :map
106
119
  datum.keys.all?{|k| k.is_a? String } &&
107
120
  datum.values.all?{|v| validate(expected_schema.values, v) }
108
- when 'union'
121
+ when :union
109
122
  expected_schema.schemas.any?{|s| validate(s, datum) }
110
- when 'record', 'error', 'request'
123
+ when :record, :error, :request
111
124
  datum.is_a?(Hash) &&
112
125
  expected_schema.fields.all?{|f| validate(f.type, datum[f.name]) }
113
126
  else
@@ -116,30 +129,39 @@ module Avro
116
129
  end
117
130
 
118
131
  def initialize(type)
119
- @type = type
132
+ @type_sym = type.is_a?(Symbol) ? type : type.to_sym
120
133
  end
121
134
 
122
- def type; @type; end
135
+ attr_reader :type_sym
136
+
137
+ # Returns the type as a string (rather than a symbol), for backwards compatibility.
138
+ # Deprecated in favor of {#type_sym}.
139
+ def type; @type_sym.to_s; end
123
140
 
124
141
  def ==(other, seen=nil)
125
- other.is_a?(Schema) && @type == other.type
142
+ other.is_a?(Schema) && type_sym == other.type_sym
126
143
  end
127
144
 
128
145
  def hash(seen=nil)
129
- @type.hash
146
+ type_sym.hash
130
147
  end
131
148
 
132
- def subparse(json_obj, names=nil)
149
+ def subparse(json_obj, names=nil, namespace=nil)
150
+ if json_obj.is_a?(String) && names
151
+ fullname = Name.make_fullname(json_obj, namespace)
152
+ return names[fullname] if names.include?(fullname)
153
+ end
154
+
133
155
  begin
134
- Schema.real_parse(json_obj, names)
156
+ Schema.real_parse(json_obj, names, namespace)
135
157
  rescue => e
136
158
  raise e if e.is_a? SchemaParseError
137
159
  raise SchemaParseError, "Sub-schema for #{self.class.name} not a valid Avro schema. Bad schema: #{json_obj}"
138
160
  end
139
161
  end
140
162
 
141
- def to_avro
142
- {'type' => @type}
163
+ def to_avro(names=nil)
164
+ {'type' => type}
143
165
  end
144
166
 
145
167
  def to_s
@@ -154,21 +176,25 @@ module Avro
154
176
  names = Name.add_name(names, self)
155
177
  end
156
178
 
157
- def to_avro
179
+ def to_avro(names=Set.new)
180
+ if @name
181
+ return fullname if names.include?(fullname)
182
+ names << fullname
183
+ end
158
184
  props = {'name' => @name}
159
185
  props.merge!('namespace' => @namespace) if @namespace
160
186
  super.merge props
161
187
  end
162
188
 
163
189
  def fullname
164
- Name.make_fullname(@name, @namespace)
190
+ @fullname ||= Name.make_fullname(@name, @namespace)
165
191
  end
166
192
  end
167
193
 
168
194
  class RecordSchema < NamedSchema
169
195
  attr_reader :fields
170
196
 
171
- def self.make_field_objects(field_data, names)
197
+ def self.make_field_objects(field_data, names, namespace=nil)
172
198
  field_objects, field_names = [], Set.new
173
199
  field_data.each_with_index do |field, i|
174
200
  if field.respond_to?(:[]) # TODO(jmhodges) wtffffff
@@ -176,7 +202,7 @@ module Avro
176
202
  name = field['name']
177
203
  default = field['default']
178
204
  order = field['order']
179
- new_field = Field.new(type, name, default, order, names)
205
+ new_field = Field.new(type, name, default, order, names, namespace)
180
206
  # make sure field name has not been used yet
181
207
  if field_names.include?(new_field.name)
182
208
  raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
@@ -190,22 +216,25 @@ module Avro
190
216
  field_objects
191
217
  end
192
218
 
193
- def initialize(name, namespace, fields, names=nil, schema_type='record')
194
- if schema_type == 'request'
195
- @type = schema_type
219
+ def initialize(name, namespace, fields, names=nil, schema_type=:record)
220
+ if schema_type == :request || schema_type == 'request'
221
+ @type_sym = schema_type.to_sym
222
+ @namespace = namespace
196
223
  else
197
224
  super(schema_type, name, namespace, names)
198
225
  end
199
- @fields = RecordSchema.make_field_objects(fields, names)
226
+ @fields = RecordSchema.make_field_objects(fields, names, self.namespace)
200
227
  end
201
228
 
202
229
  def fields_hash
203
- fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
230
+ @fields_hash ||= fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
204
231
  end
205
232
 
206
- def to_avro
207
- hsh = super.merge('fields' => @fields.map {|f| f.to_avro } )
208
- if type == 'request'
233
+ def to_avro(names=Set.new)
234
+ hsh = super
235
+ return hsh unless hsh.is_a?(Hash)
236
+ hsh['fields'] = @fields.map {|f| f.to_avro(names) }
237
+ if type_sym == :request
209
238
  hsh['fields']
210
239
  else
211
240
  hsh
@@ -214,99 +243,57 @@ module Avro
214
243
  end
215
244
 
216
245
  class ArraySchema < Schema
217
- attr_reader :items, :items_schema_from_names
218
- def initialize(items, names=nil)
219
- @items_schema_from_names = false
246
+ attr_reader :items
220
247
 
221
- super('array')
222
-
223
- if items.is_a?(String) && names.has_key?(items)
224
- @items = names[items]
225
- @items_schema_from_names = true
226
- else
227
- @items = subparse(items, names)
228
- end
248
+ def initialize(items, names=nil, default_namespace=nil)
249
+ super(:array)
250
+ @items = subparse(items, names, default_namespace)
229
251
  end
230
252
 
231
- def to_avro
232
- name_or_json = if items_schema_from_names
233
- items.fullname
234
- else
235
- items.to_avro
236
- end
237
- super.merge('items' => name_or_json)
253
+ def to_avro(names=Set.new)
254
+ super.merge('items' => items.to_avro(names))
238
255
  end
239
256
  end
240
257
 
241
258
  class MapSchema < Schema
242
- attr_reader :values, :values_schema_from_names
243
-
244
- def initialize(values, names=nil)
245
- @values_schema_from_names = false
246
- super('map')
247
- if values.is_a?(String) && names.has_key?(values)
248
- values_schema = names[values]
249
- @values_schema_from_names = true
250
- else
251
- values_schema = subparse(values, names)
252
- end
253
- @values = values_schema
259
+ attr_reader :values
260
+
261
+ def initialize(values, names=nil, default_namespace=nil)
262
+ super(:map)
263
+ @values = subparse(values, names, default_namespace)
254
264
  end
255
265
 
256
- def to_avro
257
- to_dump = super
258
- if values_schema_from_names
259
- to_dump['values'] = values
260
- else
261
- to_dump['values'] = values.to_avro
262
- end
263
- to_dump
266
+ def to_avro(names=Set.new)
267
+ super.merge('values' => values.to_avro(names))
264
268
  end
265
269
  end
266
270
 
267
271
  class UnionSchema < Schema
268
- attr_reader :schemas, :schema_from_names_indices
269
- def initialize(schemas, names=nil)
270
- super('union')
272
+ attr_reader :schemas
273
+
274
+ def initialize(schemas, names=nil, default_namespace=nil)
275
+ super(:union)
271
276
 
272
277
  schema_objects = []
273
- @schema_from_names_indices = []
274
278
  schemas.each_with_index do |schema, i|
275
- from_names = false
276
- if schema.is_a?(String) && names.has_key?(schema)
277
- new_schema = names[schema]
278
- from_names = true
279
- else
280
- new_schema = subparse(schema, names)
281
- end
279
+ new_schema = subparse(schema, names, default_namespace)
280
+ ns_type = new_schema.type_sym
282
281
 
283
- ns_type = new_schema.type
284
- if VALID_TYPES.include?(ns_type) &&
285
- !NAMED_TYPES.include?(ns_type) &&
286
- schema_objects.map{|o| o.type }.include?(ns_type)
282
+ if VALID_TYPES_SYM.include?(ns_type) &&
283
+ !NAMED_TYPES_SYM.include?(ns_type) &&
284
+ schema_objects.any?{|o| o.type_sym == ns_type }
287
285
  raise SchemaParseError, "#{ns_type} is already in Union"
288
- elsif ns_type == 'union'
286
+ elsif ns_type == :union
289
287
  raise SchemaParseError, "Unions cannot contain other unions"
290
288
  else
291
289
  schema_objects << new_schema
292
- @schema_from_names_indices << i if from_names
293
290
  end
294
291
  @schemas = schema_objects
295
292
  end
296
293
  end
297
294
 
298
- def to_avro
299
- # FIXME(jmhodges) this from_name pattern is really weird and
300
- # seems code-smelly.
301
- to_dump = []
302
- schemas.each_with_index do |schema, i|
303
- if schema_from_names_indices.include?(i)
304
- to_dump << schema.fullname
305
- else
306
- to_dump << schema.to_avro
307
- end
308
- end
309
- to_dump
295
+ def to_avro(names=Set.new)
296
+ schemas.map {|schema| schema.to_avro(names) }
310
297
  end
311
298
  end
312
299
 
@@ -317,26 +304,29 @@ module Avro
317
304
  fail_msg = 'Duplicate symbol: %s' % symbols
318
305
  raise Avro::SchemaParseError, fail_msg
319
306
  end
320
- super('enum', name, space, names)
307
+ super(:enum, name, space, names)
321
308
  @symbols = symbols
322
309
  end
323
310
 
324
- def to_avro
325
- super.merge('symbols' => symbols)
311
+ def to_avro(names=Set.new)
312
+ avro = super
313
+ avro.is_a?(Hash) ? avro.merge('symbols' => symbols) : avro
326
314
  end
327
315
  end
328
316
 
329
317
  # Valid primitive types are in PRIMITIVE_TYPES.
330
318
  class PrimitiveSchema < Schema
331
319
  def initialize(type)
332
- unless PRIMITIVE_TYPES.include? type
320
+ if PRIMITIVE_TYPES_SYM.include?(type)
321
+ super(type)
322
+ elsif PRIMITIVE_TYPES.include?(type)
323
+ super(type.to_sym)
324
+ else
333
325
  raise AvroError.new("#{type} is not a valid primitive type.")
334
326
  end
335
-
336
- super(type)
337
327
  end
338
328
 
339
- def to_avro
329
+ def to_avro(names=nil)
340
330
  hsh = super
341
331
  hsh.size == 1 ? type : hsh
342
332
  end
@@ -349,40 +339,31 @@ module Avro
349
339
  unless size.is_a?(Fixnum) || size.is_a?(Bignum)
350
340
  raise AvroError, 'Fixed Schema requires a valid integer for size property.'
351
341
  end
352
- super('fixed', name, space, names)
342
+ super(:fixed, name, space, names)
353
343
  @size = size
354
344
  end
355
345
 
356
- def to_avro
357
- super.merge('size' => @size)
346
+ def to_avro(names=Set.new)
347
+ avro = super
348
+ avro.is_a?(Hash) ? avro.merge('size' => size) : avro
358
349
  end
359
350
  end
360
351
 
361
352
  class Field < Schema
362
- attr_reader :type, :name, :default, :order, :type_from_names
363
- def initialize(type, name, default=nil, order=nil, names=nil)
364
- @type_from_names = false
365
- if type.is_a?(String) && names && names.has_key?(type)
366
- type_schema = names[type]
367
- @type_from_names = true
368
- else
369
- type_schema = subparse(type, names)
370
- end
371
- @type = type_schema
353
+ attr_reader :type, :name, :default, :order
354
+
355
+ def initialize(type, name, default=nil, order=nil, names=nil, namespace=nil)
356
+ @type = subparse(type, names, namespace)
372
357
  @name = name
373
358
  @default = default
374
359
  @order = order
375
360
  end
376
361
 
377
- def to_avro
378
- sigh_type = type_from_names ? type.fullname : type.to_avro
379
- hsh = {
380
- 'name' => name,
381
- 'type' => sigh_type
382
- }
383
- hsh['default'] = default if default
384
- hsh['order'] = order if order
385
- hsh
362
+ def to_avro(names=Set.new)
363
+ {'name' => name, 'type' => type.to_avro(names)}.tap do |avro|
364
+ avro['default'] = default if default
365
+ avro['order'] = order if order
366
+ end
386
367
  end
387
368
  end
388
369
  end