avro-jruby 1.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,161 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ module Avro
18
+ class Protocol
19
# Kinds of schema that may appear in a protocol's "types" list, as strings.
VALID_TYPE_SCHEMA_TYPES = %w[enum record error fixed].to_set
# The same kinds as symbols, for comparison against a schema's type_sym.
VALID_TYPE_SCHEMA_TYPES_SYM = VALID_TYPE_SCHEMA_TYPES.map { |kind| kind.to_sym }.to_set
21
+ class ProtocolParseError < Avro::AvroError; end
22
+
23
+ attr_reader :name, :namespace, :types, :messages, :md5
24
# Builds a Protocol from its JSON text.
#
# protocol_string - JSON source, decoded with MultiJson.load.
#
# Returns a new Protocol constructed from the "protocol", "namespace",
# "types" and "messages" properties of the decoded object (missing
# properties are passed as nil).
# Raises ProtocolParseError when the decoded JSON is not a Hash.
def self.parse(protocol_string)
  decoded = MultiJson.load(protocol_string)
  unless decoded.is_a?(Hash)
    raise ProtocolParseError, "Not a JSON object: #{decoded}"
  end

  properties = %w[protocol namespace types messages].map { |key| decoded[key] }
  Protocol.new(*properties)
end
37
+
38
# Validates the constructor arguments, parses the type and message
# definitions, and records the MD5 digest of the protocol's JSON form.
#
# name      - protocol name; must be a String.
# namespace - must be a String.
# types     - must be an Array of type definitions.
# messages  - must be a Hash of message name => message body.
#
# Note that although namespace, types and messages default to nil, the
# checks below reject nil for each of them.
#
# Raises ProtocolParseError for the first argument that fails its check.
def initialize(name, namespace=nil, types=nil, messages=nil)
  # Ensure valid ctor args; checks run in this order and the first failure wins.
  raise ProtocolParseError, 'Protocols must have a non-empty name.' unless name
  raise ProtocolParseError, 'The name property must be a string.' unless name.is_a?(String)
  raise ProtocolParseError, 'The namespace property must be a string.' unless namespace.is_a?(String)
  raise ProtocolParseError, 'The types property must be a list.' unless types.is_a?(Array)
  raise ProtocolParseError, 'The messages property must be a JSON object.' unless messages.is_a?(Hash)

  @name = name
  @namespace = namespace
  # One registry is shared: types are parsed first so that messages can
  # be parsed against the same set of names.
  known_names = {}
  @types = parse_types(types, known_names)
  @messages = parse_messages(messages, known_names)
  # Digest is taken over the serialized form, so it must come last.
  @md5 = Digest::MD5.digest(to_s)
end
59
+
60
# Serializes the protocol's Avro representation (see to_avro) to JSON.
def to_s
  MultiJson.dump(to_avro)
end

# Two protocols are equal when their Avro representations are equal.
# to_avro is called on +other+ as well, so +other+ must provide it.
def ==(other)
  own_form = to_avro
  own_form == other.to_avro
end
67
+
68
+ private
69
# Parses every entry of the protocol's "types" list into a schema object.
#
# types      - Array of decoded JSON type definitions.
# type_names - names registry handed to Schema.real_parse for each entry.
#
# Returns an Array of the parsed schema objects, in input order.
# Raises ProtocolParseError when an entry parses to a schema whose type is
# not one of enum, record, error or fixed.
def parse_types(types, type_names)
  types.map do |type|
    # FIXME adding type.name to type_names is not defined in the
    # spec. Possible bug in the python impl and the spec.
    type_object = Schema.real_parse(type, type_names, namespace)
    unless VALID_TYPE_SCHEMA_TYPES_SYM.include?(type_object.type_sym)
      raise ProtocolParseError, "Type #{type} not an enum, record, fixed or error."
    end
    type_object
  end
end
82
+
83
# Builds a Message for every entry of the protocol's "messages" object.
#
# messages - Hash of message name => decoded JSON body.
# names    - names registry passed through to each Message.
#
# Returns a Hash of message name => Message.
# Raises ProtocolParseError when a name is seen twice or a body is not a Hash.
def parse_messages(messages, names)
  messages.each_with_object({}) do |(message_name, body), parsed|
    if parsed.key?(message_name)
      raise ProtocolParseError, "Message name \"#{message_name}\" repeated."
    end
    unless body.is_a?(Hash)
      raise ProtocolParseError, "Message name \"#{message_name}\" has non-object body #{body.inspect}"
    end

    parsed[message_name] = Message.new(
      message_name, body['request'], body['response'], body['errors'], names, namespace
    )
  end
end
99
+
100
+ protected
101
# Builds the Hash form of this protocol, suitable for JSON serialization.
#
# names - collection threaded through every nested to_avro call, types
#         first and then messages.
#
# Returns a Hash with "protocol" and, when the corresponding attribute is
# set, "namespace", "types" and "messages".
def to_avro(names=Set.new)
  {'protocol' => name}.tap do |avro|
    avro['namespace'] = namespace if namespace
    avro['types'] = types.map { |schema| schema.to_avro(names) } if types

    if messages
      # NOTE(review): collect_hash is not defined in this file; presumably a
      # project extension that builds a Hash from [key, value] pairs -- confirm.
      avro['messages'] = messages.collect_hash { |key, message| [key, message.to_avro(names)] }
    end
  end
end
112
+
113
# A single protocol message: a request parameter list, a response schema
# and an optional list of declared errors.
class Message
  attr_reader :name, :request, :response, :errors, :default_namespace

  # name              - message name.
  # request           - Array of field definitions for the request.
  # response          - response schema definition, or the name of a type
  #                     already present in +names+.
  # errors            - optional Array of error schema definitions; left
  #                     unset (nil) when not given.
  # names             - names registry used while parsing.
  # default_namespace - namespace applied to unqualified names.
  #
  # Raises ProtocolParseError when request, or a given errors, is not an Array.
  def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil)
    @name = name
    @default_namespace = default_namespace
    @request = parse_request(request, names)
    @response = parse_response(response, names)
    @errors = parse_errors(errors, names) if errors
  end

  # Returns a Hash with "request" and "response", plus "errors" when the
  # message declares any.
  def to_avro(names=Set.new)
    {
      'request' => request.to_avro(names),
      'response' => response.to_avro(names)
    }.tap do |hash|
      hash['errors'] = errors.to_avro(names) if errors
    end
  end

  # Serializes to_avro as JSON. Uses MultiJson, the serializer used by
  # Protocol#to_s and Schema#to_s; the previous Yajl.dump call raised
  # NameError whenever Yajl was not loaded.
  def to_s
    MultiJson.dump to_avro
  end

  # Wraps the request parameter list in a :request RecordSchema.
  # Raises ProtocolParseError when +request+ is not an Array.
  def parse_request(request, names)
    unless request.is_a?(Array)
      raise ProtocolParseError, "Request property not an Array: #{request.inspect}"
    end
    Schema::RecordSchema.new(nil, default_namespace, request, names, :request)
  end

  # Resolves the response: a String naming a type already in +names+ returns
  # that registered schema; anything else is parsed by Schema.real_parse.
  def parse_response(response, names)
    if response.is_a?(String) && names
      fullname = Name.make_fullname(response, default_namespace)
      return names[fullname] if names.include?(fullname)
    end

    Schema.real_parse(response, names, default_namespace)
  end

  # Parses the declared errors list via Schema.real_parse.
  # Raises ProtocolParseError when +errors+ is not an Array.
  def parse_errors(errors, names)
    unless errors.is_a?(Array)
      raise ProtocolParseError, "Errors property not an Array: #{errors}"
    end
    Schema.real_parse(errors, names, default_namespace)
  end
end
160
+ end
161
+ end
@@ -0,0 +1,405 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ module Avro
18
+ class Schema
19
+ # Sets of strings, for backwards compatibility. See below for sets of symbols,
20
+ # for better performance.
21
# String names of the primitive and the named schema types.
PRIMITIVE_TYPES = %w[null boolean string bytes int long float double].to_set
NAMED_TYPES = %w[fixed enum record error].to_set

# Every type name accepted in a schema's "type" property.
VALID_TYPES = PRIMITIVE_TYPES | NAMED_TYPES | %w[array map union request].to_set

# Symbol counterparts of the three sets above.
PRIMITIVE_TYPES_SYM = PRIMITIVE_TYPES.map { |type_name| type_name.to_sym }.to_set
NAMED_TYPES_SYM = NAMED_TYPES.map { |type_name| type_name.to_sym }.to_set
VALID_TYPES_SYM = VALID_TYPES.map { |type_name| type_name.to_sym }.to_set

# Inclusive bounds of the 32-bit "int" and 64-bit "long" value ranges.
INT_MIN_VALUE = -(2**31)
INT_MAX_VALUE = 2**31 - 1
LONG_MIN_VALUE = -(2**63)
LONG_MAX_VALUE = 2**63 - 1
34
+
35
# Parses a JSON string into a schema object, starting from an empty
# names registry.
def self.parse(json_string)
  decoded = MultiJson.load(json_string)
  real_parse(decoded, {})
end

# Build Avro Schema from data parsed out of JSON string.
#
# json_obj          - a Hash (typed definition), an Array (union) or a
#                     primitive type name.
# names             - names registry passed on to the schema constructors.
# default_namespace - namespace used when a named definition has no
#                     "namespace" key of its own.
#
# Raises SchemaParseError for a Hash without "type", an unknown type, a
# Hash whose type is "union" or "request", or any other unrecognized value.
def self.real_parse(json_obj, names=nil, default_namespace=nil)
  case json_obj
  when Hash
    type = json_obj['type']
    raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil?

    # Check that the type is valid before calling #to_sym, since symbols are never garbage
    # collected (important to avoid DoS if we're accepting schemas from untrusted clients)
    raise SchemaParseError, "Unknown type: #{type}" unless VALID_TYPES.include?(type)

    type_sym = type.to_sym
    return PrimitiveSchema.new(type_sym) if PRIMITIVE_TYPES_SYM.include?(type_sym)

    unless NAMED_TYPES_SYM.include?(type_sym)
      # Remaining valid types: only array and map may be written as a Hash.
      case type_sym
      when :array
        return ArraySchema.new(json_obj['items'], names, default_namespace)
      when :map
        return MapSchema.new(json_obj['values'], names, default_namespace)
      else
        raise SchemaParseError.new("Unknown Valid Type: #{type}")
      end
    end

    name = json_obj['name']
    # An explicit "namespace" key wins even when its value is nil.
    namespace = json_obj.key?('namespace') ? json_obj['namespace'] : default_namespace
    case type_sym
    when :fixed
      FixedSchema.new(name, namespace, json_obj['size'], names)
    when :enum
      EnumSchema.new(name, namespace, json_obj['symbols'], names)
    when :record, :error
      RecordSchema.new(name, namespace, json_obj['fields'], names, type_sym)
    else
      raise SchemaParseError.new("Unknown named type: #{type}")
    end
  when Array
    # JSON array (union)
    UnionSchema.new(json_obj, names, default_namespace)
  else
    unless PRIMITIVE_TYPES.include?(json_obj)
      raise SchemaParseError.new("#{json_obj.inspect} is not a schema we know about.")
    end
    PrimitiveSchema.new(json_obj)
  end
end
93
+
94
+ # Determine if a ruby datum is an instance of a schema
95
# Determine if a ruby datum is an instance of a schema.
#
# expected_schema - schema object; dispatch is on its type_sym.
# datum           - the Ruby value to check.
#
# Returns true or false. Container types (array, map, union, record, error,
# request) are checked recursively.
# Raises RuntimeError when the schema's type_sym is not one handled here.
def self.validate(expected_schema, datum)
  case expected_schema.type_sym
  when :null
    datum.nil?
  when :boolean
    datum == true || datum == false
  when :string, :bytes
    datum.is_a? String
  when :int
    # Integer covers the former Fixnum and Bignum classes, which no longer
    # exist on current Rubies.
    datum.is_a?(Integer) &&
      (INT_MIN_VALUE <= datum) && (datum <= INT_MAX_VALUE)
  when :long
    datum.is_a?(Integer) &&
      (LONG_MIN_VALUE <= datum) && (datum <= LONG_MAX_VALUE)
  when :float, :double
    datum.is_a?(Float) || datum.is_a?(Integer)
  when :fixed
    datum.is_a?(String) && datum.size == expected_schema.size
  when :enum
    expected_schema.symbols.include? datum
  when :array
    datum.is_a?(Array) &&
      datum.all? { |d| validate(expected_schema.items, d) }
  when :map
    # Guard on Hash first: a non-Hash datum is simply not a map. Without the
    # guard, a union that lists a map raised NoMethodError for other values.
    datum.is_a?(Hash) &&
      datum.keys.all? { |k| k.is_a? String } &&
      datum.values.all? { |v| validate(expected_schema.values, v) }
  when :union
    expected_schema.schemas.any? { |s| validate(s, datum) }
  when :record, :error, :request
    datum.is_a?(Hash) &&
      expected_schema.fields.all? { |f| validate(f.type, datum[f.name]) }
  else
    raise "you suck #{expected_schema.inspect} is not allowed."
  end
end
130
+
131
# Stores the schema type as a symbol; anything that is not already a
# Symbol is converted with to_sym.
def initialize(type)
  @type_sym = Symbol === type ? type : type.to_sym
end

attr_reader :type_sym

# Returns the type as a string (rather than a symbol), for backwards compatibility.
# Deprecated in favor of {#type_sym}.
def type
  @type_sym.to_s
end

# Base equality: +other+ must be a Schema with the same type_sym.
# The +seen+ argument is accepted but not used here.
def ==(other, seen=nil)
  return false unless other.is_a?(Schema)
  type_sym == other.type_sym
end

# Hash value derived from type_sym only; +seen+ is accepted but not used here.
def hash(seen=nil)
  type_sym.hash
end
148
+
149
# Resolves a nested schema definition.
#
# A String that names a schema already present in +names+ (after being
# qualified with +namespace+) returns that registered schema. Everything
# else is parsed with Schema.real_parse.
#
# Raises SchemaParseError; any other StandardError raised while parsing is
# replaced by a SchemaParseError naming this schema class.
def subparse(json_obj, names=nil, namespace=nil)
  if names && json_obj.is_a?(String)
    qualified = Name.make_fullname(json_obj, namespace)
    return names[qualified] if names.include?(qualified)
  end

  begin
    Schema.real_parse(json_obj, names, namespace)
  rescue SchemaParseError
    # Already the right error type: let it propagate untouched.
    raise
  rescue
    raise SchemaParseError, "Sub-schema for #{self.class.name} not a valid Avro schema. Bad schema: #{json_obj}"
  end
end
162
+
163
+ def to_avro(names=nil)
164
+ {'type' => type}
165
+ end
166
+
167
+ def to_s
168
+ MultiJson.dump to_avro
169
+ end
170
+
171
# Base class for schemas that carry a name and an optional namespace.
class NamedSchema < Schema
  attr_reader :name, :namespace

  # Splits +name+ into short name and namespace via Name.extract_namespace
  # and registers this schema under its full name via Name.add_name.
  def initialize(type, name, namespace=nil, names=nil)
    super(type)
    short_name, resolved_namespace = Name.extract_namespace(name, namespace)
    @name = short_name
    @namespace = resolved_namespace
    Name.add_name(names, self)
  end

  # Returns the full name alone when it is already present in +names+;
  # otherwise records it there and returns the Hash form with "name" and,
  # when set, "namespace". A schema without a name skips the bookkeeping.
  def to_avro(names=Set.new)
    if @name
      return fullname if names.include?(fullname)
      names << fullname
    end

    avro = super.merge('name' => @name)
    @namespace ? avro.merge('namespace' => @namespace) : avro
  end

  # Full name as built by Name.make_fullname; memoized after the first call.
  def fullname
    @fullname ||= Name.make_fullname(@name, @namespace)
  end
end
193
+
194
# Schema for records, errors and message requests: an ordered field list.
class RecordSchema < NamedSchema
  attr_reader :fields

  # Turns a list of field definitions into Field objects.
  #
  # field_data - enumerable of field definitions; each must respond to [].
  # names      - names registry passed on to each Field.
  # namespace  - namespace passed on to each Field.
  #
  # Returns an Array of Field objects in input order.
  # Raises SchemaParseError for an entry that does not respond to [] or for
  # a field name that was already used.
  def self.make_field_objects(field_data, names, namespace=nil)
    used_names = Set.new
    field_data.map do |field|
      # TODO(jmhodges) wtffffff
      raise SchemaParseError, "Not a valid field: #{field}" unless field.respond_to?(:[])

      new_field = Field.new(field['type'], field['name'], field['default'], field['order'], names, namespace)
      # make sure field name has not been used yet
      if used_names.include?(new_field.name)
        raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
      end
      used_names << new_field.name
      new_field
    end
  end

  # A :request schema bypasses the NamedSchema constructor (so it is neither
  # named nor registered); every other schema_type goes through it.
  def initialize(name, namespace, fields, names=nil, schema_type=:record)
    is_request = schema_type == :request || schema_type == 'request'
    if is_request
      @type_sym = schema_type.to_sym
      @namespace = namespace
    else
      super(schema_type, name, namespace, names)
    end
    @fields = RecordSchema.make_field_objects(fields, names, self.namespace)
  end

  # Fields indexed by name; memoized after the first call.
  def fields_hash
    @fields_hash ||= Hash[fields.map { |field| [field.name, field] }]
  end

  # Returns the parent's result unchanged when it is not a Hash. Otherwise
  # adds "fields"; a :request schema returns just the field list.
  def to_avro(names=Set.new)
    avro = super
    return avro unless avro.is_a?(Hash)

    field_list = @fields.map { |field| field.to_avro(names) }
    avro['fields'] = field_list
    type_sym == :request ? field_list : avro
  end
end
244
+
245
# Schema for arrays; +items+ is the schema of the elements.
class ArraySchema < Schema
  attr_reader :items

  # Parses the item definition with subparse.
  def initialize(items, names=nil, default_namespace=nil)
    super(:array)
    @items = subparse(items, names, default_namespace)
  end

  # Hash form: the base representation plus "items".
  def to_avro(names=Set.new)
    base = super
    base.merge('items' => items.to_avro(names))
  end
end
257
+
258
# Schema for maps; +values+ is the schema of the map's values.
class MapSchema < Schema
  attr_reader :values

  # Parses the value definition with subparse.
  def initialize(values, names=nil, default_namespace=nil)
    super(:map)
    @values = subparse(values, names, default_namespace)
  end

  # Hash form: the base representation plus "values".
  def to_avro(names=Set.new)
    base = super
    base.merge('values' => values.to_avro(names))
  end
end
270
+
271
# Schema for unions: an ordered list of member schemas.
class UnionSchema < Schema
  attr_reader :schemas

  # schemas           - Array of member schema definitions.
  # names             - names registry passed to subparse.
  # default_namespace - namespace passed to subparse.
  #
  # Raises SchemaParseError when a non-named type appears twice or when a
  # member is itself a union.
  def initialize(schemas, names=nil, default_namespace=nil)
    super(:union)

    schema_objects = []
    schemas.each do |schema|
      new_schema = subparse(schema, names, default_namespace)
      ns_type = new_schema.type_sym

      if VALID_TYPES_SYM.include?(ns_type) &&
          !NAMED_TYPES_SYM.include?(ns_type) &&
          schema_objects.any? { |o| o.type_sym == ns_type }
        raise SchemaParseError, "#{ns_type} is already in Union"
      elsif ns_type == :union
        raise SchemaParseError, "Unions cannot contain other unions"
      else
        schema_objects << new_schema
      end
    end
    # Assigned after the loop so that an empty union ends up with an empty
    # list; assigning inside the loop left @schemas nil in that case and
    # to_avro then failed on nil.
    @schemas = schema_objects
  end

  # Array form: the representation of each member, in order.
  def to_avro(names=Set.new)
    schemas.map { |schema| schema.to_avro(names) }
  end
end
299
+
300
# Schema for enums: a named type with a list of distinct symbols.
class EnumSchema < NamedSchema
  attr_reader :symbols

  # name    - enum name.
  # space   - namespace.
  # symbols - Array of symbol names; must not contain duplicates.
  # names   - names registry passed to the NamedSchema constructor.
  #
  # Raises Avro::SchemaParseError when +symbols+ contains a duplicate.
  def initialize(name, space, symbols, names=nil)
    if symbols.uniq.length < symbols.length
      # Interpolate the whole list. The former `'...%s' % symbols` treated
      # the Array as the format argument list, so only the first symbol
      # made it into the message.
      fail_msg = "Duplicate symbol: #{symbols.inspect}"
      raise Avro::SchemaParseError, fail_msg
    end
    super(:enum, name, space, names)
    @symbols = symbols
  end

  # Adds "symbols" when the parent produced a Hash; otherwise passes the
  # parent's result through unchanged.
  def to_avro(names=Set.new)
    avro = super
    avro.is_a?(Hash) ? avro.merge('symbols' => symbols) : avro
  end
end
316
+
317
+ # Valid primitive types are in PRIMITIVE_TYPES.
318
# Schema for the primitive types listed in PRIMITIVE_TYPES.
class PrimitiveSchema < Schema
  # Accepts a primitive type given either as a symbol or as a string.
  # Raises AvroError for anything else.
  def initialize(type)
    unless PRIMITIVE_TYPES_SYM.include?(type) || PRIMITIVE_TYPES.include?(type)
      raise AvroError.new("#{type} is not a valid primitive type.")
    end
    super(type.to_sym)
  end

  # Returns the bare type name when the base representation holds a single
  # entry, and the base representation itself otherwise.
  def to_avro(names=nil)
    avro = super
    return type if avro.size == 1
    avro
  end
end
334
+
335
# Schema for fixed-size values: a named type with an integer size.
class FixedSchema < NamedSchema
  attr_reader :size

  # name  - type name.
  # space - namespace.
  # size  - must be an Integer.
  # names - names registry passed to the NamedSchema constructor.
  #
  # Raises AvroError when +size+ is not an Integer.
  def initialize(name, space, size, names=nil)
    # Ensure valid ctor args. Integer covers the former Fixnum and Bignum
    # classes, which no longer exist on current Rubies.
    unless size.is_a?(Integer)
      raise AvroError, 'Fixed Schema requires a valid integer for size property.'
    end
    super(:fixed, name, space, names)
    @size = size
  end

  # Adds "size" when the parent produced a Hash; otherwise passes the
  # parent's result through unchanged.
  def to_avro(names=Set.new)
    avro = super
    avro.is_a?(Hash) ? avro.merge('size' => size) : avro
  end
end
351
+
352
# A single record field: a name, a type schema and optional default/order.
class Field < Schema
  attr_reader :type, :name, :default, :order

  # type      - field type definition, parsed with subparse.
  # name      - field name.
  # default   - optional default value.
  # order     - optional sort order.
  # names     - names registry passed to subparse.
  # namespace - namespace passed to subparse.
  def initialize(type, name, default=nil, order=nil, names=nil, namespace=nil)
    @type = subparse(type, names, namespace)
    @name = name
    @default = default
    @order = order
  end

  # Hash form with "name" and "type", plus "default" and "order" when set.
  def to_avro(names=Set.new)
    {'name' => name, 'type' => type.to_avro(names)}.tap do |avro|
      # Test for nil rather than truthiness so that a default of false is
      # kept. A nil default still cannot be told apart from "no default",
      # because the constructor uses nil for both.
      avro['default'] = default unless default.nil?
      avro['order'] = order if order
    end
  end
end
369
+ end
370
+
371
+ class SchemaParseError < AvroError; end
372
+
373
# Helpers for splitting, joining and registering schema names.
module Name
  # Splits a dotted name into its last segment and the namespace before it.
  # A name without a dot is returned unchanged together with the namespace
  # that was passed in.
  #
  # Returns a two-element [name, namespace] pair.
  def self.extract_namespace(name, namespace)
    segments = name.split('.')
    return [name, namespace] if segments.size <= 1

    [segments.last, segments[0..-2].join('.')]
  end

  # Add a new schema object to the names dictionary (in place).
  #
  # A nil +names+ is replaced by a fresh Hash, so callers that passed nil
  # only see the registration through the return value.
  #
  # Returns the registry.
  # Raises SchemaParseError when the full name is a reserved type name or
  # is already registered.
  def self.add_name(names, new_schema)
    full = new_schema.fullname
    if Avro::Schema::VALID_TYPES.include?(full)
      raise SchemaParseError, "#{full} is a reserved type name."
    end

    registry = names.nil? ? {} : names
    if registry.has_key?(full)
      raise SchemaParseError, "The name \"#{full}\" is already in use."
    end

    registry[full] = new_schema
    registry
  end

  # Prefixes +name+ with +namespace+ unless the name already contains a
  # dot or no namespace is given.
  def self.make_fullname(name, namespace)
    return name if name.include?('.') || namespace.nil?

    namespace + '.' + name
  end
end
405
+ end