avro 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,431 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ module Avro
18
+ class Schema
19
# FIXME turn these into symbols to prevent some gc pressure

# Type names that stand alone, without any further attributes.
PRIMITIVE_TYPES = %w[null boolean string bytes int long float double].to_set

# Type names whose schemas carry a name (and optional namespace).
NAMED_TYPES = %w[fixed enum record error].to_set

# Every type name the parser recognises: primitive, named and container types.
VALID_TYPES = PRIMITIVE_TYPES | NAMED_TYPES | %w[array map union request].to_set

# Inclusive bounds used when validating 'int' (32-bit signed) data.
INT_MIN_VALUE = -(2**31)
INT_MAX_VALUE = 2**31 - 1

# Inclusive bounds used when validating 'long' (64-bit signed) data.
LONG_MIN_VALUE = -(2**63)
LONG_MAX_VALUE = 2**63 - 1
29
+
30
# Parse a JSON schema definition into a Schema object.
#
# json_string - JSON text describing the schema.
#
# Parsing starts with a fresh, empty table of named schemas.
def self.parse(json_string)
  parsed_json = Yajl.load(json_string)
  real_parse(parsed_json, {})
end
33
+
34
# Build an Avro Schema from data that has already been parsed out of JSON.
#
# json_obj - a Hash (schema with a 'type' entry), an Array (union) or a
#            primitive type name.
# names    - table of named schemas, handed on to the schema constructors.
#
# Raises SchemaParseError when json_obj does not describe a known schema.
def self.real_parse(json_obj, names=nil)
  case json_obj
  when Hash
    type = json_obj['type']
    return PrimitiveSchema.new(type) if PRIMITIVE_TYPES.include?(type)

    if NAMED_TYPES.include?(type)
      name = json_obj['name']
      namespace = json_obj['namespace']
      case type
      when 'fixed'
        FixedSchema.new(name, namespace, json_obj['size'], names)
      when 'enum'
        EnumSchema.new(name, namespace, json_obj['symbols'], names)
      when 'record', 'error'
        RecordSchema.new(name, namespace, json_obj['fields'], names, type)
      else
        raise SchemaParseError.new("Unknown Named Type: #{type}")
      end
    elsif VALID_TYPES.include?(type)
      # Only the container types are constructed here; the remaining
      # valid names ('union', 'request') are rejected in this position.
      case type
      when 'array'
        ArraySchema.new(json_obj['items'], names)
      when 'map'
        MapSchema.new(json_obj['values'], names)
      else
        raise SchemaParseError.new("Unknown Valid Type: #{type}")
      end
    elsif type.nil?
      raise SchemaParseError.new("No \"type\" property: #{json_obj}")
    else
      raise SchemaParseError.new("Undefined type: #{type}")
    end
  when Array
    # JSON array (union)
    UnionSchema.new(json_obj, names)
  else
    unless PRIMITIVE_TYPES.include?(json_obj)
      msg = "Could not make an Avro Schema object from #{json_obj}"
      raise SchemaParseError.new(msg)
    end
    PrimitiveSchema.new(json_obj)
  end
end
80
+
81
# Determine if a ruby datum is an instance of a schema.
#
# expected_schema - object responding to #type (plus #size, #symbols, #items,
#                   #values, #schemas or #fields, depending on the type).
# datum           - the Ruby value to check.
#
# Returns true or false. Raises RuntimeError when the schema's type is not
# one of the types handled here.
def self.validate(expected_schema, datum)
  case expected_schema.type
  when 'null'
    datum.nil?
  when 'boolean'
    datum == true || datum == false
  when 'string', 'bytes'
    datum.is_a?(String)
  when 'int'
    # Integer covers both Fixnum and Bignum on every Ruby version.
    datum.is_a?(Integer) &&
      (INT_MIN_VALUE <= datum) && (datum <= INT_MAX_VALUE)
  when 'long'
    datum.is_a?(Integer) &&
      (LONG_MIN_VALUE <= datum) && (datum <= LONG_MAX_VALUE)
  when 'float', 'double'
    datum.is_a?(Float) || datum.is_a?(Integer)
  when 'fixed'
    datum.is_a?(String) && datum.size == expected_schema.size
  when 'enum'
    expected_schema.symbols.include?(datum)
  when 'array'
    datum.is_a?(Array) &&
      datum.all? { |d| validate(expected_schema.items, d) }
  when 'map'
    # Reject non-Hash data up front instead of failing on #keys.
    datum.is_a?(Hash) &&
      datum.keys.all? { |k| k.is_a?(String) } &&
      datum.values.all? { |v| validate(expected_schema.values, v) }
  when 'union'
    expected_schema.schemas.any? { |s| validate(s, datum) }
  when 'record', 'error', 'request'
    datum.is_a?(Hash) &&
      expected_schema.fields.all? { |f| validate(f.type, datum[f.name]) }
  else
    raise "#{expected_schema.inspect} is not allowed."
  end
end
117
+
118
# Remember the Avro type name this schema represents.
#
# type - the type name to store (for example 'string' or 'record').
def initialize(type)
  @type = type
end
121
+
122
# The Avro type name given at construction time.
def type
  @type
end
123
+
124
# Two schemas compare equal when the other object is a Schema with the
# same type name. The seen argument is accepted but not used here.
def ==(other, seen=nil)
  return false unless other.is_a?(Schema)

  @type == other.type
end
127
+
128
# Hash code derived solely from the type name, so schemas that compare
# equal through == share a hash. The seen argument is accepted but not
# used here.
def hash(seen=nil)
  type_name = @type
  type_name.hash
end
131
+
132
# Hash form of the schema; at this level it holds only the type name.
def to_hash
  props = {}
  props['type'] = @type
  props
end
135
+
136
# JSON text for this schema, produced by dumping #to_hash with Yajl.
def to_s
  Yajl.dump(to_hash)
end
139
+
140
# Base class for schemas that carry a name and an optional namespace.
class NamedSchema < Schema
  attr_reader :name, :namespace

  # type      - Avro type name, forwarded to Schema#initialize.
  # name      - schema name; a dotted name supplies its own namespace.
  # namespace - namespace used when the name is not dotted.
  # names     - table of named schemas this schema is registered in.
  def initialize(type, name, namespace=nil, names=nil)
    super(type)
    @name, @namespace = Name.extract_namespace(name, namespace)
    # Registers self in names; when names is nil the table built by
    # add_name is discarded.
    Name.add_name(names, self)
  end

  # The parent's hash plus 'name' and, when set, 'namespace'.
  def to_hash
    naming = {'name' => @name}
    naming['namespace'] = @namespace if @namespace
    super.merge(naming)
  end

  # The name qualified with the namespace, as built by Name.make_fullname.
  def fullname
    Name.make_fullname(@name, @namespace)
  end
end
158
+
159
# Schema for 'record', 'error' and 'request' types: an ordered list of
# Field objects.
class RecordSchema < NamedSchema
  attr_reader :fields

  # Turn the parsed JSON field definitions into Field objects.
  #
  # field_data - Array of Hashes with 'type', 'name' and optionally
  #              'default' and 'order' entries.
  # names      - table of named schemas, passed on to each Field.
  #
  # Returns the Array of Field objects in declaration order.
  # Raises SchemaParseError when field_data is not an Array, when an entry
  # is not a Hash, or when a field name is used twice.
  def self.make_field_objects(field_data, names)
    unless field_data.is_a?(Array)
      raise SchemaParseError, "Fields must be an array: #{field_data.inspect}"
    end

    used_names = Set.new
    field_data.map do |field|
      # Strings, Arrays and Integers also respond to [], so check for an
      # actual Hash rather than duck-typing on the subscript operator.
      unless field.is_a?(Hash)
        raise SchemaParseError, "Not a valid field: #{field}"
      end

      new_field = Field.new(field['type'], field['name'],
                            field['default'], field['order'], names)
      # make sure field name has not been used yet
      if used_names.include?(new_field.name)
        raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
      end
      used_names << new_field.name
      new_field
    end
  end

  # name, namespace - naming information (ignored for 'request').
  # fields          - parsed JSON field definitions.
  # names           - table of named schemas.
  # schema_type     - 'record', 'error' or 'request'.
  def initialize(name, namespace, fields, names=nil, schema_type='record')
    if schema_type == 'request'
      # Requests are anonymous: set the type directly instead of running
      # the NamedSchema constructor, which would register a name.
      @type = schema_type
    else
      super(schema_type, name, namespace, names)
    end
    @fields = RecordSchema.make_field_objects(fields, names)
  end

  # The fields indexed by field name.
  def fields_hash
    fields.inject({}) { |hsh, field| hsh[field.name] = field; hsh }
  end

  # Hash form of the schema. A 'request' is represented by the bare list
  # of its fields rather than by a Hash.
  def to_hash
    field_list = @fields.map { |f| Yajl.load(f.to_s) }
    return field_list if type == 'request'

    super.merge('fields' => field_list)
  end
end
206
+
207
# Schema for the 'array' type; items holds the schema of the elements.
class ArraySchema < Schema
  attr_reader :items, :items_schema_from_names

  # items - a schema name already present in names, or parsed JSON
  #         describing the element schema.
  # names - optional table of named schemas (may be nil).
  #
  # Raises SchemaParseError when items cannot be turned into a schema.
  def initialize(items, names=nil)
    @items_schema_from_names = false

    super('array')

    # names defaults to nil, so guard before looking the name up.
    if items.is_a?(String) && names && names.has_key?(items)
      @items = names[items]
      @items_schema_from_names = true
    else
      begin
        @items = Schema.real_parse(items, names)
      rescue => e
        msg = "Items schema not a valid Avro schema" + e.to_s
        raise SchemaParseError, msg
      end
    end
  end

  # Hash form of the schema. An element schema that was looked up by name
  # is emitted as its fullname; anything else is emitted inline.
  def to_hash
    name_or_json = if items_schema_from_names
                     items.fullname
                   else
                     Yajl.load(items.to_s)
                   end
    super.merge('items' => name_or_json)
  end
end
236
+
237
# Schema for the 'map' type; values holds the schema of the map values.
class MapSchema < Schema
  attr_reader :values, :values_schema_from_names

  # values - a schema name already present in names, or parsed JSON
  #          describing the value schema.
  # names  - optional table of named schemas (may be nil).
  #
  # Raises SchemaParseError when values cannot be turned into a schema.
  def initialize(values, names=nil)
    @values_schema_from_names = false
    super('map')
    # names defaults to nil, so guard before looking the name up.
    if values.is_a?(String) && names && names.has_key?(values)
      values_schema = names[values]
      @values_schema_from_names = true
    else
      begin
        values_schema = Schema.real_parse(values, names)
      rescue => e
        raise SchemaParseError.new('Values schema not a valid Avro schema.' + e.to_s)
      end
    end
    @values = values_schema
  end

  # Hash form of the schema. A value schema that was looked up by name is
  # emitted as its fullname (as ArraySchema does for items); anything else
  # is emitted inline.
  def to_hash
    to_dump = super
    if values_schema_from_names
      to_dump['values'] = values.fullname
    else
      to_dump['values'] = Yajl.load(values.to_s)
    end
    to_dump
  end
end
266
+
267
# Schema for a union: a list of alternative member schemas.
class UnionSchema < Schema
  attr_reader :schemas, :schema_from_names_indices

  # schemas - Array whose entries are schema names already present in
  #           names, or parsed JSON describing a member schema.
  # names   - optional table of named schemas (may be nil).
  #
  # Raises SchemaParseError when a member is not a valid schema, when a
  # non-named type appears twice, or when a member is itself a union.
  def initialize(schemas, names=nil)
    super('union')

    schema_objects = []
    @schema_from_names_indices = []
    schemas.each_with_index do |schema, i|
      from_names = false
      # names defaults to nil, so guard before looking the name up.
      if schema.is_a?(String) && names && names.has_key?(schema)
        new_schema = names[schema]
        from_names = true
      else
        begin
          new_schema = Schema.real_parse(schema, names)
        rescue
          raise SchemaParseError, 'Union item must be a valid Avro schema'
        end
      end

      ns_type = new_schema.type
      if VALID_TYPES.include?(ns_type) &&
          !NAMED_TYPES.include?(ns_type) &&
          schema_objects.any? { |o| o.type == ns_type }
        raise SchemaParseError, "#{ns_type} is already in Union"
      elsif ns_type == 'union'
        raise SchemaParseError, "Unions cannot contain other unions"
      else
        schema_objects << new_schema
        # Remember which members were referenced by name so #to_s can
        # emit the name again instead of the full definition.
        @schema_from_names_indices << i if from_names
      end
    end
    # Assigned after the loop so that an empty union still gets an empty
    # member list instead of nil.
    @schemas = schema_objects
  end

  # JSON text for the union: an array whose entries are fullnames for
  # members referenced by name and inline definitions otherwise.
  def to_s
    # FIXME(jmhodges) this from_name pattern is really weird and
    # seems code-smelly.
    to_dump = []
    schemas.each_with_index do |schema, i|
      if schema_from_names_indices.include?(i)
        to_dump << schema.fullname
      else
        to_dump << Yajl.load(schema.to_s)
      end
    end
    Yajl.dump(to_dump)
  end
end
316
+
317
# Schema for the 'enum' type: a named list of distinct symbols.
class EnumSchema < NamedSchema
  attr_reader :symbols

  # name, space - naming information, forwarded to NamedSchema.
  # symbols     - Array of the enum's symbols; entries must be unique.
  # names       - table of named schemas.
  #
  # Raises Avro::SchemaParseError when symbols is not an Array or contains
  # a symbol more than once.
  def initialize(name, space, symbols, names=nil)
    unless symbols.is_a?(Array)
      raise Avro::SchemaParseError, "Enum symbols must be an array: #{symbols.inspect}"
    end

    if symbols.uniq.length < symbols.length
      # Report the symbols that actually repeat. Formatting the whole array
      # with '%s' would only ever print its first element.
      repeated = symbols.select { |s| symbols.index(s) != symbols.rindex(s) }.uniq
      fail_msg = "Duplicate symbol: #{repeated.join(', ')}"
      raise Avro::SchemaParseError, fail_msg
    end
    super('enum', name, space, names)
    @symbols = symbols
  end

  # The parent's hash plus the 'symbols' list.
  def to_hash
    super.merge('symbols' => symbols)
  end
end
332
+
333
# Schema for one of the type names listed in PRIMITIVE_TYPES.
class PrimitiveSchema < Schema
  # type - a primitive type name.
  #
  # Raises AvroError for any name that is not in PRIMITIVE_TYPES.
  def initialize(type)
    if PRIMITIVE_TYPES.include?(type)
      super(type)
    else
      raise AvroError.new("#{type} is not a valid primitive type.")
    end
  end

  # JSON text for the schema: just the quoted type name when the hash form
  # has a single entry, otherwise the dumped hash.
  def to_s
    props = to_hash
    return type.inspect if props.size == 1

    Yajl.dump(props)
  end
end
347
+
348
# Schema for the 'fixed' type: a named type with an integer size.
class FixedSchema < NamedSchema
  attr_reader :size

  # name, space - naming information, forwarded to NamedSchema.
  # size        - Integer size of the fixed type.
  # names       - table of named schemas.
  #
  # Raises AvroError when size is not an Integer.
  def initialize(name, space, size, names=nil)
    # Ensure valid ctor args. Integer covers what used to be Fixnum and
    # Bignum and exists on every Ruby version.
    unless size.is_a?(Integer)
      raise AvroError, 'Fixed Schema requires a valid integer for size property.'
    end
    super('fixed', name, space, names)
    @size = size
  end

  # The parent's hash plus the 'size' entry.
  def to_hash
    super.merge('size' => @size)
  end
end
363
+
364
# One field of a record: a name, the field's schema and the optional
# 'default' and 'order' attributes.
class Field
  attr_reader :type, :name, :default, :order, :type_from_names

  # type    - a schema name already present in names, or parsed JSON
  #           describing the field's schema.
  # name    - the field name.
  # default - optional default value.
  # order   - optional sort order attribute.
  # names   - optional table of named schemas (may be nil).
  def initialize(type, name, default=nil, order=nil, names=nil)
    @type_from_names = false
    if type.is_a?(String) && names && names.has_key?(type)
      type_schema = names[type]
      @type_from_names = true
    else
      type_schema = Schema.real_parse(type, names)
    end
    @type = type_schema
    @name = name
    @default = default
    @order = order
  end

  # Hash form of the field. A schema that was looked up by name is emitted
  # as its fullname; anything else is emitted inline.
  def to_hash
    sigh_type = type_from_names ? type.fullname : Yajl.load(type.to_s)
    hsh = {
      'name' => name,
      'type' => sigh_type
    }
    # Test for nil rather than truthiness so a default of false is kept.
    hsh['default'] = default unless default.nil?
    hsh['order'] = order if order
    hsh
  end

  # JSON text for the field.
  def to_s
    Yajl.dump(to_hash)
  end
end
395
+ end
396
+
397
# Raised when a schema definition cannot be turned into a Schema object.
class SchemaParseError < AvroError
end
398
+
399
# Helpers for handling schema names, namespaces and the table of named
# schemas.
module Name
  # Split a possibly dotted name into its simple name and namespace.
  #
  # A dotted name supplies its own namespace (everything before the last
  # dot), overriding the namespace argument.
  #
  # Returns the pair [name, namespace].
  def self.extract_namespace(name, namespace)
    segments = name.split('.')
    return [name, namespace] unless segments.size > 1

    [segments.last, segments[0..-2].join('.')]
  end

  # Add a new schema object to the names dictionary (in place).
  #
  # A nil names is replaced by a fresh Hash. Returns the dictionary.
  # Raises SchemaParseError when the schema's fullname is one of
  # Avro::Schema::VALID_TYPES or is already a key of names.
  def self.add_name(names, new_schema)
    key = new_schema.fullname
    if Avro::Schema::VALID_TYPES.include?(key)
      raise SchemaParseError, "#{key} is a reserved type name."
    end

    names = {} if names.nil?
    if names.has_key?(key)
      raise SchemaParseError, "The name \"#{key}\" is already in use."
    end

    names[key] = new_schema
    names
  end

  # Prefix name with namespace unless the name is already dotted or there
  # is no namespace.
  def self.make_fullname(name, namespace)
    return name if name.include?('.') || namespace.nil?

    namespace + '.' + name
  end
end
431
+ end
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env ruby
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
# Generates random Ruby data shaped like a given schema.
class RandomData
  # Characters used for random 'string' values and map keys.
  CHARPOOL = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789'
  # Characters used for random 'bytes' and 'fixed' values.
  BYTEPOOL = '12345abcd'

  # schm - schema object (responds to #type and the per-type accessors).
  # seed - optional seed; when given it reseeds Ruby's global random
  #        number generator via srand.
  def initialize(schm, seed=nil)
    srand(seed) if seed
    @seed = seed
    @schm = schm
  end

  # Produce one random datum for the schema given at construction.
  def next
    nextdata(@schm)
  end

  # Produce a random datum for schm.
  #
  # d - current nesting depth; deeper arrays and maps get fewer entries.
  #
  # Returns nil for the 'null' type, for an enum without symbols and for
  # any type name not handled below.
  def nextdata(schm, d=0)
    case schm.type
    when 'boolean'
      rand > 0.5
    when 'string'
      randstr()
    when 'int'
      rand(Avro::Schema::INT_MAX_VALUE - Avro::Schema::INT_MIN_VALUE) + Avro::Schema::INT_MIN_VALUE
    when 'long'
      rand(Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) + Avro::Schema::LONG_MIN_VALUE
    when 'float'
      (-1024 + 2048 * rand).round.to_f
    when 'double'
      Avro::Schema::LONG_MIN_VALUE + (Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) * rand
    when 'bytes'
      randstr(BYTEPOOL)
    when 'null'
      nil
    when 'array'
      arr = []
      len = rand(5) + 2 - d
      len = 0 if len < 0
      len.times { arr << nextdata(schm.items, d+1) }
      arr
    when 'map'
      map = {}
      len = rand(5) + 2 - d
      len = 0 if len < 0
      len.times do
        # Keys are random strings, generated the same way as 'string' data.
        map[randstr()] = nextdata(schm.values, d+1)
      end
      map
    when 'record', 'error', 'request'
      # 'error' and 'request' schemas carry fields just like records.
      m = {}
      schm.fields.each do |field|
        m[field.name] = nextdata(field.type, d+1)
      end
      m
    when 'union'
      types = schm.schemas
      nextdata(types[rand(types.size)], d)
    when 'enum'
      symbols = schm.symbols
      len = symbols.size
      return nil if len == 0
      symbols[rand(len)]
    when 'fixed'
      f = ""
      schm.size.times { f << BYTEPOOL[rand(BYTEPOOL.size), 1] }
      f
    end
  end

  # Build a random string of 0..length characters drawn from chars.
  def randstr(chars=CHARPOOL, length=20)
    str = ''
    rand(length+1).times { str << chars[rand(chars.size)] }
    str
  end
end