avro 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,431 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
module Avro
  # An Avro schema node. The class methods build schema objects from data
  # parsed out of JSON and check Ruby datums against a schema; the nested
  # subclasses model the individual schema kinds.
  class Schema
    # FIXME turn these into symbols to prevent some gc pressure
    PRIMITIVE_TYPES = Set.new(%w[null boolean string bytes int long float double])
    NAMED_TYPES     = Set.new(%w[fixed enum record error])

    VALID_TYPES = PRIMITIVE_TYPES + NAMED_TYPES + Set.new(%w[array map union request])

    INT_MIN_VALUE  = -(1 << 31)
    INT_MAX_VALUE  = (1 << 31) - 1
    LONG_MIN_VALUE = -(1 << 63)
    LONG_MAX_VALUE = (1 << 63) - 1

    # Parse a JSON string into a schema object, starting from an empty
    # dictionary of known names.
    def self.parse(json_string)
      real_parse(Yajl.load(json_string), {})
    end

    # Build Avro Schema from data parsed out of JSON string.
    #
    # json_obj may be a Hash (an object with a "type" property), an Array
    # (a union) or a String naming a primitive type. names is the dictionary
    # of named schemas seen so far; named schemas register themselves in it.
    # Raises SchemaParseError when json_obj cannot be turned into a schema.
    def self.real_parse(json_obj, names=nil)
      if json_obj.is_a? Hash
        type = json_obj['type']
        if PRIMITIVE_TYPES.include?(type)
          return PrimitiveSchema.new(type)
        elsif NAMED_TYPES.include? type
          name = json_obj['name']
          namespace = json_obj['namespace']
          case type
          when 'fixed'
            size = json_obj['size']
            return FixedSchema.new(name, namespace, size, names)
          when 'enum'
            symbols = json_obj['symbols']
            return EnumSchema.new(name, namespace, symbols, names)
          when 'record', 'error'
            fields = json_obj['fields']
            return RecordSchema.new(name, namespace, fields, names, type)
          else
            raise SchemaParseError.new("Unknown Named Type: #{type}")
          end
        elsif VALID_TYPES.include?(type)
          case type
          when 'array'
            return ArraySchema.new(json_obj['items'], names)
          when 'map'
            return MapSchema.new(json_obj['values'], names)
          else
            raise SchemaParseError.new("Unknown Valid Type: #{type}")
          end
        elsif type.nil?
          raise SchemaParseError.new("No \"type\" property: #{json_obj}")
        else
          raise SchemaParseError.new("Undefined type: #{type}")
        end
      elsif json_obj.is_a? Array
        # JSON array (union)
        return UnionSchema.new(json_obj, names)
      elsif PRIMITIVE_TYPES.include? json_obj
        return PrimitiveSchema.new(json_obj)
      else
        msg = "Could not make an Avro Schema object from #{json_obj}"
        raise SchemaParseError.new(msg)
      end
    end

    # Determine if a ruby datum is an instance of a schema.
    #
    # Returns a boolean; raises RuntimeError when the schema's type is not one
    # of the types handled below.
    def self.validate(expected_schema, datum)
      case expected_schema.type
      when 'null'
        datum.nil?
      when 'boolean'
        datum == true || datum == false
      when 'string', 'bytes'
        datum.is_a? String
      when 'int'
        # Integer covers what older Rubies split into Fixnum and Bignum.
        datum.is_a?(Integer) &&
          (INT_MIN_VALUE <= datum) && (datum <= INT_MAX_VALUE)
      when 'long'
        datum.is_a?(Integer) &&
          (LONG_MIN_VALUE <= datum) && (datum <= LONG_MAX_VALUE)
      when 'float', 'double'
        datum.is_a?(Float) || datum.is_a?(Integer)
      when 'fixed'
        datum.is_a?(String) && datum.size == expected_schema.size
      when 'enum'
        expected_schema.symbols.include? datum
      when 'array'
        datum.is_a?(Array) &&
          datum.all?{|d| validate(expected_schema.items, d) }
      when 'map'
        # Check the container first so a non-Hash datum is reported as
        # invalid instead of blowing up on #keys.
        datum.is_a?(Hash) &&
          datum.keys.all?{|k| k.is_a? String } &&
          datum.values.all?{|v| validate(expected_schema.values, v) }
      when 'union'
        expected_schema.schemas.any?{|s| validate(s, datum) }
      when 'record', 'error', 'request'
        datum.is_a?(Hash) &&
          expected_schema.fields.all?{|f| validate(f.type, datum[f.name]) }
      else
        raise "you suck #{expected_schema.inspect} is not allowed."
      end
    end

    def initialize(type)
      @type = type
    end

    # The schema's type name, e.g. 'int' or 'record'.
    def type; @type; end

    # Two schemas compare equal when their type names match. The seen
    # argument is accepted but not used here.
    def ==(other, seen=nil)
      other.is_a?(Schema) && @type == other.type
    end

    # Hash derived from the type name only, consistent with #==.
    def hash(seen=nil)
      @type.hash
    end

    # Plain-Hash form of the schema, suitable for dumping as JSON.
    def to_hash
      {'type' => @type}
    end

    # JSON text of the schema.
    def to_s
      Yajl.dump to_hash
    end

    # Base class for schemas that carry a name and an optional namespace.
    class NamedSchema < Schema
      attr_reader :name, :namespace

      # Splits a dotted name into name/namespace and registers the schema in
      # names (in place, when a dictionary is given).
      def initialize(type, name, namespace=nil, names=nil)
        super(type)
        @name, @namespace = Name.extract_namespace(name, namespace)
        Name.add_name(names, self)
      end

      def to_hash
        props = {'name' => @name}
        props.merge!('namespace' => @namespace) if @namespace
        super.merge props
      end

      # Namespace-qualified name, as built by Name.make_fullname.
      def fullname
        Name.make_fullname(@name, @namespace)
      end
    end

    # Schema for 'record' and 'error' types, and for 'request' pseudo-records.
    class RecordSchema < NamedSchema
      attr_reader :fields

      # Turn the parsed "fields" list into Field objects.
      # Raises SchemaParseError for an entry that cannot be indexed or for a
      # field name that is used twice.
      def self.make_field_objects(field_data, names)
        field_objects, field_names = [], Set.new
        field_data.each do |field|
          unless field.respond_to?(:[]) # TODO(jmhodges) wtffffff
            raise SchemaParseError, "Not a valid field: #{field}"
          end

          new_field = Field.new(field['type'], field['name'],
                                field['default'], field['order'], names)
          # make sure field name has not been used yet
          if field_names.include?(new_field.name)
            raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
          end
          field_names << new_field.name
          field_objects << new_field
        end
        field_objects
      end

      def initialize(name, namespace, fields, names=nil, schema_type='record')
        if schema_type == 'request'
          # A request only gets its type set; it skips the NamedSchema
          # initializer and so is not added to names.
          @type = schema_type
        else
          super(schema_type, name, namespace, names)
        end
        @fields = RecordSchema.make_field_objects(fields, names)
      end

      # Fields indexed by field name.
      def fields_hash
        fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
      end

      # For a 'request' only the list of fields is returned; every other
      # record type returns the full Hash.
      def to_hash
        hsh = super.merge('fields' => @fields.map {|f| Yajl.load(f.to_s) })
        if type == 'request'
          hsh['fields']
        else
          hsh
        end
      end
    end

    # Schema for 'array'; items is the element schema.
    class ArraySchema < Schema
      attr_reader :items, :items_schema_from_names

      # items is either the name of an already-registered schema or parsed
      # JSON describing the element schema.
      # Raises SchemaParseError when items cannot be parsed.
      def initialize(items, names=nil)
        @items_schema_from_names = false

        super('array')

        # names defaults to nil, so guard before looking the name up.
        if items.is_a?(String) && names && names.has_key?(items)
          @items = names[items]
          @items_schema_from_names = true
        else
          begin
            @items = Schema.real_parse(items, names)
          rescue => e
            msg = "Items schema not a valid Avro schema" + e.to_s
            raise SchemaParseError, msg
          end
        end
      end

      def to_hash
        name_or_json = if items_schema_from_names
                         items.fullname
                       else
                         Yajl.load(items.to_s)
                       end
        super.merge('items' => name_or_json)
      end
    end

    # Schema for 'map'; values is the schema of the map's values.
    class MapSchema < Schema
      attr_reader :values, :values_schema_from_names

      # values is either the name of an already-registered schema or parsed
      # JSON describing the value schema.
      # Raises SchemaParseError when values cannot be parsed.
      def initialize(values, names=nil)
        @values_schema_from_names = false
        super('map')
        # names defaults to nil, so guard before looking the name up.
        if values.is_a?(String) && names && names.has_key?(values)
          values_schema = names[values]
          @values_schema_from_names = true
        else
          begin
            values_schema = Schema.real_parse(values, names)
          rescue => e
            raise SchemaParseError.new('Values schema not a valid Avro schema.' + e.to_s)
          end
        end
        @values = values_schema
      end

      def to_hash
        to_dump = super
        if values_schema_from_names
          # Refer to a named schema by its fullname, as ArraySchema does,
          # rather than embedding the schema object itself.
          to_dump['values'] = values.fullname
        else
          to_dump['values'] = Yajl.load(values.to_s)
        end
        to_dump
      end
    end

    # Schema for a union; schemas holds the member schemas in order.
    class UnionSchema < Schema
      attr_reader :schemas, :schema_from_names_indices

      # Each entry of schemas is either the name of an already-registered
      # schema or parsed JSON for a member schema.
      # Raises SchemaParseError for an unparsable member, a repeated
      # non-named type, or a nested union.
      def initialize(schemas, names=nil)
        super('union')

        schema_objects = []
        @schema_from_names_indices = []
        schemas.each_with_index do |schema, i|
          from_names = false
          # names defaults to nil, so guard before looking the name up.
          if schema.is_a?(String) && names && names.has_key?(schema)
            new_schema = names[schema]
            from_names = true
          else
            begin
              new_schema = Schema.real_parse(schema, names)
            rescue
              raise SchemaParseError, 'Union item must be a valid Avro schema'
            end
          end

          ns_type = new_schema.type
          if VALID_TYPES.include?(ns_type) &&
              !NAMED_TYPES.include?(ns_type) &&
              schema_objects.any?{|o| o.type == ns_type }
            raise SchemaParseError, "#{ns_type} is already in Union"
          elsif ns_type == 'union'
            raise SchemaParseError, "Unions cannot contain other unions"
          else
            schema_objects << new_schema
            @schema_from_names_indices << i if from_names
          end
        end
        # Assigned after the loop so an empty union yields [] rather than nil.
        @schemas = schema_objects
      end

      # JSON array text; members that came from names are written as their
      # fullname, the rest as their own JSON.
      def to_s
        # FIXME(jmhodges) this from_name pattern is really weird and
        # seems code-smelly.
        to_dump = []
        schemas.each_with_index do |schema, i|
          if schema_from_names_indices.include?(i)
            to_dump << schema.fullname
          else
            to_dump << Yajl.load(schema.to_s)
          end
        end
        Yajl.dump(to_dump)
      end
    end

    # Schema for 'enum'; symbols is the list of allowed values.
    class EnumSchema < NamedSchema
      attr_reader :symbols

      # Raises SchemaParseError when symbols contains a repeated entry.
      def initialize(name, space, symbols, names=nil)
        if symbols.uniq.length < symbols.length
          # Show the whole list; '%s' % symbols would print only the first
          # element of the array.
          fail_msg = "Duplicate symbol: #{symbols.inspect}"
          raise Avro::SchemaParseError, fail_msg
        end
        super('enum', name, space, names)
        @symbols = symbols
      end

      def to_hash
        super.merge('symbols' => symbols)
      end
    end

    # Valid primitive types are in PRIMITIVE_TYPES.
    class PrimitiveSchema < Schema
      # Raises AvroError when type is not in PRIMITIVE_TYPES.
      def initialize(type)
        unless PRIMITIVE_TYPES.include? type
          raise AvroError.new("#{type} is not a valid primitive type.")
        end

        super(type)
      end

      # A primitive whose Hash form holds only its type is written as a bare
      # quoted name; otherwise the Hash is dumped as JSON.
      def to_s
        to_hash.size == 1 ? type.inspect : Yajl.dump(to_hash)
      end
    end

    # Schema for 'fixed'; size is the required datum size.
    class FixedSchema < NamedSchema
      attr_reader :size

      # Raises AvroError when size is not an integer.
      def initialize(name, space, size, names=nil)
        # Ensure valid cto args
        unless size.is_a?(Integer)
          raise AvroError, 'Fixed Schema requires a valid integer for size property.'
        end
        super('fixed', name, space, names)
        @size = size
      end

      def to_hash
        super.merge('size' => @size)
      end
    end

    # A single field of a record: a name, a type schema and the optional
    # default and order properties.
    class Field
      attr_reader :type, :name, :default, :order, :type_from_names

      # type is either the name of an already-registered schema or parsed
      # JSON describing the field's schema.
      def initialize(type, name, default=nil, order=nil, names=nil)
        @type_from_names = false
        if type.is_a?(String) && names && names.has_key?(type)
          type_schema = names[type]
          @type_from_names = true
        else
          type_schema = Schema.real_parse(type, names)
        end
        @type = type_schema
        @name = name
        @default = default
        @order = order
      end

      def to_hash
        sigh_type = type_from_names ? type.fullname : Yajl.load(type.to_s)
        hsh = {
          'name' => name,
          'type' => sigh_type
        }
        # Compare against nil so that a default of false is still emitted.
        hsh['default'] = default unless default.nil?
        hsh['order'] = order if order
        hsh
      end

      def to_s
        Yajl.dump(to_hash)
      end
    end
  end

  class SchemaParseError < AvroError; end

  # Helpers for namespaced schema names and the names dictionary.
  module Name
    # Split a dotted name into its last segment and the namespace in front of
    # it. A name without dots is returned unchanged together with the
    # namespace that was passed in.
    def self.extract_namespace(name, namespace)
      parts = name.split('.')
      if parts.size > 1
        namespace, name = parts[0..-2].join('.'), parts.last
      end
      return name, namespace
    end

    # Add a new schema object to the names dictionary (in place).
    #
    # Returns the dictionary; a fresh one is created when names is nil.
    # Raises SchemaParseError when the fullname is a reserved type name or is
    # already registered.
    def self.add_name(names, new_schema)
      new_fullname = new_schema.fullname
      if Avro::Schema::VALID_TYPES.include?(new_fullname)
        raise SchemaParseError, "#{new_fullname} is a reserved type name."
      elsif names.nil?
        names = {}
      elsif names.has_key?(new_fullname)
        raise SchemaParseError, "The name \"#{new_fullname}\" is already in use."
      end

      names[new_fullname] = new_schema
      names
    end

    # Prefix name with namespace unless the name is already dotted or no
    # namespace is given.
    def self.make_fullname(name, namespace)
      if !name.include?('.') && !namespace.nil?
        namespace + '.' + name
      else
        name
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env ruby
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
# Generates random datums that match a given schema object. The schema only
# needs to respond to #type plus the accessor used for its kind (#items,
# #values, #fields, #schemas, #symbols or #size).
class RandomData
  CHARPOOL = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789'
  BYTEPOOL = '12345abcd'

  # schm is the schema to generate data for. When seed is given the global
  # random number generator is seeded with it via srand.
  def initialize(schm, seed=nil)
    srand(seed) if seed
    @seed = seed
    @schm = schm
  end

  # Produce the next random datum for the schema given at construction.
  def next
    nextdata(@schm)
  end

  # Produce a random datum for schm. d is the current nesting depth; arrays
  # and maps get shorter as it grows. Returns nil for a type that is not
  # handled below.
  def nextdata(schm, d=0)
    case schm.type
    when 'boolean'
      rand > 0.5
    when 'string'
      randstr
    when 'int'
      rand_between(Avro::Schema::INT_MIN_VALUE, Avro::Schema::INT_MAX_VALUE)
    when 'long'
      rand_between(Avro::Schema::LONG_MIN_VALUE, Avro::Schema::LONG_MAX_VALUE)
    when 'float'
      (-1024 + 2048 * rand).round.to_f
    when 'double'
      Avro::Schema::LONG_MIN_VALUE + (Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) * rand
    when 'bytes'
      randstr(BYTEPOOL)
    when 'null'
      nil
    when 'array'
      arr = []
      container_length(d).times { arr << nextdata(schm.items, d+1) }
      arr
    when 'map'
      map = {}
      container_length(d).times do
        # A repeated random key overwrites the earlier entry, so the map may
        # end up with fewer entries than iterations.
        map[nextdata(Avro::Schema::PrimitiveSchema.new('string'))] = nextdata(schm.values, d+1)
      end
      map
    when 'record', 'error', 'request'
      # Errors and requests carry fields just like records do.
      m = {}
      schm.fields.each do |field|
        m[field.name] = nextdata(field.type, d+1)
      end
      m
    when 'union'
      types = schm.schemas
      nextdata(types[rand(types.size)], d)
    when 'enum'
      symbols = schm.symbols
      len = symbols.size
      return nil if len == 0
      symbols[rand(len)]
    when 'fixed'
      f = ''.dup
      schm.size.times { f << BYTEPOOL[rand(BYTEPOOL.size), 1] }
      f
    end
  end

  # Random string of 0..length characters drawn from chars.
  def randstr(chars=CHARPOOL, length=20)
    str = ''.dup
    # Slice with a length so a one-character String is appended on every
    # Ruby version.
    rand(length+1).times { str << chars[rand(chars.size), 1] }
    str
  end

  private

  # Random integer in the inclusive range low..high. rand(n) yields values
  # below n, hence the + 1 so that high itself can be produced.
  def rand_between(low, high)
    rand(high - low + 1) + low
  end

  # Number of elements for an array or map at nesting depth d (never negative).
  def container_length(d)
    len = rand(5) + 2 - d
    len < 0 ? 0 : len
  end
end