tros 1.7.6.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,610 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ module Tros
18
+ module IO
19
# Raised when datum is not an example of schema.
class AvroTypeError < TrosError
  # expected_schema:: the schema the datum was checked against
  #                   (interpolated into the message).
  # datum::           the offending value (rendered with #inspect).
  def initialize(expected_schema, datum)
    rendered_datum = datum.inspect
    super("The datum #{rendered_datum} is not an example of schema #{expected_schema}")
  end
end
25
+
26
# Raised when the writer's and the reader's schemas do not match.
class SchemaMatchException < TrosError
  # Both schemas are interpolated into the error message.
  def initialize(writers_schema, readers_schema)
    super("Writer's schema #{writers_schema} and Reader's schema #{readers_schema} do not match.")
  end
end
33
+
34
+ # FIXME(jmhodges) move validate to this module?
35
+
36
class BinaryDecoder
  # Read leaf values

  # reader is an object on which we can call read, readbyte, seek and tell.
  attr_reader :reader

  def initialize(reader)
    @reader = reader
  end

  # Reads a single byte from the reader (via readbyte).
  def byte!
    @reader.readbyte
  end

  # null is written as zero bytes, so nothing is consumed.
  def read_null
    nil
  end

  # Reads one byte; true only when that byte equals 1.
  def read_boolean
    byte! == 1
  end

  # int and long values are written using variable-length,
  # zig-zag coding.
  def read_long
    b = byte!
    n = b & 0x7F
    shift = 7
    # The high bit of each byte signals that another byte follows; the
    # low seven bits are accumulated least-significant group first.
    while (b & 0x80) != 0
      b = byte!
      n |= (b & 0x7F) << shift
      shift += 7
    end
    # Undo the zig-zag mapping.
    (n >> 1) ^ -(n & 1)
  end

  alias_method :read_int, :read_long

  # A float is written as 4 bytes.
  # The float is converted into a 32-bit integer using a method
  # equivalent to Java's floatToIntBits and then encoded in
  # little-endian format.
  def read_float
    @reader.read(4).unpack('e')[0]
  end

  # A double is written as 8 bytes.
  # The double is converted into a 64-bit integer using a method
  # equivalent to Java's doubleToLongBits and then encoded in
  # little-endian format.
  def read_double
    @reader.read(8).unpack('E')[0]
  end

  # Bytes are encoded as a long followed by that many bytes of
  # data.
  def read_bytes
    read(read_long)
  end

  # A string is encoded as a long followed by that many bytes of
  # UTF-8 encoded character data.
  def read_string
    read_bytes.force_encoding("UTF-8")
  end

  # Reads len bytes from the reader.
  def read(len)
    @reader.read(len)
  end

  # Nothing to skip: null occupies zero bytes.
  def skip_null
    nil
  end

  def skip_boolean
    skip(1)
  end

  # Consumes one variable-length long without decoding its value.
  def skip_long
    b = byte!
    while (b & 0x80) != 0
      b = byte!
    end
  end

  # int and long share the same encoding, so a single definition
  # (this alias) serves both.
  alias_method :skip_int, :skip_long

  def skip_float
    skip(4)
  end

  def skip_double
    skip(8)
  end

  # Reads the length prefix, then skips that many bytes.
  def skip_bytes
    skip(read_long)
  end

  def skip_string
    skip_bytes
  end

  # Advances the reader n bytes past its current position using
  # tell and seek.
  def skip(n)
    reader.seek(reader.tell() + n)
  end
end
148
+
149
# Write leaf values
class BinaryEncoder
  # writer is the object every encoded value is passed to via #write.
  attr_reader :writer

  def initialize(writer)
    @writer = writer
  end

  # null is written as zero bytes
  def write_null(datum)
    nil
  end

  # a boolean is written as a single byte
  # whose value is either 0 (false) or 1 (true).
  def write_boolean(datum)
    on_disk = datum ? 1.chr : 0.chr
    writer.write(on_disk)
  end

  # int and long values are written using variable-length,
  # zig-zag coding.
  def write_long(n)
    # Zig-zag: fold the sign into the lowest bit.
    n = (n << 1) ^ (n >> 63)
    # Emit seven bits at a time, least-significant group first, setting
    # the high bit on every byte except the last.
    while (n & ~0x7F) != 0
      @writer.write(((n & 0x7f) | 0x80).chr)
      n >>= 7
    end
    @writer.write(n.chr)
  end

  alias_method :write_int, :write_long

  # A float is written as 4 bytes.
  # The float is converted into a 32-bit integer using a method
  # equivalent to Java's floatToIntBits and then encoded in
  # little-endian format.
  def write_float(datum)
    @writer.write([datum].pack('e'))
  end

  # A double is written as 8 bytes.
  # The double is converted into a 64-bit integer using a method
  # equivalent to Java's doubleToLongBits and then encoded in
  # little-endian format.
  def write_double(datum)
    @writer.write([datum].pack('E'))
  end

  # Bytes are encoded as a long followed by that many bytes of data.
  def write_bytes(datum)
    write_long(datum.bytesize)
    @writer.write(datum)
  end

  # A string is encoded as a long followed by that many bytes of
  # UTF-8 encoded character data
  def write_string(datum)
    write_bytes(datum.encode('UTF-8'))
  end

  # Write an arbitrary datum, passing it to the writer unchanged.
  def write(datum)
    writer.write(datum)
  end
end
216
+
217
# Reads data written with a writer's schema and resolves it against a
# reader's schema, pulling leaf values from a decoder object.
class DatumReader
  # Returns true when data written with writers_schema is acceptable for
  # readers_schema according to the checks below, false otherwise.
  def self.match_schemas(writers_schema, readers_schema)
    w_type = writers_schema.type_sym
    r_type = readers_schema.type_sym

    # This conditional is begging for some OO love.
    # Any union is accepted here; the concrete branch is checked later
    # by read_data / read_union.
    return true if w_type == :union || r_type == :union

    if w_type == r_type
      return true if Schema::PRIMITIVE_TYPES_SYM.include?(r_type)

      case r_type
      when :record, :error, :enum
        return writers_schema.fullname == readers_schema.fullname
      when :request
        return true
      when :fixed
        return writers_schema.fullname == readers_schema.fullname &&
               writers_schema.size == readers_schema.size
      when :map
        return writers_schema.values.type == readers_schema.values.type
      when :array
        return writers_schema.items.type == readers_schema.items.type
      end
    end

    # Handle schema promotion
    if w_type == :int && [:long, :float, :double].include?(r_type)
      return true
    elsif w_type == :long && [:float, :double].include?(r_type)
      return true
    elsif w_type == :float && r_type == :double
      return true
    end

    false
  end

  attr_accessor :writers_schema, :readers_schema

  def initialize(writers_schema=nil, readers_schema=nil)
    @writers_schema = writers_schema
    @readers_schema = readers_schema
  end

  # Reads one datum from decoder. When no reader's schema was set, the
  # writer's schema is used (and stored) as the reader's schema.
  def read(decoder)
    self.readers_schema = writers_schema unless readers_schema
    read_data(writers_schema, readers_schema, decoder)
  end

  # Reads one datum of writers_schema from decoder, resolved against
  # readers_schema.
  #
  # Raises SchemaMatchException when the schemas do not match and
  # TrosError when the writer's schema type is unknown.
  def read_data(writers_schema, readers_schema, decoder)
    # schema matching
    unless self.class.match_schemas(writers_schema, readers_schema)
      raise SchemaMatchException.new(writers_schema, readers_schema)
    end

    # schema resolution: reader's schema is a union, writer's
    # schema is not
    if writers_schema.type_sym != :union && readers_schema.type_sym == :union
      rs = readers_schema.schemas.find { |s| self.class.match_schemas(writers_schema, s) }
      return read_data(writers_schema, rs, decoder) if rs
      raise SchemaMatchException.new(writers_schema, readers_schema)
    end

    # function dispatch for reading data based on type of writer's
    # schema
    case writers_schema.type_sym
    when :null then decoder.read_null
    when :boolean then decoder.read_boolean
    when :string then decoder.read_string
    when :int then decoder.read_int
    when :long then decoder.read_long
    when :float then decoder.read_float
    when :double then decoder.read_double
    when :bytes then decoder.read_bytes
    when :fixed then read_fixed(writers_schema, readers_schema, decoder)
    when :enum then read_enum(writers_schema, readers_schema, decoder)
    when :array then read_array(writers_schema, readers_schema, decoder)
    when :map then read_map(writers_schema, readers_schema, decoder)
    when :union then read_union(writers_schema, readers_schema, decoder)
    when :record, :error, :request then read_record(writers_schema, readers_schema, decoder)
    else
      raise TrosError, "Cannot read unknown schema type: #{writers_schema.type}"
    end
  end

  # Reads exactly writers_schema.size bytes.
  def read_fixed(writers_schema, readers_schema, decoder)
    decoder.read(writers_schema.size)
  end

  # Reads an int and returns the writer's symbol at that index.
  def read_enum(writers_schema, readers_schema, decoder)
    index_of_symbol = decoder.read_int
    read_symbol = writers_schema.symbols[index_of_symbol]

    # TODO(jmhodges): figure out what unset means for resolution
    # schema resolution
    unless readers_schema.symbols.include?(read_symbol)
      # 'unset' here
    end

    read_symbol
  end

  # Reads blocks of items until a block count of 0 is read and returns
  # the items as an Array.
  def read_array(writers_schema, readers_schema, decoder)
    read_items = []
    block_count = decoder.read_long
    while block_count != 0
      if block_count < 0
        block_count = -block_count
        # A negative count is followed by a long (skip_blocks uses it as
        # a byte length to skip); it is consumed here but not needed.
        decoder.read_long
      end
      block_count.times do
        read_items << read_data(writers_schema.items,
                                readers_schema.items,
                                decoder)
      end
      block_count = decoder.read_long
    end

    read_items
  end

  # Reads blocks of string-key/value pairs until a block count of 0 is
  # read and returns them as a Hash.
  def read_map(writers_schema, readers_schema, decoder)
    read_items = {}
    block_count = decoder.read_long
    while block_count != 0
      if block_count < 0
        block_count = -block_count
        # Consume the long that follows a negative count; unused here.
        decoder.read_long
      end
      block_count.times do
        key = decoder.read_string
        read_items[key] = read_data(writers_schema.values,
                                    readers_schema.values,
                                    decoder)
      end
      block_count = decoder.read_long
    end

    read_items
  end

  # Reads the index of the writer's union branch, then reads the datum
  # using that branch as the writer's schema.
  def read_union(writers_schema, readers_schema, decoder)
    index_of_schema = decoder.read_long
    selected_writers_schema = writers_schema.schemas[index_of_schema]

    read_data(selected_writers_schema, readers_schema, decoder)
  end

  # Reads the writer's fields in order into a Hash keyed by field name.
  # Writer fields unknown to the reader are skipped; reader fields the
  # writer lacks are filled from their (non-nil) defaults.
  def read_record(writers_schema, readers_schema, decoder)
    readers_fields_hash = readers_schema.fields_hash
    record = {}
    writers_schema.fields.each do |field|
      if readers_field = readers_fields_hash[field.name]
        field_val = read_data(field.type, readers_field.type, decoder)
        record[field.name] = field_val
      else
        skip_data(field.type, decoder)
      end
    end

    # fill in the default values
    if readers_fields_hash.size > record.size
      writers_fields_hash = writers_schema.fields_hash
      readers_fields_hash.each do |field_name, field|
        unless writers_fields_hash.has_key? field_name
          if !field.default.nil?
            field_val = read_default_value(field.type, field.default)
            record[field.name] = field_val
          else
            # FIXME(jmhodges) another 'unset' here
          end
        end
      end
    end

    record
  end

  # Converts a default value (as found in a schema definition) into the
  # Ruby value for field_schema. Raises TrosError for unknown types.
  def read_default_value(field_schema, default_value)
    # Basically a JSON Decoder?
    case field_schema.type_sym
    when :null
      nil
    when :boolean
      default_value
    when :int, :long
      Integer(default_value)
    when :float, :double
      Float(default_value)
    when :enum, :fixed, :string, :bytes
      default_value
    when :array
      default_value.map do |json_val|
        read_default_value(field_schema.items, json_val)
      end
    when :map
      default_value.each_with_object({}) do |(key, json_val), result|
        result[key] = read_default_value(field_schema.values, json_val)
      end
    when :union
      # The default is interpreted against the first branch of the union.
      read_default_value(field_schema.schemas[0], default_value)
    when :record, :error
      record = {}
      field_schema.fields.each do |field|
        json_val = default_value[field.name]
        # Fall back to the field's own default only when no value was
        # supplied; an explicit false must be kept.
        json_val = field.default if json_val.nil?
        record[field.name] = read_default_value(field.type, json_val)
      end
      record
    else
      fail_msg = "Unknown type: #{field_schema.type}"
      raise TrosError, fail_msg
    end
  end

  # Advances decoder past one datum of writers_schema without building
  # a value. Raises TrosError for unknown schema types.
  def skip_data(writers_schema, decoder)
    case writers_schema.type_sym
    when :null
      decoder.skip_null
    when :boolean
      decoder.skip_boolean
    when :string
      decoder.skip_string
    when :int
      decoder.skip_int
    when :long
      decoder.skip_long
    when :float
      decoder.skip_float
    when :double
      decoder.skip_double
    when :bytes
      decoder.skip_bytes
    when :fixed
      skip_fixed(writers_schema, decoder)
    when :enum
      skip_enum(writers_schema, decoder)
    when :array
      skip_array(writers_schema, decoder)
    when :map
      skip_map(writers_schema, decoder)
    when :union
      skip_union(writers_schema, decoder)
    when :record, :error, :request
      skip_record(writers_schema, decoder)
    else
      raise TrosError, "Unknown schema type: #{writers_schema.type}"
    end
  end

  def skip_fixed(writers_schema, decoder)
    decoder.skip(writers_schema.size)
  end

  def skip_enum(writers_schema, decoder)
    decoder.skip_int
  end

  # Reads the branch index, then skips a datum of that branch.
  def skip_union(writers_schema, decoder)
    index = decoder.read_long
    skip_data(writers_schema.schemas[index], decoder)
  end

  def skip_array(writers_schema, decoder)
    skip_blocks(decoder) { skip_data(writers_schema.items, decoder) }
  end

  def skip_map(writers_schema, decoder)
    skip_blocks(decoder) do
      decoder.skip_string
      skip_data(writers_schema.values, decoder)
    end
  end

  def skip_record(writers_schema, decoder)
    writers_schema.fields.each { |f| skip_data(f.type, decoder) }
  end

  private

  # Walks the blocks of an array or map. A negative block count is
  # followed by a long that is used as the number of bytes to skip;
  # otherwise the given block is called once per item.
  def skip_blocks(decoder, &blk)
    block_count = decoder.read_long
    while block_count != 0
      if block_count < 0
        decoder.skip(decoder.read_long)
      else
        block_count.times(&blk)
      end
      block_count = decoder.read_long
    end
  end
end # DatumReader
524
+
525
# DatumWriter for generic ruby objects
class DatumWriter
  attr_accessor :writers_schema

  def initialize(writers_schema=nil)
    @writers_schema = writers_schema
  end

  # Writes datum to encoder according to the configured writer's schema.
  def write(datum, encoder)
    write_data(writers_schema, datum, encoder)
  end

  # Validates datum against writers_schema, then dispatches on the
  # schema type to write it.
  #
  # Raises TrosTypeError when validation fails and TrosError for an
  # unknown schema type.
  # NOTE(review): TrosTypeError is not defined in the visible part of
  # this file (only AvroTypeError is) - confirm it exists elsewhere.
  def write_data(writers_schema, datum, encoder)
    unless Schema.validate(writers_schema, datum)
      raise TrosTypeError.new(writers_schema, datum)
    end

    # function dispatch to write datum
    case writers_schema.type_sym
    when :null then encoder.write_null(datum)
    when :boolean then encoder.write_boolean(datum)
    when :string then encoder.write_string(datum)
    when :int then encoder.write_int(datum)
    when :long then encoder.write_long(datum)
    when :float then encoder.write_float(datum)
    when :double then encoder.write_double(datum)
    when :bytes then encoder.write_bytes(datum)
    when :fixed then write_fixed(writers_schema, datum, encoder)
    when :enum then write_enum(writers_schema, datum, encoder)
    when :array then write_array(writers_schema, datum, encoder)
    when :map then write_map(writers_schema, datum, encoder)
    when :union then write_union(writers_schema, datum, encoder)
    when :record, :error, :request then write_record(writers_schema, datum, encoder)
    else
      raise TrosError.new("Unknown type: #{writers_schema.type}")
    end
  end

  # Passes the datum to the encoder unchanged.
  def write_fixed(writers_schema, datum, encoder)
    encoder.write(datum)
  end

  # Writes the position of datum within the schema's symbols as an int.
  def write_enum(writers_schema, datum, encoder)
    index_of_datum = writers_schema.symbols.index(datum)
    encoder.write_int(index_of_datum)
  end

  # Writes a non-empty array as one block (item count, then the items),
  # always followed by a terminating 0.
  def write_array(writers_schema, datum, encoder)
    if datum.size > 0
      encoder.write_long(datum.size)
      datum.each do |item|
        write_data(writers_schema.items, item, encoder)
      end
    end
    encoder.write_long(0)
  end

  # Writes a non-empty map as one block (pair count, then each string
  # key and its value), always followed by a terminating 0.
  def write_map(writers_schema, datum, encoder)
    if datum.size > 0
      encoder.write_long(datum.size)
      datum.each do |k, v|
        encoder.write_string(k)
        write_data(writers_schema.values, v, encoder)
      end
    end
    encoder.write_long(0)
  end

  # Writes the index of the first union branch that validates datum,
  # followed by the datum written with that branch.
  #
  # Raises TrosTypeError when no branch validates the datum.
  def write_union(writers_schema, datum, encoder)
    index_of_schema = writers_schema.schemas.index do |candidate|
      Schema.validate(candidate, datum)
    end
    raise TrosTypeError.new(writers_schema, datum) if index_of_schema.nil?

    encoder.write_long(index_of_schema)
    write_data(writers_schema.schemas[index_of_schema], datum, encoder)
  end

  # Writes each schema field in order, looking the value up in datum by
  # field name.
  def write_record(writers_schema, datum, encoder)
    writers_schema.fields.each do |field|
      write_data(field.type, datum[field.name], encoder)
    end
  end
end # DatumWriter
609
+ end
610
+ end