tros 1.7.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,610 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ module Tros
18
+ module IO
19
# Raised when datum is not an example of schema.
class AvroTypeError < TrosError
  # expected_schema - the schema the datum was checked against (interpolated
  #                   into the message via #to_s)
  # datum           - the offending value (rendered with #inspect)
  def initialize(expected_schema, datum)
    message = "The datum #{datum.inspect} is not an example of schema #{expected_schema}"
    super(message)
  end
end
25
+
26
# Raised when the writer's and the reader's schemas do not match.
class SchemaMatchException < TrosError
  # Both schemas are interpolated into the error message via #to_s.
  def initialize(writers_schema, readers_schema)
    super("Writer's schema #{writers_schema} and Reader's schema #{readers_schema} do not match.")
  end
end
33
+
34
+ # FIXME(jmhodges) move validate to this module?
35
+
36
# Reads binary-encoded leaf values from an underlying stream, and can also
# skip over them without materialising the value.
class BinaryDecoder
  # reader is an object on which we can call read, readbyte, seek and tell.
  attr_reader :reader

  def initialize(reader)
    @reader = reader
  end

  # Consume and return the next byte (delegates to reader.readbyte).
  def byte!
    @reader.readbyte
  end

  def read_null
    # null is written as zero bytes, so nothing is consumed
    nil
  end

  def read_boolean
    # a boolean occupies one byte; only the value 1 is treated as true
    byte! == 1
  end

  def read_long
    # int and long values are written using variable-length,
    # zig-zag coding.
    b = byte!
    n = b & 0x7F
    shift = 7
    # The high bit of each byte signals that another byte follows.
    while (b & 0x80) != 0
      b = byte!
      n |= (b & 0x7F) << shift
      shift += 7
    end
    # Undo the zig-zag mapping to recover the signed value.
    (n >> 1) ^ -(n & 1)
  end

  alias_method :read_int, :read_long

  def read_float
    # A float is written as 4 bytes.
    # The float is converted into a 32-bit integer using a method
    # equivalent to Java's floatToIntBits and then encoded in
    # little-endian format.
    @reader.read(4).unpack('e')[0]
  end

  def read_double
    # A double is written as 8 bytes.
    # The double is converted into a 64-bit integer using a method
    # equivalent to Java's doubleToLongBits and then encoded in
    # little-endian format.
    @reader.read(8).unpack('E')[0]
  end

  def read_bytes
    # Bytes are encoded as a long followed by that many bytes of
    # data.
    read(read_long)
  end

  def read_string
    # A string is encoded as a long followed by that many bytes of
    # UTF-8 encoded character data.
    read_bytes.force_encoding("UTF-8")
  end

  # Read len bytes straight from the underlying reader.
  def read(len)
    @reader.read(len)
  end

  def skip_null
    nil
  end

  def skip_boolean
    skip(1)
  end

  # Consume a variable-length integer without decoding it: keep reading
  # while the continuation bit (0x80) is set.
  def skip_long
    b = byte!
    while (b & 0x80) != 0
      b = byte!
    end
  end

  # int and long share the same encoding, so skipping is identical.
  alias_method :skip_int, :skip_long

  def skip_float
    skip(4)
  end

  def skip_double
    skip(8)
  end

  def skip_bytes
    # the length prefix has to be decoded to know how far to jump
    skip(read_long)
  end

  def skip_string
    skip_bytes
  end

  # Advance the reader by n bytes relative to its current position.
  def skip(n)
    reader.seek(reader.tell() + n)
  end
end
148
+
149
# Write leaf values in their binary encoding to an underlying stream.
class BinaryEncoder
  # writer is an object on which we can call write.
  attr_reader :writer

  def initialize(writer)
    @writer = writer
  end

  # null is written as zero bytes
  def write_null(datum)
    nil
  end

  # a boolean is written as a single byte
  # whose value is either 0 (false) or 1 (true).
  def write_boolean(datum)
    on_disk = datum ? 1.chr : 0.chr
    writer.write(on_disk)
  end

  # int and long values are written using variable-length,
  # zig-zag coding.
  def write_long(n)
    # zig-zag: fold the sign into the lowest bit
    n = (n << 1) ^ (n >> 63)
    # emit 7 bits per byte, setting the high bit while more bits remain
    while (n & ~0x7F) != 0
      @writer.write(((n & 0x7f) | 0x80).chr)
      n >>= 7
    end
    @writer.write(n.chr)
  end

  alias_method :write_int, :write_long

  # A float is written as 4 bytes.
  # The float is converted into a 32-bit integer using a method
  # equivalent to Java's floatToIntBits and then encoded in
  # little-endian format.
  def write_float(datum)
    @writer.write([datum].pack('e'))
  end

  # A double is written as 8 bytes.
  # The double is converted into a 64-bit integer using a method
  # equivalent to Java's doubleToLongBits and then encoded in
  # little-endian format.
  def write_double(datum)
    @writer.write([datum].pack('E'))
  end

  # Bytes are encoded as a long followed by that many bytes of data.
  def write_bytes(datum)
    write_long(datum.bytesize)
    @writer.write(datum)
  end

  # A string is encoded as a long followed by that many bytes of
  # UTF-8 encoded character data
  def write_string(datum)
    write_bytes(datum.encode('UTF-8'))
  end

  # Write an arbitrary datum to the underlying writer as-is.
  def write(datum)
    writer.write(datum)
  end
end
216
+
217
# Reads a datum written with writers_schema from a decoder, resolving it
# against readers_schema (which falls back to writers_schema when unset).
class DatumReader
  # Returns true when data written with writers_schema may be read using
  # readers_schema, false otherwise.
  def self.match_schemas(writers_schema, readers_schema)
    w_type = writers_schema.type_sym
    r_type = readers_schema.type_sym

    # This conditional is begging for some OO love.
    # Unions always pass here; read_data picks the concrete branch later.
    if w_type == :union || r_type == :union
      return true
    end

    if w_type == r_type
      return true if Schema::PRIMITIVE_TYPES_SYM.include?(r_type)

      case r_type
      when :record, :error, :enum
        return writers_schema.fullname == readers_schema.fullname
      when :request
        return true
      when :fixed
        return writers_schema.fullname == readers_schema.fullname &&
               writers_schema.size == readers_schema.size
      when :map
        return writers_schema.values.type == readers_schema.values.type
      when :array
        return writers_schema.items.type == readers_schema.items.type
      end
    end

    # Handle schema promotion
    if w_type == :int && [:long, :float, :double].include?(r_type)
      return true
    elsif w_type == :long && [:float, :double].include?(r_type)
      return true
    elsif w_type == :float && r_type == :double
      return true
    end

    return false
  end

  attr_accessor :writers_schema, :readers_schema

  def initialize(writers_schema=nil, readers_schema=nil)
    @writers_schema = writers_schema
    @readers_schema = readers_schema
  end

  # Read one datum from decoder. When no reader's schema was supplied the
  # writer's schema is used for both sides (and stored as readers_schema).
  def read(decoder)
    self.readers_schema = writers_schema unless readers_schema
    read_data(writers_schema, readers_schema, decoder)
  end

  # Read a datum of the given writer's schema, resolved against the
  # reader's schema. Raises SchemaMatchException when the two schemas do
  # not match, and TrosError for an unknown writer schema type.
  def read_data(writers_schema, readers_schema, decoder)
    # schema matching
    unless self.class.match_schemas(writers_schema, readers_schema)
      raise SchemaMatchException.new(writers_schema, readers_schema)
    end

    # schema resolution: reader's schema is a union, writer's
    # schema is not
    if writers_schema.type_sym != :union && readers_schema.type_sym == :union
      rs = readers_schema.schemas.find{|s|
        self.class.match_schemas(writers_schema, s)
      }
      return read_data(writers_schema, rs, decoder) if rs
      raise SchemaMatchException.new(writers_schema, readers_schema)
    end

    # function dispatch for reading data based on type of writer's
    # schema
    case writers_schema.type_sym
    when :null;    decoder.read_null
    when :boolean; decoder.read_boolean
    when :string;  decoder.read_string
    when :int;     decoder.read_int
    when :long;    decoder.read_long
    when :float;   decoder.read_float
    when :double;  decoder.read_double
    when :bytes;   decoder.read_bytes
    when :fixed;   read_fixed(writers_schema, readers_schema, decoder)
    when :enum;    read_enum(writers_schema, readers_schema, decoder)
    when :array;   read_array(writers_schema, readers_schema, decoder)
    when :map;     read_map(writers_schema, readers_schema, decoder)
    when :union;   read_union(writers_schema, readers_schema, decoder)
    when :record, :error, :request; read_record(writers_schema, readers_schema, decoder)
    else
      raise TrosError, "Cannot read unknown schema type: #{writers_schema.type}"
    end
  end

  # A fixed is exactly writers_schema.size raw bytes.
  def read_fixed(writers_schema, readers_schema, decoder)
    decoder.read(writers_schema.size)
  end

  # An enum is stored as the index of the symbol in the writer's symbol list.
  def read_enum(writers_schema, readers_schema, decoder)
    index_of_symbol = decoder.read_int
    read_symbol = writers_schema.symbols[index_of_symbol]

    # TODO(jmhodges): figure out what unset means for resolution
    # schema resolution
    unless readers_schema.symbols.include?(read_symbol)
      # 'unset' here
    end

    read_symbol
  end

  # Arrays are stored as a series of blocks, each prefixed by an item
  # count, and terminated by a zero count.
  def read_array(writers_schema, readers_schema, decoder)
    read_items = []
    block_count = decoder.read_long
    while block_count != 0
      if block_count < 0
        block_count = -block_count
        # A negative count is followed by a long (the block size), which is
        # consumed here and not otherwise used.
        decoder.read_long
      end
      block_count.times do
        read_items << read_data(writers_schema.items,
                                readers_schema.items,
                                decoder)
      end
      block_count = decoder.read_long
    end

    read_items
  end

  # Maps use the same block layout as arrays; every entry is a string key
  # followed by a value.
  def read_map(writers_schema, readers_schema, decoder)
    read_items = {}
    block_count = decoder.read_long
    while block_count != 0
      if block_count < 0
        block_count = -block_count
        # A negative count is followed by a long (the block size), which is
        # consumed here and not otherwise used.
        decoder.read_long
      end
      block_count.times do
        key = decoder.read_string
        read_items[key] = read_data(writers_schema.values,
                                    readers_schema.values,
                                    decoder)
      end
      block_count = decoder.read_long
    end

    read_items
  end

  # A union is stored as the index of the chosen writer branch followed by
  # the value encoded with that branch.
  def read_union(writers_schema, readers_schema, decoder)
    index_of_schema = decoder.read_long
    selected_writers_schema = writers_schema.schemas[index_of_schema]

    read_data(selected_writers_schema, readers_schema, decoder)
  end

  # Read the writer's fields in order; fields unknown to the reader are
  # skipped, and reader-only fields are filled from their defaults.
  def read_record(writers_schema, readers_schema, decoder)
    readers_fields_hash = readers_schema.fields_hash
    record = {}
    writers_schema.fields.each do |field|
      if readers_field = readers_fields_hash[field.name]
        field_val = read_data(field.type, readers_field.type, decoder)
        record[field.name] = field_val
      else
        skip_data(field.type, decoder)
      end
    end

    # fill in the default values
    if readers_fields_hash.size > record.size
      writers_fields_hash = writers_schema.fields_hash
      readers_fields_hash.each do |field_name, field|
        unless writers_fields_hash.has_key? field_name
          if !field.default.nil?
            field_val = read_default_value(field.type, field.default)
            record[field.name] = field_val
          else
            # FIXME(jmhodges) another 'unset' here
          end
        end
      end
    end

    record
  end

  # Convert a default value (as it appears in the schema) into the value
  # shape this reader produces for field_schema.
  # Raises TrosError for an unknown schema type.
  def read_default_value(field_schema, default_value)
    # Basically a JSON Decoder?
    case field_schema.type_sym
    when :null
      return nil
    when :boolean
      return default_value
    when :int, :long
      return Integer(default_value)
    when :float, :double
      return Float(default_value)
    when :enum, :fixed, :string, :bytes
      return default_value
    when :array
      return default_value.map do |json_val|
        read_default_value(field_schema.items, json_val)
      end
    when :map
      entries = {}
      default_value.each do |key, json_val|
        entries[key] = read_default_value(field_schema.values, json_val)
      end
      return entries
    when :union
      # the default of a union is interpreted using its first branch
      return read_default_value(field_schema.schemas[0], default_value)
    when :record, :error
      record = {}
      field_schema.fields.each do |field|
        json_val = default_value[field.name]
        # Only fall back to the field's own default when no value was
        # supplied; an explicit `false` must be kept as-is.
        json_val = field.default if json_val.nil?
        record[field.name] = read_default_value(field.type, json_val)
      end
      return record
    else
      fail_msg = "Unknown type: #{field_schema.type}"
      raise TrosError, fail_msg
    end
  end

  # Advance the decoder past one datum of writers_schema without building
  # the value. Raises TrosError for an unknown schema type.
  def skip_data(writers_schema, decoder)
    case writers_schema.type_sym
    when :null
      decoder.skip_null
    when :boolean
      decoder.skip_boolean
    when :string
      decoder.skip_string
    when :int
      decoder.skip_int
    when :long
      decoder.skip_long
    when :float
      decoder.skip_float
    when :double
      decoder.skip_double
    when :bytes
      decoder.skip_bytes
    when :fixed
      skip_fixed(writers_schema, decoder)
    when :enum
      skip_enum(writers_schema, decoder)
    when :array
      skip_array(writers_schema, decoder)
    when :map
      skip_map(writers_schema, decoder)
    when :union
      skip_union(writers_schema, decoder)
    when :record, :error, :request
      skip_record(writers_schema, decoder)
    else
      raise TrosError, "Unknown schema type: #{writers_schema.type}"
    end
  end

  def skip_fixed(writers_schema, decoder)
    decoder.skip(writers_schema.size)
  end

  def skip_enum(writers_schema, decoder)
    decoder.skip_int
  end

  def skip_union(writers_schema, decoder)
    # the branch index must be decoded to know which schema to skip
    index = decoder.read_long
    skip_data(writers_schema.schemas[index], decoder)
  end

  def skip_array(writers_schema, decoder)
    skip_blocks(decoder) { skip_data(writers_schema.items, decoder) }
  end

  def skip_map(writers_schema, decoder)
    skip_blocks(decoder) {
      decoder.skip_string
      skip_data(writers_schema.values, decoder)
    }
  end

  def skip_record(writers_schema, decoder)
    writers_schema.fields.each{|f| skip_data(f.type, decoder) }
  end

  private

  # Walk the block structure shared by arrays and maps. For a negative
  # count the following long is read and that many bytes are skipped in one
  # jump; otherwise the given block is invoked once per item.
  def skip_blocks(decoder, &blk)
    block_count = decoder.read_long
    while block_count != 0
      if block_count < 0
        decoder.skip(decoder.read_long)
      else
        block_count.times(&blk)
      end
      block_count = decoder.read_long
    end
  end
end # DatumReader
524
+
525
# DatumWriter for generic ruby objects: validates a datum against a schema
# and writes it through an encoder.
class DatumWriter
  attr_accessor :writers_schema

  def initialize(writers_schema=nil)
    @writers_schema = writers_schema
  end

  # Write datum to encoder using the configured writers_schema.
  def write(datum, encoder)
    write_data(writers_schema, datum, encoder)
  end

  # Validate datum against writers_schema, then dispatch on the schema type.
  # Raises TrosTypeError when validation fails and TrosError for an unknown
  # schema type.
  # NOTE(review): TrosTypeError is not defined in this file (which defines
  # AvroTypeError) -- confirm it is provided elsewhere in the project.
  def write_data(writers_schema, datum, encoder)
    unless Schema.validate(writers_schema, datum)
      raise TrosTypeError.new(writers_schema, datum)
    end

    # function dispatch to write datum
    case writers_schema.type_sym
    when :null;    encoder.write_null(datum)
    when :boolean; encoder.write_boolean(datum)
    when :string;  encoder.write_string(datum)
    when :int;     encoder.write_int(datum)
    when :long;    encoder.write_long(datum)
    when :float;   encoder.write_float(datum)
    when :double;  encoder.write_double(datum)
    when :bytes;   encoder.write_bytes(datum)
    when :fixed;   write_fixed(writers_schema, datum, encoder)
    when :enum;    write_enum(writers_schema, datum, encoder)
    when :array;   write_array(writers_schema, datum, encoder)
    when :map;     write_map(writers_schema, datum, encoder)
    when :union;   write_union(writers_schema, datum, encoder)
    when :record, :error, :request;  write_record(writers_schema, datum, encoder)
    else
      raise TrosError.new("Unknown type: #{writers_schema.type}")
    end
  end

  # A fixed is written as its raw bytes, with no length prefix.
  def write_fixed(writers_schema, datum, encoder)
    encoder.write(datum)
  end

  # An enum is written as the index of the symbol in the schema's symbols.
  def write_enum(writers_schema, datum, encoder)
    index_of_datum = writers_schema.symbols.index(datum)
    encoder.write_int(index_of_datum)
  end

  # A non-empty array is written as one block (item count, then the items)
  # followed by a zero count; an empty array is just the zero count.
  def write_array(writers_schema, datum, encoder)
    if datum.size > 0
      encoder.write_long(datum.size)
      datum.each do |item|
        write_data(writers_schema.items, item, encoder)
      end
    end
    encoder.write_long(0)
  end

  # Same block layout as arrays; each entry is a string key and a value.
  def write_map(writers_schema, datum, encoder)
    if datum.size > 0
      encoder.write_long(datum.size)
      datum.each do |k,v|
        encoder.write_string(k)
        write_data(writers_schema.values, v, encoder)
      end
    end
    encoder.write_long(0)
  end

  # A union is written as the index of the first branch that validates the
  # datum, followed by the datum encoded with that branch.
  # Raises TrosTypeError when no branch accepts the datum.
  def write_union(writers_schema, datum, encoder)
    index_of_schema = writers_schema.schemas.find_index do |branch|
      Schema.validate(branch, datum)
    end
    raise TrosTypeError.new(writers_schema, datum) unless index_of_schema

    encoder.write_long(index_of_schema)
    write_data(writers_schema.schemas[index_of_schema], datum, encoder)
  end

  # Fields are written in schema order, looked up in datum by field name.
  def write_record(writers_schema, datum, encoder)
    writers_schema.fields.each do |field|
      write_data(field.type, datum[field.name], encoder)
    end
  end
end # DatumWriter
609
+ end
610
+ end