avro 1.3.0 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest CHANGED
@@ -16,4 +16,3 @@ test/sample_ipc_server.rb
16
16
  test/test_help.rb
17
17
  test/test_io.rb
18
18
  test/test_protocol.rb
19
- tmp/test.rb.avro
data/Rakefile CHANGED
@@ -16,7 +16,7 @@
16
16
 
17
17
  require 'rubygems'
18
18
  require 'echoe'
19
- VERSION = File.open('../../share/VERSION.txt').read
19
+ VERSION = File.open('../../share/VERSION.txt').read.sub('-SNAPSHOT', '.pre1').chomp
20
20
  Echoe.new('avro', VERSION) do |p|
21
21
  p.author = "Apache Software Foundation"
22
22
  p.email = "avro-dev@hadoop.apache.org"
@@ -2,28 +2,28 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{avro}
5
- s.version = "1.3.0"
5
+ s.version = "1.3.3"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Apache Software Foundation"]
9
- s.date = %q{2010-03-01}
9
+ s.date = %q{2010-08-18}
10
10
  s.description = %q{Apache is a data serialization and RPC format}
11
11
  s.email = %q{avro-dev@hadoop.apache.org}
12
12
  s.extra_rdoc_files = ["CHANGELOG", "lib/avro.rb", "lib/avro/collect_hash.rb", "lib/avro/data_file.rb", "lib/avro/io.rb", "lib/avro/ipc.rb", "lib/avro/protocol.rb", "lib/avro/schema.rb"]
13
- s.files = ["CHANGELOG", "Manifest", "Rakefile", "avro.gemspec", "interop/test_interop.rb", "lib/avro.rb", "lib/avro/collect_hash.rb", "lib/avro/data_file.rb", "lib/avro/io.rb", "lib/avro/ipc.rb", "lib/avro/protocol.rb", "lib/avro/schema.rb", "test/random_data.rb", "test/sample_ipc_client.rb", "test/sample_ipc_server.rb", "test/test_help.rb", "test/test_io.rb", "test/test_protocol.rb", "tmp/test.rb.avro"]
13
+ s.files = ["CHANGELOG", "Manifest", "Rakefile", "avro.gemspec", "interop/test_interop.rb", "lib/avro.rb", "lib/avro/collect_hash.rb", "lib/avro/data_file.rb", "lib/avro/io.rb", "lib/avro/ipc.rb", "lib/avro/protocol.rb", "lib/avro/schema.rb", "test/random_data.rb", "test/sample_ipc_client.rb", "test/sample_ipc_server.rb", "test/test_help.rb", "test/test_io.rb", "test/test_protocol.rb", "test/test_datafile.rb", "test/test_socket_transport.rb"]
14
14
  s.homepage = %q{http://hadoop.apache.org/avro/}
15
15
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Avro"]
16
16
  s.require_paths = ["lib"]
17
17
  s.rubyforge_project = %q{avro}
18
- s.rubygems_version = %q{1.3.5}
18
+ s.rubygems_version = %q{1.3.7}
19
19
  s.summary = %q{Apache Avro for Ruby}
20
- s.test_files = ["test/test_help.rb", "test/test_io.rb", "test/test_protocol.rb"]
20
+ s.test_files = ["test/test_datafile.rb", "test/test_help.rb", "test/test_io.rb", "test/test_protocol.rb", "test/test_socket_transport.rb"]
21
21
 
22
22
  if s.respond_to? :specification_version then
23
23
  current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
24
24
  s.specification_version = 3
25
25
 
26
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
26
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
27
27
  s.add_runtime_dependency(%q<yajl-ruby>, [">= 0"])
28
28
  else
29
29
  s.add_dependency(%q<yajl-ruby>, [">= 0"])
@@ -17,6 +17,8 @@
17
17
  require 'yajl'
18
18
  require 'set'
19
19
  require 'md5'
20
+ require 'net/http'
21
+ require 'stringio'
20
22
 
21
23
  module Avro
22
24
  VERSION = "FIXME"
@@ -29,6 +29,39 @@ module Avro
29
29
 
30
30
  class DataFileError < AvroError; end
31
31
 
32
+ def self.open(file_path, mode='r', schema=nil)
33
+ schema = Avro::Schema.parse(schema) if schema
34
+ case mode
35
+ when 'w'
36
+ unless schema
37
+ raise DataFileError, "Writing an Avro file requires a schema."
38
+ end
39
+ io = open_writer(File.open(file_path, 'wb'), schema)
40
+ when 'r'
41
+ io = open_reader(File.open(file_path, 'rb'), schema)
42
+ else
43
+ raise DataFileError, "Only modes 'r' and 'w' allowed. You gave #{mode.inspect}."
44
+ end
45
+
46
+ yield io if block_given?
47
+ io
48
+ ensure
49
+ io.close if block_given? && io
50
+ end
51
+
52
+ class << self
53
+ private
54
+ def open_writer(file, schema)
55
+ writer = Avro::IO::DatumWriter.new(schema)
56
+ Avro::DataFile::Writer.new(file, writer, schema)
57
+ end
58
+
59
+ def open_reader(file, schema)
60
+ reader = Avro::IO::DatumReader.new(nil, schema)
61
+ Avro::DataFile::Reader.new(file, reader)
62
+ end
63
+ end
64
+
32
65
  class Writer
33
66
  def self.generate_sync_marker
34
67
  OpenSSL::Random.random_bytes(16)
@@ -76,12 +76,7 @@ module Avro
76
76
  # The float is converted into a 32-bit integer using a method
77
77
  # equivalent to Java's floatToIntBits and then encoded in
78
78
  # little-endian format.
79
-
80
- bits = (byte! & 0xFF) |
81
- ((byte! & 0xff) << 8) |
82
- ((byte! & 0xff) << 16) |
83
- ((byte! & 0xff) << 24)
84
- [bits].pack('i').unpack('e')[0]
79
+ @reader.read(4).unpack('e')[0]
85
80
  end
86
81
 
87
82
  def read_double
@@ -89,16 +84,7 @@ module Avro
89
84
  # The double is converted into a 64-bit integer using a method
90
85
  # equivalent to Java's doubleToLongBits and then encoded in
91
86
  # little-endian format.
92
-
93
- bits = (byte! & 0xFF) |
94
- ((byte! & 0xff) << 8) |
95
- ((byte! & 0xff) << 16) |
96
- ((byte! & 0xff) << 24) |
97
- ((byte! & 0xff) << 32) |
98
- ((byte! & 0xff) << 40) |
99
- ((byte! & 0xff) << 48) |
100
- ((byte! & 0xff) << 56)
101
- [bits].pack('Q').unpack('d')[0]
87
+ @reader.read(8).unpack('E')[0]
102
88
  end
103
89
 
104
90
  def read_bytes
@@ -202,11 +188,7 @@ module Avro
202
188
  # equivalent to Java's floatToIntBits and then encoded in
203
189
  # little-endian format.
204
190
  def write_float(datum)
205
- bits = [datum].pack('e').unpack('i')[0]
206
- @writer.write(((bits ) & 0xFF).chr)
207
- @writer.write(((bits >> 8 ) & 0xFF).chr)
208
- @writer.write(((bits >> 16) & 0xFF).chr)
209
- @writer.write(((bits >> 24) & 0xFF).chr)
191
+ @writer.write([datum].pack('e'))
210
192
  end
211
193
 
212
194
  # A double is written as 8 bytes.
@@ -214,15 +196,7 @@ module Avro
214
196
  # equivalent to Java's doubleToLongBits and then encoded in
215
197
  # little-endian format.
216
198
  def write_double(datum)
217
- bits = [datum].pack('d').unpack('Q')[0]
218
- @writer.write(((bits ) & 0xFF).chr)
219
- @writer.write(((bits >> 8 ) & 0xFF).chr)
220
- @writer.write(((bits >> 16) & 0xFF).chr)
221
- @writer.write(((bits >> 24) & 0xFF).chr)
222
- @writer.write(((bits >> 32) & 0xFF).chr)
223
- @writer.write(((bits >> 40) & 0xFF).chr)
224
- @writer.write(((bits >> 48) & 0xFF).chr)
225
- @writer.write(((bits >> 56) & 0xFF).chr)
199
+ @writer.write([datum].pack('E'))
226
200
  end
227
201
 
228
202
  # Bytes are encoded as a long followed by that many bytes of data.
@@ -247,7 +221,7 @@ module Avro
247
221
  class DatumReader
248
222
  def self.check_props(schema_one, schema_two, prop_list)
249
223
  prop_list.all? do |prop|
250
- schema_one.to_hash[prop] == schema_two.to_hash[prop]
224
+ schema_one.send(prop) == schema_two.send(prop)
251
225
  end
252
226
  end
253
227
 
@@ -256,33 +230,34 @@ module Avro
256
230
  r_type = readers_schema.type
257
231
 
258
232
  # This conditional is begging for some OO love.
259
- if [w_type, r_type].include? 'union'
260
- return true
261
- elsif Schema::PRIMITIVE_TYPES.include?(w_type) &&
262
- Schema::PRIMITIVE_TYPES.include?(r_type) &&
263
- w_type == r_type
264
- return true
265
- elsif (w_type == r_type) && (r_type == 'record') &&
266
- check_props(writers_schema, readers_schema, ['fullname'])
267
- return true
268
- elsif w_type == r_type && r_type == 'error' && check_props(writers_scheam, readers_schema, ['fullname'])
269
- return true
270
- elsif w_type == r_type && r_type == 'request'
271
- return true
272
- elsif (w_type == r_type) && (r_type == 'fixed') &&
273
- check_props(writers_schema, readers_schema, ['fullname', 'size'])
274
- return true
275
- elsif (w_type == r_type) && (r_type == 'enum') &&
276
- check_props(writers_schema, readers_schema, ['fullname'])
277
- return true
278
- elsif (w_type == r_type) && (r_type == 'map') &&
279
- check_props(writers_schema.values, readers_schema.values, ['type'])
280
- return true
281
- elsif (w_type == r_type) && (r_type == 'array') &&
282
- check_props(writers_schema.items, readers_schema.items, ['type'])
233
+ if w_type == 'union' || r_type == 'union'
283
234
  return true
284
235
  end
285
236
 
237
+ if w_type == r_type
238
+ if Schema::PRIMITIVE_TYPES.include?(w_type) &&
239
+ Schema::PRIMITIVE_TYPES.include?(r_type)
240
+ return true
241
+ end
242
+
243
+ case r_type
244
+ when 'record'
245
+ return check_props(writers_schema, readers_schema, [:fullname])
246
+ when 'error'
247
+ return check_props(writers_scheam, readers_schema, [:fullname])
248
+ when 'request'
249
+ return true
250
+ when 'fixed'
251
+ return check_props(writers_schema, readers_schema, [:fullname, :size])
252
+ when 'enum'
253
+ return check_props(writers_schema, readers_schema, [:fullname])
254
+ when 'map'
255
+ return check_props(writers_schema.values, readers_schema.values, [:type])
256
+ when 'array'
257
+ return check_props(writers_schema.items, readers_schema.items, [:type])
258
+ end
259
+ end
260
+
286
261
  # Handle schema promotion
287
262
  if w_type == 'int' && ['long', 'float', 'double'].include?(r_type)
288
263
  return true
@@ -424,7 +399,6 @@ module Avro
424
399
  if readers_fields_hash.size > read_record.size
425
400
  writers_fields_hash = writers_schema.fields_hash
426
401
  readers_fields_hash.each do |field_name, field|
427
-
428
402
  unless writers_fields_hash.has_key? field_name
429
403
  if !field.default.nil?
430
404
  field_val = read_default_value(field.type, field.default)
@@ -482,6 +456,77 @@ module Avro
482
456
  raise AvroError(fail_msg)
483
457
  end
484
458
  end
459
+
460
+ def skip_data(writers_schema, decoder)
461
+ case writers_schema.type
462
+ when 'null'
463
+ decoder.skip_null
464
+ when 'boolean'
465
+ decoder.skip_boolean
466
+ when 'string'
467
+ decoder.skip_string
468
+ when 'int'
469
+ decoder.skip_int
470
+ when 'long'
471
+ decoder.skip_long
472
+ when 'float'
473
+ decoder.skip_float
474
+ when 'double'
475
+ decoder.skip_double
476
+ when 'bytes'
477
+ decoder.skip_bytes
478
+ when 'fixed'
479
+ skip_fixed(writers_schema, decoder)
480
+ when 'enum'
481
+ skip_enum(writers_schema, decoder)
482
+ when 'array'
483
+ skip_array(writers_schema, decoder)
484
+ when 'map'
485
+ skip_map(writers_schema, decoder)
486
+ when 'union'
487
+ skip_union(writers_schema, decoder)
488
+ when 'record', 'error', 'request'
489
+ skip_record(writers_schema, decoder)
490
+ else
491
+ raise AvroError, "Unknown schema type: #{schm.type}"
492
+ end
493
+ end
494
+
495
+ def skip_fixed(writers_schema, decoder)
496
+ decoder.skip(writers_schema.size)
497
+ end
498
+
499
+ def skip_enum(writers_schema, decoder)
500
+ decoder.skip_int
501
+ end
502
+
503
+ def skip_array(writers_schema, decoder)
504
+ skip_blocks(decoder) { skip_data(writers_schema.items, decoder) }
505
+ end
506
+
507
+ def skip_map(writers_schema, decoder)
508
+ skip_blocks(decoder) {
509
+ decoder.skip_string
510
+ skip_data(writers_schema.values, decoder)
511
+ }
512
+ end
513
+
514
+ def skip_record(writers_schema, decoder)
515
+ writers_schema.fields.each{|f| skip_data(f.type, decoder) }
516
+ end
517
+
518
+ private
519
+ def skip_blocks(decoder, &blk)
520
+ block_count = decoder.read_long
521
+ while block_count != 0
522
+ if block_count < 0
523
+ decoder.skip(decoder.read_long)
524
+ else
525
+ block_count.times &blk
526
+ end
527
+ block_count = decoder.read_long
528
+ end
529
+ end
485
530
  end # DatumReader
486
531
 
487
532
  # DatumWriter for generic ruby objects
@@ -13,7 +13,6 @@
13
13
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
14
  # See the License for the specific language governing permissions and
15
15
  # limitations under the License.
16
- require 'stringio'
17
16
 
18
17
  module Avro::IPC
19
18
 
@@ -162,25 +161,28 @@ module Avro::IPC
162
161
 
163
162
  def read_handshake_response(decoder)
164
163
  handshake_response = HANDSHAKE_REQUESTOR_READER.read(decoder)
165
- case match = handshake_response['match']
164
+ we_have_matching_schema = false
165
+
166
+ case handshake_response['match']
166
167
  when 'BOTH'
167
168
  self.send_protocol = false
168
- true
169
+ we_have_matching_schema = true
169
170
  when 'CLIENT'
170
171
  raise AvroError.new('Handshake failure. match == CLIENT') if send_protocol
171
- self.remote_protocol = handshake_response['serverProtocol']
172
+ self.remote_protocol = Avro::Protocol.parse(handshake_response['serverProtocol'])
172
173
  self.remote_hash = handshake_response['serverHash']
173
174
  self.send_protocol = false
174
- false
175
+ we_have_matching_schema = true
175
176
  when 'NONE'
176
177
  raise AvroError.new('Handshake failure. match == NONE') if send_protocol
177
- self.remote_protocol = handshake_response['serverProtocol']
178
+ self.remote_protocol = Avro::Protocol.parse(handshake_response['serverProtocol'])
178
179
  self.remote_hash = handshake_response['serverHash']
179
180
  self.send_protocol = true
180
- false
181
181
  else
182
182
  raise AvroError.new("Unexpected match: #{match}")
183
183
  end
184
+
185
+ return we_have_matching_schema
184
186
  end
185
187
 
186
188
  def read_call_response(message_name, decoder)
@@ -236,11 +238,9 @@ module Avro::IPC
236
238
  protocol_cache[local_hash] = local_protocol
237
239
  end
238
240
 
239
- def respond(transport)
240
- # Called by a server to deserialize a request, compute and serialize
241
- # a response or error. Compare to 'handle()' in Thrift.
242
-
243
- call_request = transport.read_framed_message
241
+ # Called by a server to deserialize a request, compute and serialize
242
+ # a response or error. Compare to 'handle()' in Thrift.
243
+ def respond(call_request)
244
244
  buffer_decoder = Avro::IO::BinaryDecoder.new(StringIO.new(call_request))
245
245
  buffer_writer = StringIO.new('', 'w+')
246
246
  buffer_encoder = Avro::IO::BinaryEncoder.new(buffer_writer)
@@ -248,7 +248,7 @@ module Avro::IPC
248
248
  response_metadata = {}
249
249
 
250
250
  begin
251
- remote_protocol = process_handshake(transport, buffer_decoder, buffer_encoder)
251
+ remote_protocol = process_handshake(buffer_decoder, buffer_encoder)
252
252
  # handshake failure
253
253
  unless remote_protocol
254
254
  return buffer_writer.string
@@ -300,7 +300,7 @@ module Avro::IPC
300
300
  buffer_writer.string
301
301
  end
302
302
 
303
- def process_handshake(transport, decoder, encoder)
303
+ def process_handshake(decoder, encoder)
304
304
  handshake_request = HANDSHAKE_RESPONDER_READER.read(decoder)
305
305
  handshake_response = {}
306
306
 
@@ -308,8 +308,9 @@ module Avro::IPC
308
308
  client_hash = handshake_request['clientHash']
309
309
  client_protocol = handshake_request['clientProtocol']
310
310
  remote_protocol = protocol_cache[client_hash]
311
+
311
312
  if !remote_protocol && client_protocol
312
- remote_protocol = protocol.parse(client_protocol)
313
+ remote_protocol = Avro::Protocol.parse(client_protocol)
313
314
  protocol_cache[client_hash] = remote_protocol
314
315
  end
315
316
 
@@ -422,7 +423,7 @@ module Avro::IPC
422
423
  end
423
424
 
424
425
  def write_buffer_length(n)
425
- bytes_sent = sock.write([n].pack('I'))
426
+ bytes_sent = sock.write([n].pack('N'))
426
427
  if bytes_sent == 0
427
428
  raise ConnectionClosedException.new("socket sent 0 bytes")
428
429
  end
@@ -433,11 +434,102 @@ module Avro::IPC
433
434
  if read == '' || read == nil
434
435
  raise ConnectionClosedException.new("Socket read 0 bytes.")
435
436
  end
436
- read.unpack('I')[0]
437
+ read.unpack('N')[0]
437
438
  end
438
439
 
439
440
  def close
440
441
  sock.close
441
442
  end
442
443
  end
444
+
445
+ class ConnectionClosedError < StandardError; end
446
+
447
+ class FramedWriter
448
+ attr_reader :writer
449
+ def initialize(writer)
450
+ @writer = writer
451
+ end
452
+
453
+ def write_framed_message(message)
454
+ message_size = message.size
455
+ total_bytes_sent = 0
456
+ while message_size - total_bytes_sent > 0
457
+ if message_size - total_bytes_sent > BUFFER_SIZE
458
+ buffer_size = BUFFER_SIZE
459
+ else
460
+ buffer_size = message_size - total_bytes_sent
461
+ end
462
+ write_buffer(message[total_bytes_sent, buffer_size])
463
+ total_bytes_sent += buffer_size
464
+ end
465
+ write_buffer_size(0)
466
+ end
467
+
468
+ def to_s; writer.string; end
469
+
470
+ private
471
+ def write_buffer(chunk)
472
+ buffer_size = chunk.size
473
+ write_buffer_size(buffer_size)
474
+ writer << chunk
475
+ end
476
+
477
+ def write_buffer_size(n)
478
+ writer.write([n].pack('N'))
479
+ end
480
+ end
481
+
482
+ class FramedReader
483
+ attr_reader :reader
484
+
485
+ def initialize(reader)
486
+ @reader = reader
487
+ end
488
+
489
+ def read_framed_message
490
+ message = []
491
+ loop do
492
+ buffer = ""
493
+ buffer_size = read_buffer_size
494
+
495
+ return message.join if buffer_size == 0
496
+
497
+ while buffer.size < buffer_size
498
+ chunk = reader.read(buffer_size - buffer.size)
499
+ chunk_error?(chunk)
500
+ buffer << chunk
501
+ end
502
+ message << buffer
503
+ end
504
+ end
505
+
506
+ private
507
+ def read_buffer_size
508
+ header = reader.read(BUFFER_HEADER_LENGTH)
509
+ chunk_error?(header)
510
+ header.unpack('N')[0]
511
+ end
512
+
513
+ def chunk_error?(chunk)
514
+ raise ConnectionClosedError.new("Reader read 0 bytes") if chunk == ''
515
+ end
516
+ end
517
+
518
+ # Only works for clients. Sigh.
519
+ class HTTPTransceiver
520
+ attr_reader :remote_name, :host, :port
521
+ def initialize(host, port)
522
+ @host, @port = host, port
523
+ @remote_name = "#{host}:#{port}"
524
+ end
525
+
526
+ def transceive(message)
527
+ writer = FramedWriter.new(StringIO.new)
528
+ writer.write_framed_message(message)
529
+ resp = Net::HTTP.start(host, port) do |http|
530
+ http.post('/', writer.to_s, {'Content-Type' => 'avro/binary'})
531
+ end
532
+ FramedReader.new(StringIO.new(resp.body)).read_framed_message
533
+ end
534
+ end
443
535
  end
@@ -57,11 +57,11 @@ module Avro
57
57
  end
58
58
 
59
59
  def to_s
60
- Yajl.dump to_hash
60
+ Yajl.dump to_avro
61
61
  end
62
62
 
63
63
  def ==(other)
64
- to_hash == Yajl.load(other.to_s)
64
+ to_avro == other.to_avro
65
65
  end
66
66
 
67
67
  private
@@ -96,13 +96,14 @@ module Avro
96
96
  message_objects
97
97
  end
98
98
 
99
- def to_hash
99
+ protected
100
+ def to_avro
100
101
  hsh = {'protocol' => name}
101
102
  hsh['namespace'] = namespace if namespace
102
- hsh['types'] = types.map{|t| Yajl.load(t.to_s) } if types
103
+ hsh['types'] = types.map{|t| t.to_avro } if types
103
104
 
104
105
  if messages
105
- hsh['messages'] = messages.collect_hash{|k,t| [k, Yajl.load(t.to_s)] }
106
+ hsh['messages'] = messages.collect_hash{|k,t| [k, t.to_avro] }
106
107
  end
107
108
 
108
109
  hsh
@@ -119,18 +120,22 @@ module Avro
119
120
  @errors = parse_errors(errors, names) if errors
120
121
  end
121
122
 
122
- def to_s
123
- hsh = {'request' => Yajl.load(request.to_s)}
123
+ def to_avro
124
+ hsh = {'request' => request.to_avro}
124
125
  if response_from_names
125
126
  hsh['response'] = response.fullname
126
127
  else
127
- hsh['response'] = Yajl.load(response.to_s)
128
+ hsh['response'] = response.to_avro
128
129
  end
129
130
 
130
131
  if errors
131
- hsh['errors'] = Yajl.load(errors.to_s)
132
+ hsh['errors'] = errors.to_avro
132
133
  end
133
- Yajl.dump hsh
134
+ hsh
135
+ end
136
+
137
+ def to_s
138
+ Yajl.dump to_avro
134
139
  end
135
140
 
136
141
  def parse_request(request, names)
@@ -51,7 +51,7 @@ module Avro
51
51
  fields = json_obj['fields']
52
52
  return RecordSchema.new(name, namespace, fields, names, type)
53
53
  else
54
- raise SchemaParseError.new("Unknown Named Type: #{type}")
54
+ raise SchemaParseError.new("Unknown named type: #{type}")
55
55
  end
56
56
  elsif VALID_TYPES.include?(type)
57
57
  case type
@@ -73,7 +73,7 @@ module Avro
73
73
  elsif PRIMITIVE_TYPES.include? json_obj
74
74
  return PrimitiveSchema.new(json_obj)
75
75
  else
76
- msg = "Could not make an Avro Schema object from #{json_obj}"
76
+ msg = "#{json_obj.inspect} is not a schema we know about."
77
77
  raise SchemaParseError.new(msg)
78
78
  end
79
79
  end
@@ -129,12 +129,21 @@ module Avro
129
129
  @type.hash
130
130
  end
131
131
 
132
- def to_hash
132
+ def subparse(json_obj, names=nil)
133
+ begin
134
+ Schema.real_parse(json_obj, names)
135
+ rescue => e
136
+ raise e if e.is_a? SchemaParseError
137
+ raise SchemaParseError, "Sub-schema for #{self.class.name} not a valid Avro schema. Bad schema: #{json_obj}"
138
+ end
139
+ end
140
+
141
+ def to_avro
133
142
  {'type' => @type}
134
143
  end
135
144
 
136
145
  def to_s
137
- Yajl.dump to_hash
146
+ Yajl.dump to_avro
138
147
  end
139
148
 
140
149
  class NamedSchema < Schema
@@ -145,7 +154,7 @@ module Avro
145
154
  names = Name.add_name(names, self)
146
155
  end
147
156
 
148
- def to_hash
157
+ def to_avro
149
158
  props = {'name' => @name}
150
159
  props.merge!('namespace' => @namespace) if @namespace
151
160
  super.merge props
@@ -194,8 +203,8 @@ module Avro
194
203
  fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
195
204
  end
196
205
 
197
- def to_hash
198
- hsh = super.merge('fields' => @fields.map {|f|Yajl.load(f.to_s)} )
206
+ def to_avro
207
+ hsh = super.merge('fields' => @fields.map {|f| f.to_avro } )
199
208
  if type == 'request'
200
209
  hsh['fields']
201
210
  else
@@ -215,20 +224,15 @@ module Avro
215
224
  @items = names[items]
216
225
  @items_schema_from_names = true
217
226
  else
218
- begin
219
- @items = Schema.real_parse(items, names)
220
- rescue => e
221
- msg = "Items schema not a valid Avro schema" + e.to_s
222
- raise SchemaParseError, msg
223
- end
227
+ @items = subparse(items, names)
224
228
  end
225
229
  end
226
230
 
227
- def to_hash
231
+ def to_avro
228
232
  name_or_json = if items_schema_from_names
229
233
  items.fullname
230
234
  else
231
- Yajl.load(items.to_s)
235
+ items.to_avro
232
236
  end
233
237
  super.merge('items' => name_or_json)
234
238
  end
@@ -244,21 +248,17 @@ module Avro
244
248
  values_schema = names[values]
245
249
  @values_schema_from_names = true
246
250
  else
247
- begin
248
- values_schema = Schema.real_parse(values, names)
249
- rescue => e
250
- raise SchemaParseError.new('Values schema not a valid Avro schema.' + e.to_s)
251
- end
251
+ values_schema = subparse(values, names)
252
252
  end
253
253
  @values = values_schema
254
254
  end
255
255
 
256
- def to_hash
256
+ def to_avro
257
257
  to_dump = super
258
258
  if values_schema_from_names
259
259
  to_dump['values'] = values
260
260
  else
261
- to_dump['values'] = Yajl.load(values.to_s)
261
+ to_dump['values'] = values.to_avro
262
262
  end
263
263
  to_dump
264
264
  end
@@ -277,11 +277,7 @@ module Avro
277
277
  new_schema = names[schema]
278
278
  from_names = true
279
279
  else
280
- begin
281
- new_schema = Schema.real_parse(schema, names)
282
- rescue
283
- raise SchemaParseError, 'Union item must be a valid Avro schema'
284
- end
280
+ new_schema = subparse(schema, names)
285
281
  end
286
282
 
287
283
  ns_type = new_schema.type
@@ -299,7 +295,7 @@ module Avro
299
295
  end
300
296
  end
301
297
 
302
- def to_s
298
+ def to_avro
303
299
  # FIXME(jmhodges) this from_name pattern is really weird and
304
300
  # seems code-smelly.
305
301
  to_dump = []
@@ -307,10 +303,10 @@ module Avro
307
303
  if schema_from_names_indices.include?(i)
308
304
  to_dump << schema.fullname
309
305
  else
310
- to_dump << Yajl.load(schema.to_s)
306
+ to_dump << schema.to_avro
311
307
  end
312
308
  end
313
- Yajl.dump(to_dump)
309
+ to_dump
314
310
  end
315
311
  end
316
312
 
@@ -325,7 +321,7 @@ module Avro
325
321
  @symbols = symbols
326
322
  end
327
323
 
328
- def to_hash
324
+ def to_avro
329
325
  super.merge('symbols' => symbols)
330
326
  end
331
327
  end
@@ -340,8 +336,9 @@ module Avro
340
336
  super(type)
341
337
  end
342
338
 
343
- def to_s
344
- to_hash.size == 1 ? type.inspect : Yajl.dump(to_hash)
339
+ def to_avro
340
+ hsh = super
341
+ hsh.size == 1 ? type : hsh
345
342
  end
346
343
  end
347
344
 
@@ -356,12 +353,12 @@ module Avro
356
353
  @size = size
357
354
  end
358
355
 
359
- def to_hash
356
+ def to_avro
360
357
  super.merge('size' => @size)
361
358
  end
362
359
  end
363
360
 
364
- class Field
361
+ class Field < Schema
365
362
  attr_reader :type, :name, :default, :order, :type_from_names
366
363
  def initialize(type, name, default=nil, order=nil, names=nil)
367
364
  @type_from_names = false
@@ -369,7 +366,7 @@ module Avro
369
366
  type_schema = names[type]
370
367
  @type_from_names = true
371
368
  else
372
- type_schema = Schema.real_parse(type, names)
369
+ type_schema = subparse(type, names)
373
370
  end
374
371
  @type = type_schema
375
372
  @name = name
@@ -377,8 +374,8 @@ module Avro
377
374
  @order = order
378
375
  end
379
376
 
380
- def to_hash
381
- sigh_type = type_from_names ? type.fullname : Yajl.load(type.to_s)
377
+ def to_avro
378
+ sigh_type = type_from_names ? type.fullname : type.to_avro
382
379
  hsh = {
383
380
  'name' => name,
384
381
  'type' => sigh_type
@@ -387,10 +384,6 @@ module Avro
387
384
  hsh['order'] = order if order
388
385
  hsh
389
386
  end
390
-
391
- def to_s
392
- Yajl.dump(to_hash)
393
- end
394
387
  end
395
388
  end
396
389
 
@@ -66,8 +66,7 @@ if $0 == __FILE__
66
66
  'body' => ARGV[2]
67
67
  }
68
68
 
69
- num_messages = ARGV[3].to_i
70
- num_message = 1 if num_messages == 0
69
+ num_messages = (ARGV[3] || 1).to_i
71
70
 
72
71
  # build the parameters for the request
73
72
  params = {'message' => message}
@@ -83,4 +82,4 @@ if $0 == __FILE__
83
82
  requestor = make_requestor('localhost', 9090, MAIL_PROTOCOL)
84
83
  result = requestor.request('replay', {})
85
84
  puts("Replay Result: " + result)
86
- end
85
+ end
@@ -81,11 +81,12 @@ class MailHandler < RequestHandler
81
81
  def handle(request)
82
82
  responder = MailResponder.new()
83
83
  transport = Avro::IPC::SocketTransport.new(request)
84
- transport.write_framed_message(responder.respond(transport))
84
+ str = transport.read_framed_message
85
+ transport.write_framed_message(responder.respond(str))
85
86
  end
86
87
  end
87
88
 
88
89
  if $0 == __FILE__
89
90
  handler = MailHandler.new('localhost', 9090)
90
91
  handler.run
91
- end
92
+ end
@@ -0,0 +1,121 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'test_help'
18
+
19
+ class TestDataFile < Test::Unit::TestCase
20
+ HERE = File.expand_path File.dirname(__FILE__)
21
+ def setup
22
+ if File.exists?(HERE + '/data.avr')
23
+ File.unlink(HERE + '/data.avr')
24
+ end
25
+ end
26
+
27
+ def teardown
28
+ if File.exists?(HERE + '/data.avr')
29
+ File.unlink(HERE + '/data.avr')
30
+ end
31
+ end
32
+
33
+ def test_differing_schemas_with_primitives
34
+ writer_schema = <<-JSON
35
+ { "type": "record",
36
+ "name": "User",
37
+ "fields" : [
38
+ {"name": "username", "type": "string"},
39
+ {"name": "age", "type": "int"},
40
+ {"name": "verified", "type": "boolean", "default": "false"}
41
+ ]}
42
+ JSON
43
+
44
+ data = [{"username" => "john", "age" => 25, "verified" => true},
45
+ {"username" => "ryan", "age" => 23, "verified" => false}]
46
+
47
+ Avro::DataFile.open('data.avr', 'w', writer_schema) do |dw|
48
+ data.each{|h| dw << h }
49
+ end
50
+
51
+ # extract the username only from the avro serialized file
52
+ reader_schema = <<-JSON
53
+ { "type": "record",
54
+ "name": "User",
55
+ "fields" : [
56
+ {"name": "username", "type": "string"}
57
+ ]}
58
+ JSON
59
+
60
+ Avro::DataFile.open('data.avr', 'r', reader_schema) do |dr|
61
+ dr.each_with_index do |record, i|
62
+ assert_equal data[i]['username'], record['username']
63
+ end
64
+ end
65
+ end
66
+
67
+ def test_differing_schemas_with_complex_objects
68
+ writer_schema = <<-JSON
69
+ { "type": "record",
70
+ "name": "something",
71
+ "fields": [
72
+ {"name": "something_fixed", "type": {"name": "inner_fixed",
73
+ "type": "fixed", "size": 3}},
74
+ {"name": "something_enum", "type": {"name": "inner_enum",
75
+ "type": "enum",
76
+ "symbols": ["hello", "goodbye"]}},
77
+ {"name": "something_array", "type": {"type": "array", "items": "int"}},
78
+ {"name": "something_map", "type": {"type": "map", "values": "int"}},
79
+ {"name": "something_record", "type": {"name": "inner_record",
80
+ "type": "record",
81
+ "fields": [
82
+ {"name": "inner", "type": "int"}
83
+ ]}},
84
+ {"name": "username", "type": "string"}
85
+ ]}
86
+ JSON
87
+
88
+ data = [{"username" => "john",
89
+ "something_fixed" => "foo",
90
+ "something_enum" => "hello",
91
+ "something_array" => [1,2,3],
92
+ "something_map" => {"a" => 1, "b" => 2},
93
+ "something_record" => {"inner" => 2},
94
+ "something_error" => {"code" => 403}
95
+ },
96
+ {"username" => "ryan",
97
+ "something_fixed" => "bar",
98
+ "something_enum" => "goodbye",
99
+ "something_array" => [1,2,3],
100
+ "something_map" => {"a" => 2, "b" => 6},
101
+ "something_record" => {"inner" => 1},
102
+ "something_error" => {"code" => 401}
103
+ }]
104
+
105
+ Avro::DataFile.open('data.avr', 'w', writer_schema) do |dw|
106
+ data.each{|d| dw << d }
107
+ end
108
+
109
+ %w[fixed enum record error array map union].each do |s|
110
+ reader = Yajl.load(writer_schema)
111
+ reader['fields'] = reader['fields'].reject{|f| f['type']['type'] == s}
112
+ Avro::DataFile.open('data.avr', 'r', Yajl.dump(reader)) do |dr|
113
+ dr.each_with_index do |obj, i|
114
+ reader['fields'].each do |field|
115
+ assert_equal data[i][field['name']], obj[field['name']]
116
+ end
117
+ end
118
+ end
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,40 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'test_help'
18
+
19
+ class TestSocketTransport < Test::Unit::TestCase
20
+ def test_buffer_writing
21
+ io = StringIO.new
22
+ st = Avro::IPC::SocketTransport.new(io)
23
+ buffer_length = "\000\000\000\006" # 6 in big-endian
24
+ message = 'abcdef'
25
+ null_ending = "\000\000\000\000" # 0 in big-endian
26
+ full = buffer_length + message + null_ending
27
+ st.write_framed_message('abcdef')
28
+ assert_equal full, io.string
29
+ end
30
+
31
+ def test_buffer_reading
32
+ buffer_length = "\000\000\000\005" # 5 in big-endian
33
+ message = "hello"
34
+ null_ending = "\000\000\000\000" # 0 in big-endian
35
+ full = buffer_length + message + null_ending
36
+ io = StringIO.new(full)
37
+ st = Avro::IPC::SocketTransport.new(io)
38
+ assert_equal 'hello', st.read_framed_message
39
+ end
40
+ end
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: avro
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 1
8
+ - 3
9
+ - 3
10
+ version: 1.3.3
5
11
  platform: ruby
6
12
  authors:
7
13
  - Apache Software Foundation
@@ -9,19 +15,23 @@ autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
17
 
12
- date: 2010-03-01 00:00:00 -08:00
18
+ date: 2010-08-18 00:00:00 -07:00
13
19
  default_executable:
14
20
  dependencies:
15
21
  - !ruby/object:Gem::Dependency
16
22
  name: yajl-ruby
17
- type: :runtime
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
20
26
  requirements:
21
27
  - - ">="
22
28
  - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
23
32
  version: "0"
24
- version:
33
+ type: :runtime
34
+ version_requirements: *id001
25
35
  description: Apache is a data serialization and RPC format
26
36
  email: avro-dev@hadoop.apache.org
27
37
  executables: []
@@ -56,7 +66,8 @@ files:
56
66
  - test/test_help.rb
57
67
  - test/test_io.rb
58
68
  - test/test_protocol.rb
59
- - tmp/test.rb.avro
69
+ - test/test_datafile.rb
70
+ - test/test_socket_transport.rb
60
71
  has_rdoc: true
61
72
  homepage: http://hadoop.apache.org/avro/
62
73
  licenses: []
@@ -70,25 +81,34 @@ rdoc_options:
70
81
  require_paths:
71
82
  - lib
72
83
  required_ruby_version: !ruby/object:Gem::Requirement
84
+ none: false
73
85
  requirements:
74
86
  - - ">="
75
87
  - !ruby/object:Gem::Version
88
+ hash: 3
89
+ segments:
90
+ - 0
76
91
  version: "0"
77
- version:
78
92
  required_rubygems_version: !ruby/object:Gem::Requirement
93
+ none: false
79
94
  requirements:
80
95
  - - ">="
81
96
  - !ruby/object:Gem::Version
97
+ hash: 11
98
+ segments:
99
+ - 1
100
+ - 2
82
101
  version: "1.2"
83
- version:
84
102
  requirements: []
85
103
 
86
104
  rubyforge_project: avro
87
- rubygems_version: 1.3.5
105
+ rubygems_version: 1.3.7
88
106
  signing_key:
89
107
  specification_version: 3
90
108
  summary: Apache Avro for Ruby
91
109
  test_files:
110
+ - test/test_datafile.rb
92
111
  - test/test_help.rb
93
112
  - test/test_io.rb
94
113
  - test/test_protocol.rb
114
+ - test/test_socket_transport.rb
Binary file