avro 1.3.0 → 1.3.3

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest CHANGED
@@ -16,4 +16,3 @@ test/sample_ipc_server.rb
16
16
  test/test_help.rb
17
17
  test/test_io.rb
18
18
  test/test_protocol.rb
19
- tmp/test.rb.avro
data/Rakefile CHANGED
@@ -16,7 +16,7 @@
16
16
 
17
17
  require 'rubygems'
18
18
  require 'echoe'
19
- VERSION = File.open('../../share/VERSION.txt').read
19
+ VERSION = File.open('../../share/VERSION.txt').read.sub('-SNAPSHOT', '.pre1').chomp
20
20
  Echoe.new('avro', VERSION) do |p|
21
21
  p.author = "Apache Software Foundation"
22
22
  p.email = "avro-dev@hadoop.apache.org"
@@ -2,28 +2,28 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{avro}
5
- s.version = "1.3.0"
5
+ s.version = "1.3.3"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Apache Software Foundation"]
9
- s.date = %q{2010-03-01}
9
+ s.date = %q{2010-08-18}
10
10
  s.description = %q{Apache is a data serialization and RPC format}
11
11
  s.email = %q{avro-dev@hadoop.apache.org}
12
12
  s.extra_rdoc_files = ["CHANGELOG", "lib/avro.rb", "lib/avro/collect_hash.rb", "lib/avro/data_file.rb", "lib/avro/io.rb", "lib/avro/ipc.rb", "lib/avro/protocol.rb", "lib/avro/schema.rb"]
13
- s.files = ["CHANGELOG", "Manifest", "Rakefile", "avro.gemspec", "interop/test_interop.rb", "lib/avro.rb", "lib/avro/collect_hash.rb", "lib/avro/data_file.rb", "lib/avro/io.rb", "lib/avro/ipc.rb", "lib/avro/protocol.rb", "lib/avro/schema.rb", "test/random_data.rb", "test/sample_ipc_client.rb", "test/sample_ipc_server.rb", "test/test_help.rb", "test/test_io.rb", "test/test_protocol.rb", "tmp/test.rb.avro"]
13
+ s.files = ["CHANGELOG", "Manifest", "Rakefile", "avro.gemspec", "interop/test_interop.rb", "lib/avro.rb", "lib/avro/collect_hash.rb", "lib/avro/data_file.rb", "lib/avro/io.rb", "lib/avro/ipc.rb", "lib/avro/protocol.rb", "lib/avro/schema.rb", "test/random_data.rb", "test/sample_ipc_client.rb", "test/sample_ipc_server.rb", "test/test_help.rb", "test/test_io.rb", "test/test_protocol.rb", "test/test_datafile.rb", "test/test_socket_transport.rb"]
14
14
  s.homepage = %q{http://hadoop.apache.org/avro/}
15
15
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Avro"]
16
16
  s.require_paths = ["lib"]
17
17
  s.rubyforge_project = %q{avro}
18
- s.rubygems_version = %q{1.3.5}
18
+ s.rubygems_version = %q{1.3.7}
19
19
  s.summary = %q{Apache Avro for Ruby}
20
- s.test_files = ["test/test_help.rb", "test/test_io.rb", "test/test_protocol.rb"]
20
+ s.test_files = ["test/test_datafile.rb", "test/test_help.rb", "test/test_io.rb", "test/test_protocol.rb", "test/test_socket_transport.rb"]
21
21
 
22
22
  if s.respond_to? :specification_version then
23
23
  current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
24
24
  s.specification_version = 3
25
25
 
26
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
26
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
27
27
  s.add_runtime_dependency(%q<yajl-ruby>, [">= 0"])
28
28
  else
29
29
  s.add_dependency(%q<yajl-ruby>, [">= 0"])
@@ -17,6 +17,8 @@
17
17
  require 'yajl'
18
18
  require 'set'
19
19
  require 'md5'
20
+ require 'net/http'
21
+ require 'stringio'
20
22
 
21
23
  module Avro
22
24
  VERSION = "FIXME"
@@ -29,6 +29,39 @@ module Avro
29
29
 
30
30
  class DataFileError < AvroError; end
31
31
 
32
+ def self.open(file_path, mode='r', schema=nil)
33
+ schema = Avro::Schema.parse(schema) if schema
34
+ case mode
35
+ when 'w'
36
+ unless schema
37
+ raise DataFileError, "Writing an Avro file requires a schema."
38
+ end
39
+ io = open_writer(File.open(file_path, 'wb'), schema)
40
+ when 'r'
41
+ io = open_reader(File.open(file_path, 'rb'), schema)
42
+ else
43
+ raise DataFileError, "Only modes 'r' and 'w' allowed. You gave #{mode.inspect}."
44
+ end
45
+
46
+ yield io if block_given?
47
+ io
48
+ ensure
49
+ io.close if block_given? && io
50
+ end
51
+
52
+ class << self
53
+ private
54
+ def open_writer(file, schema)
55
+ writer = Avro::IO::DatumWriter.new(schema)
56
+ Avro::DataFile::Writer.new(file, writer, schema)
57
+ end
58
+
59
+ def open_reader(file, schema)
60
+ reader = Avro::IO::DatumReader.new(nil, schema)
61
+ Avro::DataFile::Reader.new(file, reader)
62
+ end
63
+ end
64
+
32
65
  class Writer
33
66
  def self.generate_sync_marker
34
67
  OpenSSL::Random.random_bytes(16)
@@ -76,12 +76,7 @@ module Avro
76
76
  # The float is converted into a 32-bit integer using a method
77
77
  # equivalent to Java's floatToIntBits and then encoded in
78
78
  # little-endian format.
79
-
80
- bits = (byte! & 0xFF) |
81
- ((byte! & 0xff) << 8) |
82
- ((byte! & 0xff) << 16) |
83
- ((byte! & 0xff) << 24)
84
- [bits].pack('i').unpack('e')[0]
79
+ @reader.read(4).unpack('e')[0]
85
80
  end
86
81
 
87
82
  def read_double
@@ -89,16 +84,7 @@ module Avro
89
84
  # The double is converted into a 64-bit integer using a method
90
85
  # equivalent to Java's doubleToLongBits and then encoded in
91
86
  # little-endian format.
92
-
93
- bits = (byte! & 0xFF) |
94
- ((byte! & 0xff) << 8) |
95
- ((byte! & 0xff) << 16) |
96
- ((byte! & 0xff) << 24) |
97
- ((byte! & 0xff) << 32) |
98
- ((byte! & 0xff) << 40) |
99
- ((byte! & 0xff) << 48) |
100
- ((byte! & 0xff) << 56)
101
- [bits].pack('Q').unpack('d')[0]
87
+ @reader.read(8).unpack('E')[0]
102
88
  end
103
89
 
104
90
  def read_bytes
@@ -202,11 +188,7 @@ module Avro
202
188
  # equivalent to Java's floatToIntBits and then encoded in
203
189
  # little-endian format.
204
190
  def write_float(datum)
205
- bits = [datum].pack('e').unpack('i')[0]
206
- @writer.write(((bits ) & 0xFF).chr)
207
- @writer.write(((bits >> 8 ) & 0xFF).chr)
208
- @writer.write(((bits >> 16) & 0xFF).chr)
209
- @writer.write(((bits >> 24) & 0xFF).chr)
191
+ @writer.write([datum].pack('e'))
210
192
  end
211
193
 
212
194
  # A double is written as 8 bytes.
@@ -214,15 +196,7 @@ module Avro
214
196
  # equivalent to Java's doubleToLongBits and then encoded in
215
197
  # little-endian format.
216
198
  def write_double(datum)
217
- bits = [datum].pack('d').unpack('Q')[0]
218
- @writer.write(((bits ) & 0xFF).chr)
219
- @writer.write(((bits >> 8 ) & 0xFF).chr)
220
- @writer.write(((bits >> 16) & 0xFF).chr)
221
- @writer.write(((bits >> 24) & 0xFF).chr)
222
- @writer.write(((bits >> 32) & 0xFF).chr)
223
- @writer.write(((bits >> 40) & 0xFF).chr)
224
- @writer.write(((bits >> 48) & 0xFF).chr)
225
- @writer.write(((bits >> 56) & 0xFF).chr)
199
+ @writer.write([datum].pack('E'))
226
200
  end
227
201
 
228
202
  # Bytes are encoded as a long followed by that many bytes of data.
@@ -247,7 +221,7 @@ module Avro
247
221
  class DatumReader
248
222
  def self.check_props(schema_one, schema_two, prop_list)
249
223
  prop_list.all? do |prop|
250
- schema_one.to_hash[prop] == schema_two.to_hash[prop]
224
+ schema_one.send(prop) == schema_two.send(prop)
251
225
  end
252
226
  end
253
227
 
@@ -256,33 +230,34 @@ module Avro
256
230
  r_type = readers_schema.type
257
231
 
258
232
  # This conditional is begging for some OO love.
259
- if [w_type, r_type].include? 'union'
260
- return true
261
- elsif Schema::PRIMITIVE_TYPES.include?(w_type) &&
262
- Schema::PRIMITIVE_TYPES.include?(r_type) &&
263
- w_type == r_type
264
- return true
265
- elsif (w_type == r_type) && (r_type == 'record') &&
266
- check_props(writers_schema, readers_schema, ['fullname'])
267
- return true
268
- elsif w_type == r_type && r_type == 'error' && check_props(writers_scheam, readers_schema, ['fullname'])
269
- return true
270
- elsif w_type == r_type && r_type == 'request'
271
- return true
272
- elsif (w_type == r_type) && (r_type == 'fixed') &&
273
- check_props(writers_schema, readers_schema, ['fullname', 'size'])
274
- return true
275
- elsif (w_type == r_type) && (r_type == 'enum') &&
276
- check_props(writers_schema, readers_schema, ['fullname'])
277
- return true
278
- elsif (w_type == r_type) && (r_type == 'map') &&
279
- check_props(writers_schema.values, readers_schema.values, ['type'])
280
- return true
281
- elsif (w_type == r_type) && (r_type == 'array') &&
282
- check_props(writers_schema.items, readers_schema.items, ['type'])
233
+ if w_type == 'union' || r_type == 'union'
283
234
  return true
284
235
  end
285
236
 
237
+ if w_type == r_type
238
+ if Schema::PRIMITIVE_TYPES.include?(w_type) &&
239
+ Schema::PRIMITIVE_TYPES.include?(r_type)
240
+ return true
241
+ end
242
+
243
+ case r_type
244
+ when 'record'
245
+ return check_props(writers_schema, readers_schema, [:fullname])
246
+ when 'error'
247
+ return check_props(writers_scheam, readers_schema, [:fullname])
248
+ when 'request'
249
+ return true
250
+ when 'fixed'
251
+ return check_props(writers_schema, readers_schema, [:fullname, :size])
252
+ when 'enum'
253
+ return check_props(writers_schema, readers_schema, [:fullname])
254
+ when 'map'
255
+ return check_props(writers_schema.values, readers_schema.values, [:type])
256
+ when 'array'
257
+ return check_props(writers_schema.items, readers_schema.items, [:type])
258
+ end
259
+ end
260
+
286
261
  # Handle schema promotion
287
262
  if w_type == 'int' && ['long', 'float', 'double'].include?(r_type)
288
263
  return true
@@ -424,7 +399,6 @@ module Avro
424
399
  if readers_fields_hash.size > read_record.size
425
400
  writers_fields_hash = writers_schema.fields_hash
426
401
  readers_fields_hash.each do |field_name, field|
427
-
428
402
  unless writers_fields_hash.has_key? field_name
429
403
  if !field.default.nil?
430
404
  field_val = read_default_value(field.type, field.default)
@@ -482,6 +456,77 @@ module Avro
482
456
  raise AvroError(fail_msg)
483
457
  end
484
458
  end
459
+
460
+ def skip_data(writers_schema, decoder)
461
+ case writers_schema.type
462
+ when 'null'
463
+ decoder.skip_null
464
+ when 'boolean'
465
+ decoder.skip_boolean
466
+ when 'string'
467
+ decoder.skip_string
468
+ when 'int'
469
+ decoder.skip_int
470
+ when 'long'
471
+ decoder.skip_long
472
+ when 'float'
473
+ decoder.skip_float
474
+ when 'double'
475
+ decoder.skip_double
476
+ when 'bytes'
477
+ decoder.skip_bytes
478
+ when 'fixed'
479
+ skip_fixed(writers_schema, decoder)
480
+ when 'enum'
481
+ skip_enum(writers_schema, decoder)
482
+ when 'array'
483
+ skip_array(writers_schema, decoder)
484
+ when 'map'
485
+ skip_map(writers_schema, decoder)
486
+ when 'union'
487
+ skip_union(writers_schema, decoder)
488
+ when 'record', 'error', 'request'
489
+ skip_record(writers_schema, decoder)
490
+ else
491
+ raise AvroError, "Unknown schema type: #{schm.type}"
492
+ end
493
+ end
494
+
495
+ def skip_fixed(writers_schema, decoder)
496
+ decoder.skip(writers_schema.size)
497
+ end
498
+
499
+ def skip_enum(writers_schema, decoder)
500
+ decoder.skip_int
501
+ end
502
+
503
+ def skip_array(writers_schema, decoder)
504
+ skip_blocks(decoder) { skip_data(writers_schema.items, decoder) }
505
+ end
506
+
507
+ def skip_map(writers_schema, decoder)
508
+ skip_blocks(decoder) {
509
+ decoder.skip_string
510
+ skip_data(writers_schema.values, decoder)
511
+ }
512
+ end
513
+
514
+ def skip_record(writers_schema, decoder)
515
+ writers_schema.fields.each{|f| skip_data(f.type, decoder) }
516
+ end
517
+
518
+ private
519
+ def skip_blocks(decoder, &blk)
520
+ block_count = decoder.read_long
521
+ while block_count != 0
522
+ if block_count < 0
523
+ decoder.skip(decoder.read_long)
524
+ else
525
+ block_count.times &blk
526
+ end
527
+ block_count = decoder.read_long
528
+ end
529
+ end
485
530
  end # DatumReader
486
531
 
487
532
  # DatumWriter for generic ruby objects
@@ -13,7 +13,6 @@
13
13
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
14
  # See the License for the specific language governing permissions and
15
15
  # limitations under the License.
16
- require 'stringio'
17
16
 
18
17
  module Avro::IPC
19
18
 
@@ -162,25 +161,28 @@ module Avro::IPC
162
161
 
163
162
  def read_handshake_response(decoder)
164
163
  handshake_response = HANDSHAKE_REQUESTOR_READER.read(decoder)
165
- case match = handshake_response['match']
164
+ we_have_matching_schema = false
165
+
166
+ case handshake_response['match']
166
167
  when 'BOTH'
167
168
  self.send_protocol = false
168
- true
169
+ we_have_matching_schema = true
169
170
  when 'CLIENT'
170
171
  raise AvroError.new('Handshake failure. match == CLIENT') if send_protocol
171
- self.remote_protocol = handshake_response['serverProtocol']
172
+ self.remote_protocol = Avro::Protocol.parse(handshake_response['serverProtocol'])
172
173
  self.remote_hash = handshake_response['serverHash']
173
174
  self.send_protocol = false
174
- false
175
+ we_have_matching_schema = true
175
176
  when 'NONE'
176
177
  raise AvroError.new('Handshake failure. match == NONE') if send_protocol
177
- self.remote_protocol = handshake_response['serverProtocol']
178
+ self.remote_protocol = Avro::Protocol.parse(handshake_response['serverProtocol'])
178
179
  self.remote_hash = handshake_response['serverHash']
179
180
  self.send_protocol = true
180
- false
181
181
  else
182
182
  raise AvroError.new("Unexpected match: #{match}")
183
183
  end
184
+
185
+ return we_have_matching_schema
184
186
  end
185
187
 
186
188
  def read_call_response(message_name, decoder)
@@ -236,11 +238,9 @@ module Avro::IPC
236
238
  protocol_cache[local_hash] = local_protocol
237
239
  end
238
240
 
239
- def respond(transport)
240
- # Called by a server to deserialize a request, compute and serialize
241
- # a response or error. Compare to 'handle()' in Thrift.
242
-
243
- call_request = transport.read_framed_message
241
+ # Called by a server to deserialize a request, compute and serialize
242
+ # a response or error. Compare to 'handle()' in Thrift.
243
+ def respond(call_request)
244
244
  buffer_decoder = Avro::IO::BinaryDecoder.new(StringIO.new(call_request))
245
245
  buffer_writer = StringIO.new('', 'w+')
246
246
  buffer_encoder = Avro::IO::BinaryEncoder.new(buffer_writer)
@@ -248,7 +248,7 @@ module Avro::IPC
248
248
  response_metadata = {}
249
249
 
250
250
  begin
251
- remote_protocol = process_handshake(transport, buffer_decoder, buffer_encoder)
251
+ remote_protocol = process_handshake(buffer_decoder, buffer_encoder)
252
252
  # handshake failure
253
253
  unless remote_protocol
254
254
  return buffer_writer.string
@@ -300,7 +300,7 @@ module Avro::IPC
300
300
  buffer_writer.string
301
301
  end
302
302
 
303
- def process_handshake(transport, decoder, encoder)
303
+ def process_handshake(decoder, encoder)
304
304
  handshake_request = HANDSHAKE_RESPONDER_READER.read(decoder)
305
305
  handshake_response = {}
306
306
 
@@ -308,8 +308,9 @@ module Avro::IPC
308
308
  client_hash = handshake_request['clientHash']
309
309
  client_protocol = handshake_request['clientProtocol']
310
310
  remote_protocol = protocol_cache[client_hash]
311
+
311
312
  if !remote_protocol && client_protocol
312
- remote_protocol = protocol.parse(client_protocol)
313
+ remote_protocol = Avro::Protocol.parse(client_protocol)
313
314
  protocol_cache[client_hash] = remote_protocol
314
315
  end
315
316
 
@@ -422,7 +423,7 @@ module Avro::IPC
422
423
  end
423
424
 
424
425
  def write_buffer_length(n)
425
- bytes_sent = sock.write([n].pack('I'))
426
+ bytes_sent = sock.write([n].pack('N'))
426
427
  if bytes_sent == 0
427
428
  raise ConnectionClosedException.new("socket sent 0 bytes")
428
429
  end
@@ -433,11 +434,102 @@ module Avro::IPC
433
434
  if read == '' || read == nil
434
435
  raise ConnectionClosedException.new("Socket read 0 bytes.")
435
436
  end
436
- read.unpack('I')[0]
437
+ read.unpack('N')[0]
437
438
  end
438
439
 
439
440
  def close
440
441
  sock.close
441
442
  end
442
443
  end
444
+
445
+ class ConnectionClosedError < StandardError; end
446
+
447
+ class FramedWriter
448
+ attr_reader :writer
449
+ def initialize(writer)
450
+ @writer = writer
451
+ end
452
+
453
+ def write_framed_message(message)
454
+ message_size = message.size
455
+ total_bytes_sent = 0
456
+ while message_size - total_bytes_sent > 0
457
+ if message_size - total_bytes_sent > BUFFER_SIZE
458
+ buffer_size = BUFFER_SIZE
459
+ else
460
+ buffer_size = message_size - total_bytes_sent
461
+ end
462
+ write_buffer(message[total_bytes_sent, buffer_size])
463
+ total_bytes_sent += buffer_size
464
+ end
465
+ write_buffer_size(0)
466
+ end
467
+
468
+ def to_s; writer.string; end
469
+
470
+ private
471
+ def write_buffer(chunk)
472
+ buffer_size = chunk.size
473
+ write_buffer_size(buffer_size)
474
+ writer << chunk
475
+ end
476
+
477
+ def write_buffer_size(n)
478
+ writer.write([n].pack('N'))
479
+ end
480
+ end
481
+
482
+ class FramedReader
483
+ attr_reader :reader
484
+
485
+ def initialize(reader)
486
+ @reader = reader
487
+ end
488
+
489
+ def read_framed_message
490
+ message = []
491
+ loop do
492
+ buffer = ""
493
+ buffer_size = read_buffer_size
494
+
495
+ return message.join if buffer_size == 0
496
+
497
+ while buffer.size < buffer_size
498
+ chunk = reader.read(buffer_size - buffer.size)
499
+ chunk_error?(chunk)
500
+ buffer << chunk
501
+ end
502
+ message << buffer
503
+ end
504
+ end
505
+
506
+ private
507
+ def read_buffer_size
508
+ header = reader.read(BUFFER_HEADER_LENGTH)
509
+ chunk_error?(header)
510
+ header.unpack('N')[0]
511
+ end
512
+
513
+ def chunk_error?(chunk)
514
+ raise ConnectionClosedError.new("Reader read 0 bytes") if chunk == ''
515
+ end
516
+ end
517
+
518
+ # Only works for clients. Sigh.
519
+ class HTTPTransceiver
520
+ attr_reader :remote_name, :host, :port
521
+ def initialize(host, port)
522
+ @host, @port = host, port
523
+ @remote_name = "#{host}:#{port}"
524
+ end
525
+
526
+ def transceive(message)
527
+ writer = FramedWriter.new(StringIO.new)
528
+ writer.write_framed_message(message)
529
+ resp = Net::HTTP.start(host, port) do |http|
530
+ http.post('/', writer.to_s, {'Content-Type' => 'avro/binary'})
531
+ end
532
+ FramedReader.new(StringIO.new(resp.body)).read_framed_message
533
+ end
534
+ end
443
535
  end
@@ -57,11 +57,11 @@ module Avro
57
57
  end
58
58
 
59
59
  def to_s
60
- Yajl.dump to_hash
60
+ Yajl.dump to_avro
61
61
  end
62
62
 
63
63
  def ==(other)
64
- to_hash == Yajl.load(other.to_s)
64
+ to_avro == other.to_avro
65
65
  end
66
66
 
67
67
  private
@@ -96,13 +96,14 @@ module Avro
96
96
  message_objects
97
97
  end
98
98
 
99
- def to_hash
99
+ protected
100
+ def to_avro
100
101
  hsh = {'protocol' => name}
101
102
  hsh['namespace'] = namespace if namespace
102
- hsh['types'] = types.map{|t| Yajl.load(t.to_s) } if types
103
+ hsh['types'] = types.map{|t| t.to_avro } if types
103
104
 
104
105
  if messages
105
- hsh['messages'] = messages.collect_hash{|k,t| [k, Yajl.load(t.to_s)] }
106
+ hsh['messages'] = messages.collect_hash{|k,t| [k, t.to_avro] }
106
107
  end
107
108
 
108
109
  hsh
@@ -119,18 +120,22 @@ module Avro
119
120
  @errors = parse_errors(errors, names) if errors
120
121
  end
121
122
 
122
- def to_s
123
- hsh = {'request' => Yajl.load(request.to_s)}
123
+ def to_avro
124
+ hsh = {'request' => request.to_avro}
124
125
  if response_from_names
125
126
  hsh['response'] = response.fullname
126
127
  else
127
- hsh['response'] = Yajl.load(response.to_s)
128
+ hsh['response'] = response.to_avro
128
129
  end
129
130
 
130
131
  if errors
131
- hsh['errors'] = Yajl.load(errors.to_s)
132
+ hsh['errors'] = errors.to_avro
132
133
  end
133
- Yajl.dump hsh
134
+ hsh
135
+ end
136
+
137
+ def to_s
138
+ Yajl.dump to_avro
134
139
  end
135
140
 
136
141
  def parse_request(request, names)
@@ -51,7 +51,7 @@ module Avro
51
51
  fields = json_obj['fields']
52
52
  return RecordSchema.new(name, namespace, fields, names, type)
53
53
  else
54
- raise SchemaParseError.new("Unknown Named Type: #{type}")
54
+ raise SchemaParseError.new("Unknown named type: #{type}")
55
55
  end
56
56
  elsif VALID_TYPES.include?(type)
57
57
  case type
@@ -73,7 +73,7 @@ module Avro
73
73
  elsif PRIMITIVE_TYPES.include? json_obj
74
74
  return PrimitiveSchema.new(json_obj)
75
75
  else
76
- msg = "Could not make an Avro Schema object from #{json_obj}"
76
+ msg = "#{json_obj.inspect} is not a schema we know about."
77
77
  raise SchemaParseError.new(msg)
78
78
  end
79
79
  end
@@ -129,12 +129,21 @@ module Avro
129
129
  @type.hash
130
130
  end
131
131
 
132
- def to_hash
132
+ def subparse(json_obj, names=nil)
133
+ begin
134
+ Schema.real_parse(json_obj, names)
135
+ rescue => e
136
+ raise e if e.is_a? SchemaParseError
137
+ raise SchemaParseError, "Sub-schema for #{self.class.name} not a valid Avro schema. Bad schema: #{json_obj}"
138
+ end
139
+ end
140
+
141
+ def to_avro
133
142
  {'type' => @type}
134
143
  end
135
144
 
136
145
  def to_s
137
- Yajl.dump to_hash
146
+ Yajl.dump to_avro
138
147
  end
139
148
 
140
149
  class NamedSchema < Schema
@@ -145,7 +154,7 @@ module Avro
145
154
  names = Name.add_name(names, self)
146
155
  end
147
156
 
148
- def to_hash
157
+ def to_avro
149
158
  props = {'name' => @name}
150
159
  props.merge!('namespace' => @namespace) if @namespace
151
160
  super.merge props
@@ -194,8 +203,8 @@ module Avro
194
203
  fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
195
204
  end
196
205
 
197
- def to_hash
198
- hsh = super.merge('fields' => @fields.map {|f|Yajl.load(f.to_s)} )
206
+ def to_avro
207
+ hsh = super.merge('fields' => @fields.map {|f| f.to_avro } )
199
208
  if type == 'request'
200
209
  hsh['fields']
201
210
  else
@@ -215,20 +224,15 @@ module Avro
215
224
  @items = names[items]
216
225
  @items_schema_from_names = true
217
226
  else
218
- begin
219
- @items = Schema.real_parse(items, names)
220
- rescue => e
221
- msg = "Items schema not a valid Avro schema" + e.to_s
222
- raise SchemaParseError, msg
223
- end
227
+ @items = subparse(items, names)
224
228
  end
225
229
  end
226
230
 
227
- def to_hash
231
+ def to_avro
228
232
  name_or_json = if items_schema_from_names
229
233
  items.fullname
230
234
  else
231
- Yajl.load(items.to_s)
235
+ items.to_avro
232
236
  end
233
237
  super.merge('items' => name_or_json)
234
238
  end
@@ -244,21 +248,17 @@ module Avro
244
248
  values_schema = names[values]
245
249
  @values_schema_from_names = true
246
250
  else
247
- begin
248
- values_schema = Schema.real_parse(values, names)
249
- rescue => e
250
- raise SchemaParseError.new('Values schema not a valid Avro schema.' + e.to_s)
251
- end
251
+ values_schema = subparse(values, names)
252
252
  end
253
253
  @values = values_schema
254
254
  end
255
255
 
256
- def to_hash
256
+ def to_avro
257
257
  to_dump = super
258
258
  if values_schema_from_names
259
259
  to_dump['values'] = values
260
260
  else
261
- to_dump['values'] = Yajl.load(values.to_s)
261
+ to_dump['values'] = values.to_avro
262
262
  end
263
263
  to_dump
264
264
  end
@@ -277,11 +277,7 @@ module Avro
277
277
  new_schema = names[schema]
278
278
  from_names = true
279
279
  else
280
- begin
281
- new_schema = Schema.real_parse(schema, names)
282
- rescue
283
- raise SchemaParseError, 'Union item must be a valid Avro schema'
284
- end
280
+ new_schema = subparse(schema, names)
285
281
  end
286
282
 
287
283
  ns_type = new_schema.type
@@ -299,7 +295,7 @@ module Avro
299
295
  end
300
296
  end
301
297
 
302
- def to_s
298
+ def to_avro
303
299
  # FIXME(jmhodges) this from_name pattern is really weird and
304
300
  # seems code-smelly.
305
301
  to_dump = []
@@ -307,10 +303,10 @@ module Avro
307
303
  if schema_from_names_indices.include?(i)
308
304
  to_dump << schema.fullname
309
305
  else
310
- to_dump << Yajl.load(schema.to_s)
306
+ to_dump << schema.to_avro
311
307
  end
312
308
  end
313
- Yajl.dump(to_dump)
309
+ to_dump
314
310
  end
315
311
  end
316
312
 
@@ -325,7 +321,7 @@ module Avro
325
321
  @symbols = symbols
326
322
  end
327
323
 
328
- def to_hash
324
+ def to_avro
329
325
  super.merge('symbols' => symbols)
330
326
  end
331
327
  end
@@ -340,8 +336,9 @@ module Avro
340
336
  super(type)
341
337
  end
342
338
 
343
- def to_s
344
- to_hash.size == 1 ? type.inspect : Yajl.dump(to_hash)
339
+ def to_avro
340
+ hsh = super
341
+ hsh.size == 1 ? type : hsh
345
342
  end
346
343
  end
347
344
 
@@ -356,12 +353,12 @@ module Avro
356
353
  @size = size
357
354
  end
358
355
 
359
- def to_hash
356
+ def to_avro
360
357
  super.merge('size' => @size)
361
358
  end
362
359
  end
363
360
 
364
- class Field
361
+ class Field < Schema
365
362
  attr_reader :type, :name, :default, :order, :type_from_names
366
363
  def initialize(type, name, default=nil, order=nil, names=nil)
367
364
  @type_from_names = false
@@ -369,7 +366,7 @@ module Avro
369
366
  type_schema = names[type]
370
367
  @type_from_names = true
371
368
  else
372
- type_schema = Schema.real_parse(type, names)
369
+ type_schema = subparse(type, names)
373
370
  end
374
371
  @type = type_schema
375
372
  @name = name
@@ -377,8 +374,8 @@ module Avro
377
374
  @order = order
378
375
  end
379
376
 
380
- def to_hash
381
- sigh_type = type_from_names ? type.fullname : Yajl.load(type.to_s)
377
+ def to_avro
378
+ sigh_type = type_from_names ? type.fullname : type.to_avro
382
379
  hsh = {
383
380
  'name' => name,
384
381
  'type' => sigh_type
@@ -387,10 +384,6 @@ module Avro
387
384
  hsh['order'] = order if order
388
385
  hsh
389
386
  end
390
-
391
- def to_s
392
- Yajl.dump(to_hash)
393
- end
394
387
  end
395
388
  end
396
389
 
@@ -66,8 +66,7 @@ if $0 == __FILE__
66
66
  'body' => ARGV[2]
67
67
  }
68
68
 
69
- num_messages = ARGV[3].to_i
70
- num_message = 1 if num_messages == 0
69
+ num_messages = (ARGV[3] || 1).to_i
71
70
 
72
71
  # build the parameters for the request
73
72
  params = {'message' => message}
@@ -83,4 +82,4 @@ if $0 == __FILE__
83
82
  requestor = make_requestor('localhost', 9090, MAIL_PROTOCOL)
84
83
  result = requestor.request('replay', {})
85
84
  puts("Replay Result: " + result)
86
- end
85
+ end
@@ -81,11 +81,12 @@ class MailHandler < RequestHandler
81
81
  def handle(request)
82
82
  responder = MailResponder.new()
83
83
  transport = Avro::IPC::SocketTransport.new(request)
84
- transport.write_framed_message(responder.respond(transport))
84
+ str = transport.read_framed_message
85
+ transport.write_framed_message(responder.respond(str))
85
86
  end
86
87
  end
87
88
 
88
89
  if $0 == __FILE__
89
90
  handler = MailHandler.new('localhost', 9090)
90
91
  handler.run
91
- end
92
+ end
@@ -0,0 +1,121 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'test_help'
18
+
19
+ class TestDataFile < Test::Unit::TestCase
20
+ HERE = File.expand_path File.dirname(__FILE__)
21
+ def setup
22
+ if File.exists?(HERE + '/data.avr')
23
+ File.unlink(HERE + '/data.avr')
24
+ end
25
+ end
26
+
27
+ def teardown
28
+ if File.exists?(HERE + '/data.avr')
29
+ File.unlink(HERE + '/data.avr')
30
+ end
31
+ end
32
+
33
+ def test_differing_schemas_with_primitives
34
+ writer_schema = <<-JSON
35
+ { "type": "record",
36
+ "name": "User",
37
+ "fields" : [
38
+ {"name": "username", "type": "string"},
39
+ {"name": "age", "type": "int"},
40
+ {"name": "verified", "type": "boolean", "default": "false"}
41
+ ]}
42
+ JSON
43
+
44
+ data = [{"username" => "john", "age" => 25, "verified" => true},
45
+ {"username" => "ryan", "age" => 23, "verified" => false}]
46
+
47
+ Avro::DataFile.open('data.avr', 'w', writer_schema) do |dw|
48
+ data.each{|h| dw << h }
49
+ end
50
+
51
+ # extract the username only from the avro serialized file
52
+ reader_schema = <<-JSON
53
+ { "type": "record",
54
+ "name": "User",
55
+ "fields" : [
56
+ {"name": "username", "type": "string"}
57
+ ]}
58
+ JSON
59
+
60
+ Avro::DataFile.open('data.avr', 'r', reader_schema) do |dr|
61
+ dr.each_with_index do |record, i|
62
+ assert_equal data[i]['username'], record['username']
63
+ end
64
+ end
65
+ end
66
+
67
+ def test_differing_schemas_with_complex_objects
68
+ writer_schema = <<-JSON
69
+ { "type": "record",
70
+ "name": "something",
71
+ "fields": [
72
+ {"name": "something_fixed", "type": {"name": "inner_fixed",
73
+ "type": "fixed", "size": 3}},
74
+ {"name": "something_enum", "type": {"name": "inner_enum",
75
+ "type": "enum",
76
+ "symbols": ["hello", "goodbye"]}},
77
+ {"name": "something_array", "type": {"type": "array", "items": "int"}},
78
+ {"name": "something_map", "type": {"type": "map", "values": "int"}},
79
+ {"name": "something_record", "type": {"name": "inner_record",
80
+ "type": "record",
81
+ "fields": [
82
+ {"name": "inner", "type": "int"}
83
+ ]}},
84
+ {"name": "username", "type": "string"}
85
+ ]}
86
+ JSON
87
+
88
+ data = [{"username" => "john",
89
+ "something_fixed" => "foo",
90
+ "something_enum" => "hello",
91
+ "something_array" => [1,2,3],
92
+ "something_map" => {"a" => 1, "b" => 2},
93
+ "something_record" => {"inner" => 2},
94
+ "something_error" => {"code" => 403}
95
+ },
96
+ {"username" => "ryan",
97
+ "something_fixed" => "bar",
98
+ "something_enum" => "goodbye",
99
+ "something_array" => [1,2,3],
100
+ "something_map" => {"a" => 2, "b" => 6},
101
+ "something_record" => {"inner" => 1},
102
+ "something_error" => {"code" => 401}
103
+ }]
104
+
105
+ Avro::DataFile.open('data.avr', 'w', writer_schema) do |dw|
106
+ data.each{|d| dw << d }
107
+ end
108
+
109
+ %w[fixed enum record error array map union].each do |s|
110
+ reader = Yajl.load(writer_schema)
111
+ reader['fields'] = reader['fields'].reject{|f| f['type']['type'] == s}
112
+ Avro::DataFile.open('data.avr', 'r', Yajl.dump(reader)) do |dr|
113
+ dr.each_with_index do |obj, i|
114
+ reader['fields'].each do |field|
115
+ assert_equal data[i][field['name']], obj[field['name']]
116
+ end
117
+ end
118
+ end
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,40 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'test_help'
18
+
19
+ class TestSocketTransport < Test::Unit::TestCase
20
+ def test_buffer_writing
21
+ io = StringIO.new
22
+ st = Avro::IPC::SocketTransport.new(io)
23
+ buffer_length = "\000\000\000\006" # 6 in big-endian
24
+ message = 'abcdef'
25
+ null_ending = "\000\000\000\000" # 0 in big-endian
26
+ full = buffer_length + message + null_ending
27
+ st.write_framed_message('abcdef')
28
+ assert_equal full, io.string
29
+ end
30
+
31
+ def test_buffer_reading
32
+ buffer_length = "\000\000\000\005" # 5 in big-endian
33
+ message = "hello"
34
+ null_ending = "\000\000\000\000" # 0 in big-endian
35
+ full = buffer_length + message + null_ending
36
+ io = StringIO.new(full)
37
+ st = Avro::IPC::SocketTransport.new(io)
38
+ assert_equal 'hello', st.read_framed_message
39
+ end
40
+ end
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: avro
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 1
8
+ - 3
9
+ - 3
10
+ version: 1.3.3
5
11
  platform: ruby
6
12
  authors:
7
13
  - Apache Software Foundation
@@ -9,19 +15,23 @@ autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
17
 
12
- date: 2010-03-01 00:00:00 -08:00
18
+ date: 2010-08-18 00:00:00 -07:00
13
19
  default_executable:
14
20
  dependencies:
15
21
  - !ruby/object:Gem::Dependency
16
22
  name: yajl-ruby
17
- type: :runtime
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
20
26
  requirements:
21
27
  - - ">="
22
28
  - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
23
32
  version: "0"
24
- version:
33
+ type: :runtime
34
+ version_requirements: *id001
25
35
  description: Apache is a data serialization and RPC format
26
36
  email: avro-dev@hadoop.apache.org
27
37
  executables: []
@@ -56,7 +66,8 @@ files:
56
66
  - test/test_help.rb
57
67
  - test/test_io.rb
58
68
  - test/test_protocol.rb
59
- - tmp/test.rb.avro
69
+ - test/test_datafile.rb
70
+ - test/test_socket_transport.rb
60
71
  has_rdoc: true
61
72
  homepage: http://hadoop.apache.org/avro/
62
73
  licenses: []
@@ -70,25 +81,34 @@ rdoc_options:
70
81
  require_paths:
71
82
  - lib
72
83
  required_ruby_version: !ruby/object:Gem::Requirement
84
+ none: false
73
85
  requirements:
74
86
  - - ">="
75
87
  - !ruby/object:Gem::Version
88
+ hash: 3
89
+ segments:
90
+ - 0
76
91
  version: "0"
77
- version:
78
92
  required_rubygems_version: !ruby/object:Gem::Requirement
93
+ none: false
79
94
  requirements:
80
95
  - - ">="
81
96
  - !ruby/object:Gem::Version
97
+ hash: 11
98
+ segments:
99
+ - 1
100
+ - 2
82
101
  version: "1.2"
83
- version:
84
102
  requirements: []
85
103
 
86
104
  rubyforge_project: avro
87
- rubygems_version: 1.3.5
105
+ rubygems_version: 1.3.7
88
106
  signing_key:
89
107
  specification_version: 3
90
108
  summary: Apache Avro for Ruby
91
109
  test_files:
110
+ - test/test_datafile.rb
92
111
  - test/test_help.rb
93
112
  - test/test_io.rb
94
113
  - test/test_protocol.rb
114
+ - test/test_socket_transport.rb
Binary file