avro 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,443 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ require 'stringio'
17
+
18
+ module Avro::IPC
19
+
20
# Raised by Requestor#read_call_response for an error decoded from a call
# response; Responder#respond also uses it to wrap failures of #call.
class AvroRemoteError < Avro::AvroError
end

# Record written by the requestor ahead of every call request.
HANDSHAKE_REQUEST_SCHEMA = Avro::Schema.parse(<<-JSON)
  {
    "type": "record",
    "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc",
    "fields": [
      {"name": "clientHash",
       "type": {"type": "fixed", "name": "MD5", "size": 16}},
      {"name": "clientProtocol", "type": ["null", "string"]},
      {"name": "serverHash", "type": "MD5"},
      {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
    ]
  }
JSON

# Record written by the responder ahead of every call response.
HANDSHAKE_RESPONSE_SCHEMA = Avro::Schema.parse(<<-JSON)
  {
    "type": "record",
    "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc",
    "fields": [
      {"name": "match",
       "type": {"type": "enum", "name": "HandshakeMatch",
                "symbols": ["BOTH", "CLIENT", "NONE"]}},
      {"name": "serverProtocol", "type": ["null", "string"]},
      {"name": "serverHash",
       "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]},
      {"name": "meta",
       "type": ["null", {"type": "map", "values": "bytes"}]}
    ]
  }
JSON

# The requestor writes handshake requests and reads handshake responses;
# the responder does the reverse.
HANDSHAKE_REQUESTOR_WRITER = Avro::IO::DatumWriter.new(HANDSHAKE_REQUEST_SCHEMA)
HANDSHAKE_REQUESTOR_READER = Avro::IO::DatumReader.new(HANDSHAKE_RESPONSE_SCHEMA)
HANDSHAKE_RESPONDER_WRITER = Avro::IO::DatumWriter.new(HANDSHAKE_RESPONSE_SCHEMA)
HANDSHAKE_RESPONDER_READER = Avro::IO::DatumReader.new(HANDSHAKE_REQUEST_SCHEMA)

# Call metadata: a map whose values are bytes.
META_SCHEMA = Avro::Schema.parse('{"type": "map", "values": "bytes"}')
META_WRITER = Avro::IO::DatumWriter.new(META_SCHEMA)
META_READER = Avro::IO::DatumReader.new(META_SCHEMA)

# Error union used when a message declares no errors of its own.
SYSTEM_ERROR_SCHEMA = Avro::Schema.parse('["string"]')

# protocol cache, keyed by the transport's remote name
REMOTE_HASHES = {}
REMOTE_PROTOCOLS = {}

# Size in bytes of the length header in front of each framed buffer.
BUFFER_HEADER_LENGTH = 4
# Largest payload SocketTransport puts into a single framed buffer.
BUFFER_SIZE = 8192

# Raised when an error message is sent by an Avro requestor or responder.
class AvroRemoteException < Avro::AvroError
end

# Raised by SocketTransport when a socket read or write moves zero bytes.
class ConnectionClosedException < Avro::AvroError
end
75
+
76
# Base class for the client side of a protocol interaction.
#
# A Requestor serializes a handshake followed by a call request, hands the
# bytes to +transport+ (an object answering #transceive and #remote_name) and
# decodes the handshake and call response that come back.
class Requestor
  attr_reader :local_protocol, :transport, :remote_protocol, :remote_hash
  attr_accessor :send_protocol

  # local_protocol:: protocol object describing the messages this client sends
  # transport::      object used to exchange messages with the server
  def initialize(local_protocol, transport)
    @local_protocol = local_protocol
    @transport = transport
    @remote_protocol = nil
    @remote_hash = nil
    @send_protocol = nil
  end

  # Stores the server's protocol and records it in REMOTE_PROTOCOLS under the
  # transport's remote name.
  def remote_protocol=(new_remote_protocol)
    @remote_protocol = new_remote_protocol
    REMOTE_PROTOCOLS[transport.remote_name] = remote_protocol
  end

  # Stores the server's protocol hash and records it in REMOTE_HASHES under
  # the transport's remote name.
  def remote_hash=(new_remote_hash)
    @remote_hash = new_remote_hash
    REMOTE_HASHES[transport.remote_name] = remote_hash
  end

  # Writes a request message and reads a response or error message.
  #
  # Returns the decoded response datum, or raises the AvroRemoteError built
  # by #read_error when the server flags an error. When the handshake
  # response tells the client to resend (match == NONE), the whole request is
  # issued again.
  def request(message_name, request_datum)
    # build handshake and call request
    buffer_writer = StringIO.new('', 'w+')
    buffer_encoder = Avro::IO::BinaryEncoder.new(buffer_writer)
    write_handshake_request(buffer_encoder)
    write_call_request(message_name, request_datum, buffer_encoder)

    # send the handshake and call request; block until call response
    call_request = buffer_writer.string
    call_response = transport.transceive(call_request)

    # process the handshake and call response
    buffer_decoder = Avro::IO::BinaryDecoder.new(StringIO.new(call_response))
    if read_handshake_response(buffer_decoder)
      read_call_response(message_name, buffer_decoder)
    else
      request(message_name, request_datum)
    end
  end

  # Writes the handshake record: the local protocol hash, the best known
  # server hash and, when +send_protocol+ is set, the local protocol text.
  def write_handshake_request(encoder)
    local_hash = local_protocol.md5
    remote_name = transport.remote_name
    remote_hash = REMOTE_HASHES[remote_name]
    if remote_hash
      # The hash may have been cached through another Requestor; pick up the
      # protocol cached alongside it so the response can still be decoded.
      @remote_protocol ||= REMOTE_PROTOCOLS[remote_name]
    else
      # Nothing cached yet: guess that the server speaks our own protocol.
      remote_hash = local_hash
      self.remote_protocol = local_protocol
    end

    request_datum = {
      'clientHash' => local_hash,
      'serverHash' => remote_hash
    }
    request_datum['clientProtocol'] = local_protocol.to_s if send_protocol
    HANDSHAKE_REQUESTOR_WRITER.write(request_datum, encoder)
  end

  # Writes a call request. The format of a call request is:
  #   * request metadata, a map with values of type bytes
  #   * the message name, an Avro string, followed by
  #   * the message parameters. Parameters are serialized according to
  #     the message's request declaration.
  #
  # Raises Avro::AvroError when +message_name+ is not part of the local
  # protocol.
  def write_call_request(message_name, request_datum, encoder)
    # TODO request metadata (not yet implemented)
    request_metadata = {}
    META_WRITER.write(request_metadata, encoder)

    message = local_protocol.messages[message_name]
    raise Avro::AvroError, "Unknown message: #{message_name}" unless message
    encoder.write_string(message.name)

    write_request(message.request, request_datum, encoder)
  end

  # Serializes +request_datum+ according to +request_schema+.
  def write_request(request_schema, request_datum, encoder)
    datum_writer = Avro::IO::DatumWriter.new(request_schema)
    datum_writer.write(request_datum, encoder)
  end

  # Reads the handshake response and updates the cached server protocol.
  #
  # Returns true when a call response follows the handshake (match is BOTH
  # or CLIENT) and false when the request has to be sent again together with
  # the client protocol (match is NONE). Raises Avro::AvroError when the
  # server still reports a mismatch although the protocol text was sent, or
  # when the match value is not recognized.
  def read_handshake_response(decoder)
    handshake_response = HANDSHAKE_REQUESTOR_READER.read(decoder)
    case match = handshake_response['match']
    when 'BOTH'
      self.send_protocol = false
      true
    when 'CLIENT'
      raise Avro::AvroError, 'Handshake failure. match == CLIENT' if send_protocol
      cache_server_protocol(handshake_response)
      self.send_protocol = false
      # The server knew our protocol and has already answered the call; the
      # response follows the handshake, so it must not be requested again.
      true
    when 'NONE'
      raise Avro::AvroError, 'Handshake failure. match == NONE' if send_protocol
      cache_server_protocol(handshake_response)
      self.send_protocol = true
      false
    else
      raise Avro::AvroError, "Unexpected match: #{match}"
    end
  end

  # Reads a call response. The format of a call response is:
  #   * response metadata, a map with values of type bytes
  #   * a one-byte error flag boolean, followed by either:
  #     * if the error flag is false,
  #       the message response, serialized per the message's response schema.
  #     * if the error flag is true,
  #       the error, serialized per the message's error union schema.
  #
  # Returns the decoded response, or raises the decoded error.
  def read_call_response(message_name, decoder)
    # Response metadata is not used yet, but has to be consumed.
    META_READER.read(decoder)

    # remote response schema
    remote_message_schema = remote_protocol.messages[message_name]
    unless remote_message_schema
      raise Avro::AvroError, "Unknown remote message: #{message_name}"
    end

    # local response schema
    local_message_schema = local_protocol.messages[message_name]
    unless local_message_schema
      raise Avro::AvroError, "Unknown local message: #{message_name}"
    end

    # error flag
    if !decoder.read_boolean
      writers_schema = remote_message_schema.response
      readers_schema = local_message_schema.response
      read_response(writers_schema, readers_schema, decoder)
    else
      writers_schema = remote_message_schema.errors || SYSTEM_ERROR_SCHEMA
      readers_schema = local_message_schema.errors || SYSTEM_ERROR_SCHEMA
      raise read_error(writers_schema, readers_schema, decoder)
    end
  end

  # Decodes a successful response written with +writers_schema+ into the
  # shape of +readers_schema+.
  def read_response(writers_schema, readers_schema, decoder)
    datum_reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
    datum_reader.read(decoder)
  end

  # Decodes an error datum and returns it wrapped in an AvroRemoteError
  # (the caller raises it).
  def read_error(writers_schema, readers_schema, decoder)
    datum_reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
    AvroRemoteError.new(datum_reader.read(decoder))
  end

  private

  # Parses the protocol text carried by a handshake response and caches it,
  # together with its hash, for later calls.
  def cache_server_protocol(handshake_response)
    self.remote_protocol = Avro::Protocol.parse(handshake_response['serverProtocol'])
    self.remote_hash = handshake_response['serverHash']
  end
end
228
+
229
# Base class for the server side of a protocol interaction.
#
# Subclasses implement #call; #respond drives one request/response exchange
# over a transport.
class Responder
  attr_reader :local_protocol, :local_hash, :protocol_cache

  # local_protocol:: protocol object describing the messages this server
  #                  answers; its md5 becomes +local_hash+ and the protocol
  #                  is stored in +protocol_cache+ under that hash.
  def initialize(local_protocol)
    @local_protocol = local_protocol
    @local_hash = local_protocol.md5
    @protocol_cache = {}
    protocol_cache[local_hash] = local_protocol
  end

  # Called by a server to deserialize a request, compute and serialize
  # a response or error. Compare to 'handle()' in Thrift.
  #
  # Reads one framed message from +transport+ and returns the serialized
  # reply as a String: the handshake response followed, unless the handshake
  # yielded no client protocol, by the call response or an error.
  def respond(transport)
    call_request = transport.read_framed_message
    buffer_decoder = Avro::IO::BinaryDecoder.new(StringIO.new(call_request))
    buffer_writer = StringIO.new('', 'w+')
    buffer_encoder = Avro::IO::BinaryEncoder.new(buffer_writer)
    error = nil
    response_metadata = {}
    # Offset in buffer_writer where the handshake response ends; anything
    # after it is call response data that may have to be discarded.
    handshake_end = 0

    begin
      remote_protocol = process_handshake(transport, buffer_decoder, buffer_encoder)
      handshake_end = buffer_writer.pos
      # handshake failure
      return buffer_writer.string unless remote_protocol

      # read request using remote protocol; the metadata is not used yet
      # but has to be consumed
      META_READER.read(buffer_decoder)
      remote_message_name = buffer_decoder.read_string

      # get remote and local request schemas so we can do
      # schema resolution (one fine day)
      remote_message = remote_protocol.messages[remote_message_name]
      unless remote_message
        raise Avro::AvroError, "Unknown remote message: #{remote_message_name}"
      end
      local_message = local_protocol.messages[remote_message_name]
      unless local_message
        raise Avro::AvroError, "Unknown local message: #{remote_message_name}"
      end
      writers_schema = remote_message.request
      readers_schema = local_message.request
      request = read_request(writers_schema, readers_schema, buffer_decoder)

      # perform server logic
      begin
        response = call(local_message, request)
      rescue AvroRemoteError => e
        error = e
      rescue Exception => e
        # Deliberately broad: the default #call raises NotImplementedError,
        # which is not a StandardError, and it is reported to the client too.
        error = AvroRemoteError.new(e.to_s)
      end

      # write response using local protocol
      META_WRITER.write(response_metadata, buffer_encoder)
      buffer_encoder.write_boolean(!!error)
      if error.nil?
        writers_schema = local_message.response
        write_response(writers_schema, response, buffer_encoder)
      else
        writers_schema = local_message.errors || SYSTEM_ERROR_SCHEMA
        write_error(writers_schema, error, buffer_encoder)
      end
    rescue Avro::AvroError => e
      error = AvroRemoteException.new(e.to_s)
      # Drop any partially written call response but keep the handshake, then
      # append a system error so the client actually receives it.
      buffer_writer.truncate(handshake_end)
      buffer_writer.pos = handshake_end
      META_WRITER.write(response_metadata, buffer_encoder)
      buffer_encoder.write_boolean(true)
      write_error(SYSTEM_ERROR_SCHEMA, error, buffer_encoder)
    end
    buffer_writer.string
  end

  # Reads the handshake request, writes the handshake response to +encoder+
  # and returns the client's protocol, or nil when it is neither cached nor
  # included in the request. +transport+ is accepted but not used.
  def process_handshake(transport, decoder, encoder)
    handshake_request = HANDSHAKE_RESPONDER_READER.read(decoder)
    handshake_response = {}

    # determine the remote protocol
    client_hash = handshake_request['clientHash']
    client_protocol = handshake_request['clientProtocol']
    remote_protocol = protocol_cache[client_hash]
    if !remote_protocol && client_protocol
      remote_protocol = Avro::Protocol.parse(client_protocol)
      protocol_cache[client_hash] = remote_protocol
    end

    # evaluate remote's guess of the local protocol
    server_hash = handshake_request['serverHash']
    handshake_response['match'] =
      if !remote_protocol
        'NONE'
      elsif local_hash == server_hash
        'BOTH'
      else
        'CLIENT'
      end

    # Unless both sides already agree, tell the client what we speak.
    if handshake_response['match'] != 'BOTH'
      handshake_response['serverProtocol'] = local_protocol.to_s
      handshake_response['serverHash'] = local_hash
    end

    HANDSHAKE_RESPONDER_WRITER.write(handshake_response, encoder)
    remote_protocol
  end

  # Actual work done by server: cf. handler in thrift.
  # Subclasses override this; the default raises NotImplementedError.
  def call(local_message, request)
    raise NotImplementedError
  end

  # Decodes a request written with +writers_schema+ into the shape of
  # +readers_schema+.
  def read_request(writers_schema, readers_schema, decoder)
    datum_reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
    datum_reader.read(decoder)
  end

  # Serializes +response_datum+ according to +writers_schema+.
  def write_response(writers_schema, response_datum, encoder)
    datum_writer = Avro::IO::DatumWriter.new(writers_schema)
    datum_writer.write(response_datum, encoder)
  end

  # Serializes the string form of +error_exception+ according to
  # +writers_schema+.
  def write_error(writers_schema, error_exception, encoder)
    datum_writer = Avro::IO::DatumWriter.new(writers_schema)
    datum_writer.write(error_exception.to_s, encoder)
  end
end
361
+
362
# A simple socket-based Transport implementation.
#
# A message travels as a series of buffers. Each buffer is preceded by a
# BUFFER_HEADER_LENGTH-byte big-endian length, and a zero-length buffer marks
# the end of the message.
class SocketTransport
  # +remote_name+ is never assigned by this class, so it reads as nil.
  attr_reader :sock, :remote_name

  # sock:: IO-like object answering #read, #write and #close
  def initialize(sock)
    @sock = sock
  end

  # Sends +request+ as a framed message and returns the framed reply.
  def transceive(request)
    write_framed_message(request)
    read_framed_message
  end

  # Reads buffers until the zero-length terminator and returns their
  # concatenated payload. Raises ConnectionClosedException when the socket
  # runs dry before the message is complete.
  def read_framed_message
    message = []
    loop do
      buffer_length = read_buffer_length
      return message.join if buffer_length == 0

      buffer = StringIO.new
      while buffer.tell < buffer_length
        chunk = sock.read(buffer_length - buffer.tell)
        # IO#read answers nil (not '') at end of file.
        if chunk.nil? || chunk.empty?
          raise ConnectionClosedException, 'Socket read 0 bytes.'
        end
        buffer.write(chunk)
      end
      message << buffer.string
    end
  end

  # Writes +message+ as buffers of at most BUFFER_SIZE bytes followed by the
  # zero-length terminator.
  def write_framed_message(message)
    # Work on raw bytes so lengths and slices are byte counts.
    message = as_bytes(message)
    message_length = message.size
    total_bytes_sent = 0
    while total_bytes_sent < message_length
      buffer_length = [message_length - total_bytes_sent, BUFFER_SIZE].min
      write_buffer(message[total_bytes_sent, buffer_length])
      total_bytes_sent += buffer_length
    end
    # A message is always terminated by a zero-length buffer.
    write_buffer_length(0)
  end

  # Writes one buffer: its length header followed by the payload.
  def write_buffer(chunk)
    chunk = as_bytes(chunk)
    buffer_length = chunk.size
    write_buffer_length(buffer_length)
    total_bytes_sent = 0
    while total_bytes_sent < buffer_length
      bytes_sent = sock.write(chunk[total_bytes_sent..-1])
      if bytes_sent == 0
        raise ConnectionClosedException, 'Socket sent 0 bytes.'
      end
      total_bytes_sent += bytes_sent
    end
  end

  # Writes +n+ as a 4-byte big-endian buffer length.
  def write_buffer_length(n)
    bytes_sent = sock.write([n].pack('N'))
    if bytes_sent == 0
      raise ConnectionClosedException, 'socket sent 0 bytes'
    end
  end

  # Reads a 4-byte big-endian buffer length. Raises ConnectionClosedException
  # when the header is missing or incomplete.
  def read_buffer_length
    header = sock.read(BUFFER_HEADER_LENGTH)
    if header.nil? || header.empty?
      raise ConnectionClosedException, 'Socket read 0 bytes.'
    elsif header.size < BUFFER_HEADER_LENGTH
      raise ConnectionClosedException,
            "Socket read #{header.size} of #{BUFFER_HEADER_LENGTH} header bytes."
    end
    header.unpack('N')[0]
  end

  # Closes the underlying socket.
  def close
    sock.close
  end

  private

  # Returns +str+ as a binary-encoded copy where the Ruby version has string
  # encodings, so that #size and #[] operate on bytes rather than characters.
  def as_bytes(str)
    return str unless str.respond_to?(:force_encoding)
    str.dup.force_encoding('BINARY')
  end
end
443
+ end
@@ -0,0 +1,160 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
module Avro
  # A parsed protocol declaration: a name, a namespace, named types and
  # messages, plus the MD5 digest of its JSON form.
  class Protocol
    VALID_TYPE_SCHEMA_TYPES = Set.new(%w[enum record error fixed])
    class ProtocolParseError < Avro::AvroError; end

    attr_reader :name, :namespace, :types, :messages, :md5

    # Builds a Protocol from its JSON text. Raises ProtocolParseError when
    # the text does not decode to a JSON object.
    def self.parse(protocol_string)
      json_data = Yajl.load(protocol_string)
      unless json_data.is_a?(Hash)
        raise ProtocolParseError, "Not a JSON object: #{json_data}"
      end

      Protocol.new(*json_data.values_at('protocol', 'namespace', 'types', 'messages'))
    end

    # name::      protocol name (String)
    # namespace:: namespace (String)
    # types::     Array of type declarations
    # messages::  Hash mapping message names to message declarations
    #
    # The nil defaults are rejected by the checks below: every argument has
    # to be given. Raises ProtocolParseError on the first invalid argument.
    def initialize(name, namespace=nil, types=nil, messages=nil)
      # Ensure valid ctor args
      raise ProtocolParseError, 'Protocols must have a non-empty name.' unless name
      raise ProtocolParseError, 'The name property must be a string.' unless name.is_a?(String)
      raise ProtocolParseError, 'The namespace property must be a string.' unless namespace.is_a?(String)
      raise ProtocolParseError, 'The types property must be a list.' unless types.is_a?(Array)
      raise ProtocolParseError, 'The messages property must be a JSON object.' unless messages.is_a?(Hash)

      # Registry handed to the schema parser while types and messages are read.
      known_names = {}
      @name = name
      @namespace = namespace
      @types = parse_types(types, known_names)
      @messages = parse_messages(messages, known_names)
      @md5 = Digest::MD5.digest(to_s)
    end

    # JSON text of the protocol.
    def to_s
      Yajl.dump(to_hash)
    end

    # Two protocols are equal when the JSON form of +other+ decodes to the
    # same structure as this protocol's hash form.
    def ==(other)
      own_form = to_hash
      own_form == Yajl.load(other.to_s)
    end

    private

    # Parses every type declaration and returns the schema objects. Raises
    # ProtocolParseError for a schema whose type is not listed in
    # VALID_TYPE_SCHEMA_TYPES.
    def parse_types(types, type_names)
      types.map do |type|
        # FIXME adding type.name to type_names is not defined in the
        # spec. Possible bug in the python impl and the spec.
        schema = Schema.real_parse(type, type_names)
        next schema if VALID_TYPE_SCHEMA_TYPES.include?(schema.type)

        raise ProtocolParseError, "Type #{type} not an enum, record, fixed or error."
      end
    end

    # Turns the message declarations into a Hash of Message objects keyed by
    # message name.
    def parse_messages(messages, names)
      messages.inject({}) do |parsed, (message_name, body)|
        if parsed.has_key?(message_name)
          raise ProtocolParseError, "Message name \"#{message_name}\" repeated."
        end
        unless body.is_a?(Hash)
          raise ProtocolParseError, "Message name \"#{message_name}\" has non-object body #{body.inspect}"
        end

        parsed[message_name] =
          Message.new(message_name, body['request'], body['response'], body['errors'], names)
        parsed
      end
    end

    # Hash form of the protocol, as serialized by #to_s.
    def to_hash
      result = {'protocol' => name}
      result['namespace'] = namespace if namespace
      if types
        result['types'] = types.map { |type| Yajl.load(type.to_s) }
      end
      if messages
        result['messages'] = messages.collect_hash { |key, message| [key, Yajl.load(message.to_s)] }
      end
      result
    end

    # One message of a protocol: its request record, its response schema and
    # an optional errors schema.
    class Message
      attr_reader :name, :response_from_names, :request, :response, :errors

      # +names+ is the registry of already parsed named schemas;
      # +response_from_names+ records whether the response was found in it.
      def initialize(name, request, response, errors=nil, names=nil)
        @name = name
        @response_from_names = false

        @request = parse_request(request, names)
        @response = parse_response(response, names)
        @errors = parse_errors(errors, names) if errors
      end

      # JSON text of the message. A response that was looked up by name is
      # written as that schema's fullname instead of its full definition.
      def to_s
        fields = {'request' => Yajl.load(request.to_s)}
        fields['response'] = response_from_names ? response.fullname : Yajl.load(response.to_s)
        fields['errors'] = Yajl.load(errors.to_s) if errors
        Yajl.dump(fields)
      end

      # Wraps the request's field list in a record schema. Raises
      # ProtocolParseError unless +request+ is an Array.
      def parse_request(request, names)
        return Schema::RecordSchema.new(nil, nil, request, names, 'request') if request.is_a?(Array)

        raise ProtocolParseError, "Request property not an Array: #{request.inspect}"
      end

      # Returns the named schema when +response+ is a String found in
      # +names+, and parses +response+ as a schema otherwise.
      def parse_response(response, names)
        unless response.is_a?(String) && names[response]
          return Schema.real_parse(response, names)
        end

        @response_from_names = true
        names[response]
      end

      # Parses the errors list as a schema. Raises ProtocolParseError unless
      # +errors+ is an Array.
      def parse_errors(errors, names)
        return Schema.real_parse(errors, names) if errors.is_a?(Array)

        raise ProtocolParseError, "Errors property not an Array: #{errors}"
      end
    end
  end
end