avro 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,443 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ require 'stringio'
17
+
18
+ module Avro::IPC
19
+
20
  # Error type used to carry a failure across the wire: Requestor#read_error
  # wraps a decoded error datum in it, and Responder#respond uses it for
  # failures raised while running the server-side handler.
  class AvroRemoteError < Avro::AvroError; end

  # Schema of the handshake record a client writes ahead of every call request.
  HANDSHAKE_REQUEST_SCHEMA = Avro::Schema.parse <<-JSON
{
"type": "record",
"name": "HandshakeRequest", "namespace":"org.apache.avro.ipc",
"fields": [
{"name": "clientHash",
"type": {"type": "fixed", "name": "MD5", "size": 16}},
{"name": "clientProtocol", "type": ["null", "string"]},
{"name": "serverHash", "type": "MD5"},
{"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
]
}
  JSON

  # Schema of the handshake record a server writes ahead of every call response.
  HANDSHAKE_RESPONSE_SCHEMA = Avro::Schema.parse <<-JSON
{
"type": "record",
"name": "HandshakeResponse", "namespace": "org.apache.avro.ipc",
"fields": [
{"name": "match",
"type": {"type": "enum", "name": "HandshakeMatch",
"symbols": ["BOTH", "CLIENT", "NONE"]}},
{"name": "serverProtocol", "type": ["null", "string"]},
{"name": "serverHash",
"type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]},
{"name": "meta",
"type": ["null", {"type": "map", "values": "bytes"}]}
]
}
  JSON

  # Shared handshake (de)serializers. The requestor writes requests and reads
  # responses; the responder does the opposite.
  HANDSHAKE_REQUESTOR_WRITER = Avro::IO::DatumWriter.new(HANDSHAKE_REQUEST_SCHEMA)
  HANDSHAKE_REQUESTOR_READER = Avro::IO::DatumReader.new(HANDSHAKE_RESPONSE_SCHEMA)
  HANDSHAKE_RESPONDER_WRITER = Avro::IO::DatumWriter.new(HANDSHAKE_RESPONSE_SCHEMA)
  HANDSHAKE_RESPONDER_READER = Avro::IO::DatumReader.new(HANDSHAKE_REQUEST_SCHEMA)

  # Call metadata: a map with values of type bytes, written before the body of
  # every call request and call response.
  META_SCHEMA = Avro::Schema.parse('{"type": "map", "values": "bytes"}')
  META_WRITER = Avro::IO::DatumWriter.new(META_SCHEMA)
  META_READER = Avro::IO::DatumReader.new(META_SCHEMA)

  # Error union used when a message declares no errors of its own.
  SYSTEM_ERROR_SCHEMA = Avro::Schema.parse('["string"]')

  # protocol cache
  # Both hashes are keyed by the transport's remote_name and are filled in by
  # Requestor#remote_hash= and Requestor#remote_protocol=.
  REMOTE_HASHES = {}
  REMOTE_PROTOCOLS = {}

  # Number of bytes SocketTransport reads for the length prefix of each buffer.
  BUFFER_HEADER_LENGTH = 4
  # Largest chunk SocketTransport#write_framed_message sends as one buffer.
  BUFFER_SIZE = 8192

  # Raised when an error message is sent by an Avro requestor or responder.
  class AvroRemoteException < Avro::AvroError; end

  # Raised by SocketTransport when a socket read or write transfers no bytes.
  class ConnectionClosedException < Avro::AvroError; end
75
+
76
+ class Requestor
77
+ """Base class for the client side of a protocol interaction."""
78
+ attr_reader :local_protocol, :transport
79
+ attr_accessor :remote_protocol, :remote_hash, :send_protocol
80
+
81
+ def initialize(local_protocol, transport)
82
+ @local_protocol = local_protocol
83
+ @transport = transport
84
+ @remote_protocol = nil
85
+ @remote_hash = nil
86
+ @send_protocol = nil
87
+ end
88
+
89
+ def remote_protocol=(new_remote_protocol)
90
+ @remote_protocol = new_remote_protocol
91
+ REMOTE_PROTOCOLS[transport.remote_name] = remote_protocol
92
+ end
93
+
94
+ def remote_hash=(new_remote_hash)
95
+ @remote_hash = new_remote_hash
96
+ REMOTE_HASHES[transport.remote_name] = remote_hash
97
+ end
98
+
99
+ def request(message_name, request_datum)
100
+ # Writes a request message and reads a response or error message.
101
+ # build handshake and call request
102
+ buffer_writer = StringIO.new('', 'w+')
103
+ buffer_encoder = Avro::IO::BinaryEncoder.new(buffer_writer)
104
+ write_handshake_request(buffer_encoder)
105
+ write_call_request(message_name, request_datum, buffer_encoder)
106
+
107
+ # send the handshake and call request; block until call response
108
+ call_request = buffer_writer.string
109
+ call_response = transport.transceive(call_request)
110
+
111
+ # process the handshake and call response
112
+ buffer_decoder = Avro::IO::BinaryDecoder.new(StringIO.new(call_response))
113
+ if read_handshake_response(buffer_decoder)
114
+ read_call_response(message_name, buffer_decoder)
115
+ else
116
+ request(message_name, request_datum)
117
+ end
118
+ end
119
+
120
+ def write_handshake_request(encoder)
121
+ local_hash = local_protocol.md5
122
+ remote_name = transport.remote_name
123
+ remote_hash = REMOTE_HASHES[remote_name]
124
+ unless remote_hash
125
+ remote_hash = local_hash
126
+ self.remote_protocol = local_protocol
127
+ end
128
+ request_datum = {
129
+ 'clientHash' => local_hash,
130
+ 'serverHash' => remote_hash
131
+ }
132
+ if send_protocol
133
+ request_datum['clientProtocol'] = local_protocol.to_s
134
+ end
135
+ HANDSHAKE_REQUESTOR_WRITER.write(request_datum, encoder)
136
+ end
137
+
138
+ def write_call_request(message_name, request_datum, encoder)
139
+ # The format of a call request is:
140
+ # * request metadata, a map with values of type bytes
141
+ # * the message name, an Avro string, followed by
142
+ # * the message parameters. Parameters are serialized according to
143
+ # the message's request declaration.
144
+
145
+ # TODO request metadata (not yet implemented)
146
+ request_metadata = {}
147
+ META_WRITER.write(request_metadata, encoder)
148
+
149
+ message = local_protocol.messages[message_name]
150
+ unless message
151
+ raise AvroError, "Unknown message: #{message_name}"
152
+ end
153
+ encoder.write_string(message.name)
154
+
155
+ write_request(message.request, request_datum, encoder)
156
+ end
157
+
158
+ def write_request(request_schema, request_datum, encoder)
159
+ datum_writer = Avro::IO::DatumWriter.new(request_schema)
160
+ datum_writer.write(request_datum, encoder)
161
+ end
162
+
163
+ def read_handshake_response(decoder)
164
+ handshake_response = HANDSHAKE_REQUESTOR_READER.read(decoder)
165
+ case match = handshake_response['match']
166
+ when 'BOTH'
167
+ self.send_protocol = false
168
+ true
169
+ when 'CLIENT'
170
+ raise AvroError.new('Handshake failure. match == CLIENT') if send_protocol
171
+ self.remote_protocol = handshake_response['serverProtocol']
172
+ self.remote_hash = handshake_response['serverHash']
173
+ self.send_protocol = false
174
+ false
175
+ when 'NONE'
176
+ raise AvroError.new('Handshake failure. match == NONE') if send_protocol
177
+ self.remote_protocol = handshake_response['serverProtocol']
178
+ self.remote_hash = handshake_response['serverHash']
179
+ self.send_protocol = true
180
+ false
181
+ else
182
+ raise AvroError.new("Unexpected match: #{match}")
183
+ end
184
+ end
185
+
186
+ def read_call_response(message_name, decoder)
187
+ # The format of a call response is:
188
+ # * response metadata, a map with values of type bytes
189
+ # * a one-byte error flag boolean, followed by either:
190
+ # * if the error flag is false,
191
+ # the message response, serialized per the message's response schema.
192
+ # * if the error flag is true,
193
+ # the error, serialized per the message's error union schema.
194
+ response_metadata = META_READER.read(decoder)
195
+
196
+ # remote response schema
197
+ remote_message_schema = remote_protocol.messages[message_name]
198
+ raise AvroError.new("Unknown remote message: #{message_name}") unless remote_message_schema
199
+
200
+ # local response schema
201
+ local_message_schema = local_protocol.messages[message_name]
202
+ unless local_message_schema
203
+ raise AvroError.new("Unknown local message: #{message_name}")
204
+ end
205
+
206
+ # error flag
207
+ if !decoder.read_boolean
208
+ writers_schema = remote_message_schema.response
209
+ readers_schema = local_message_schema.response
210
+ read_response(writers_schema, readers_schema, decoder)
211
+ else
212
+ writers_schema = remote_message_schema.errors || SYSTEM_ERROR_SCHEMA
213
+ readers_schema = local_message_schema.errors || SYSTEM_ERROR_SCHEMA
214
+ raise read_error(writers_schema, readers_schema, decoder)
215
+ end
216
+ end
217
+
218
+ def read_response(writers_schema, readers_schema, decoder)
219
+ datum_reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
220
+ datum_reader.read(decoder)
221
+ end
222
+
223
+ def read_error(writers_schema, readers_schema, decoder)
224
+ datum_reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
225
+ AvroRemoteError.new(datum_reader.read(decoder))
226
+ end
227
+ end
228
+
229
+ # Base class for the server side of a protocol interaction.
230
+ class Responder
231
+ attr_reader :local_protocol, :local_hash, :protocol_cache
232
+ def initialize(local_protocol)
233
+ @local_protocol = local_protocol
234
+ @local_hash = self.local_protocol.md5
235
+ @protocol_cache = {}
236
+ protocol_cache[local_hash] = local_protocol
237
+ end
238
+
239
+ def respond(transport)
240
+ # Called by a server to deserialize a request, compute and serialize
241
+ # a response or error. Compare to 'handle()' in Thrift.
242
+
243
+ call_request = transport.read_framed_message
244
+ buffer_decoder = Avro::IO::BinaryDecoder.new(StringIO.new(call_request))
245
+ buffer_writer = StringIO.new('', 'w+')
246
+ buffer_encoder = Avro::IO::BinaryEncoder.new(buffer_writer)
247
+ error = nil
248
+ response_metadata = {}
249
+
250
+ begin
251
+ remote_protocol = process_handshake(transport, buffer_decoder, buffer_encoder)
252
+ # handshake failure
253
+ unless remote_protocol
254
+ return buffer_writer.string
255
+ end
256
+
257
+ # read request using remote protocol
258
+ request_metadata = META_READER.read(buffer_decoder)
259
+ remote_message_name = buffer_decoder.read_string
260
+
261
+ # get remote and local request schemas so we can do
262
+ # schema resolution (one fine day)
263
+ remote_message = remote_protocol.messages[remote_message_name]
264
+ unless remote_message
265
+ raise AvroError.new("Unknown remote message: #{remote_message_name}")
266
+ end
267
+ local_message = local_protocol.messages[remote_message_name]
268
+ unless local_message
269
+ raise AvroError.new("Unknown local message: #{remote_message_name}")
270
+ end
271
+ writers_schema = remote_message.request
272
+ readers_schema = local_message.request
273
+ request = read_request(writers_schema, readers_schema, buffer_decoder)
274
+ # perform server logic
275
+ begin
276
+ response = call(local_message, request)
277
+ rescue AvroRemoteError => e
278
+ error = e
279
+ rescue Exception => e
280
+ error = AvroRemoteError.new(e.to_s)
281
+ end
282
+
283
+ # write response using local protocol
284
+ META_WRITER.write(response_metadata, buffer_encoder)
285
+ buffer_encoder.write_boolean(!!error)
286
+ if error.nil?
287
+ writers_schema = local_message.response
288
+ write_response(writers_schema, response, buffer_encoder)
289
+ else
290
+ writers_schema = local_message.errors || SYSTEM_ERROR_SCHEMA
291
+ write_error(writers_schema, error, buffer_encoder)
292
+ end
293
+ rescue Avro::AvroError => e
294
+ error = AvroRemoteException.new(e.to_s)
295
+ buffer_encoder = Avro::IO::BinaryEncoder.new(StringIO.new)
296
+ META_WRITER.write(response_metadata, buffer_encoder)
297
+ buffer_encoder.write_boolean(true)
298
+ self.write_error(SYSTEM_ERROR_SCHEMA, error, buffer_encoder)
299
+ end
300
+ buffer_writer.string
301
+ end
302
+
303
+ def process_handshake(transport, decoder, encoder)
304
+ handshake_request = HANDSHAKE_RESPONDER_READER.read(decoder)
305
+ handshake_response = {}
306
+
307
+ # determine the remote protocol
308
+ client_hash = handshake_request['clientHash']
309
+ client_protocol = handshake_request['clientProtocol']
310
+ remote_protocol = protocol_cache[client_hash]
311
+ if !remote_protocol && client_protocol
312
+ remote_protocol = protocol.parse(client_protocol)
313
+ protocol_cache[client_hash] = remote_protocol
314
+ end
315
+
316
+ # evaluate remote's guess of the local protocol
317
+ server_hash = handshake_request['serverHash']
318
+ if local_hash == server_hash
319
+ if !remote_protocol
320
+ handshake_response['match'] = 'NONE'
321
+ else
322
+ handshake_response['match'] = 'BOTH'
323
+ end
324
+ else
325
+ if !remote_protocol
326
+ handshake_response['match'] = 'NONE'
327
+ else
328
+ handshake_response['match'] = 'CLIENT'
329
+ end
330
+ end
331
+
332
+ if handshake_response['match'] != 'BOTH'
333
+ handshake_response['serverProtocol'] = local_protocol.to_s
334
+ handshake_response['serverHash'] = local_hash
335
+ end
336
+
337
+ HANDSHAKE_RESPONDER_WRITER.write(handshake_response, encoder)
338
+ remote_protocol
339
+ end
340
+
341
+ def call(local_message, request)
342
+ # Actual work done by server: cf. handler in thrift.
343
+ raise NotImplementedError
344
+ end
345
+
346
+ def read_request(writers_schema, readers_schema, decoder)
347
+ datum_reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
348
+ datum_reader.read(decoder)
349
+ end
350
+
351
+ def write_response(writers_schema, response_datum, encoder)
352
+ datum_writer = Avro::IO::DatumWriter.new(writers_schema)
353
+ datum_writer.write(response_datum, encoder)
354
+ end
355
+
356
+ def write_error(writers_schema, error_exception, encoder)
357
+ datum_writer = Avro::IO::DatumWriter.new(writers_schema)
358
+ datum_writer.write(error_exception.to_s, encoder)
359
+ end
360
+ end
361
+
362
+ class SocketTransport
363
+ # A simple socket-based Transport implementation.
364
+
365
+ attr_reader :sock, :remote_name
366
+
367
+ def initialize(sock)
368
+ @sock = sock
369
+ end
370
+
371
+ def transceive(request)
372
+ write_framed_message(request)
373
+ read_framed_message
374
+ end
375
+
376
+ def read_framed_message
377
+ message = []
378
+ loop do
379
+ buffer = StringIO.new
380
+ buffer_length = read_buffer_length
381
+ if buffer_length == 0
382
+ return message.join
383
+ end
384
+ while buffer.tell < buffer_length
385
+ chunk = sock.read(buffer_length - buffer.tell)
386
+ if chunk == ''
387
+ raise ConnectionClosedException.new("Socket read 0 bytes.")
388
+ end
389
+ buffer.write(chunk)
390
+ end
391
+ message << buffer.string
392
+ end
393
+ end
394
+
395
+ def write_framed_message(message)
396
+ message_length = message.size
397
+ total_bytes_sent = 0
398
+ while message_length - total_bytes_sent > 0
399
+ if message_length - total_bytes_sent > BUFFER_SIZE:
400
+ buffer_length = BUFFER_SIZE
401
+ else
402
+ buffer_length = message_length - total_bytes_sent
403
+ end
404
+ write_buffer(message[total_bytes_sent,buffer_length])
405
+ total_bytes_sent += buffer_length
406
+ end
407
+ # A message is always terminated by a zero-length buffer.
408
+ write_buffer_length(0)
409
+ end
410
+
411
+ def write_buffer(chunk)
412
+ buffer_length = chunk.size
413
+ write_buffer_length(buffer_length)
414
+ total_bytes_sent = 0
415
+ while total_bytes_sent < buffer_length
416
+ bytes_sent = self.sock.write(chunk[total_bytes_sent..-1])
417
+ if bytes_sent == 0
418
+ raise ConnectionClosedException.new("Socket sent 0 bytes.")
419
+ end
420
+ total_bytes_sent += bytes_sent
421
+ end
422
+ end
423
+
424
+ def write_buffer_length(n)
425
+ bytes_sent = sock.write([n].pack('I'))
426
+ if bytes_sent == 0
427
+ raise ConnectionClosedException.new("socket sent 0 bytes")
428
+ end
429
+ end
430
+
431
+ def read_buffer_length
432
+ read = sock.read(BUFFER_HEADER_LENGTH)
433
+ if read == '' || read == nil
434
+ raise ConnectionClosedException.new("Socket read 0 bytes.")
435
+ end
436
+ read.unpack('I')[0]
437
+ end
438
+
439
+ def close
440
+ sock.close
441
+ end
442
+ end
443
+ end
@@ -0,0 +1,160 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ module Avro
18
+ class Protocol
19
+ VALID_TYPE_SCHEMA_TYPES = Set.new(%w[enum record error fixed])
20
+ class ProtocolParseError < Avro::AvroError; end
21
+
22
+ attr_reader :name, :namespace, :types, :messages, :md5
23
+ def self.parse(protocol_string)
24
+ json_data = Yajl.load(protocol_string)
25
+
26
+ if json_data.is_a? Hash
27
+ name = json_data['protocol']
28
+ namespace = json_data['namespace']
29
+ types = json_data['types']
30
+ messages = json_data['messages']
31
+ Protocol.new(name, namespace, types, messages)
32
+ else
33
+ raise ProtocolParseError, "Not a JSON object: #{json_data}"
34
+ end
35
+ end
36
+
37
+ def initialize(name, namespace=nil, types=nil, messages=nil)
38
+ # Ensure valid ctor args
39
+ if !name
40
+ raise ProtocolParseError, 'Protocols must have a non-empty name.'
41
+ elsif !name.is_a?(String)
42
+ raise ProtocolParseError, 'The name property must be a string.'
43
+ elsif !namespace.is_a?(String)
44
+ raise ProtocolParseError, 'The namespace property must be a string.'
45
+ elsif !types.is_a?(Array)
46
+ raise ProtocolParseError, 'The types property must be a list.'
47
+ elsif !messages.is_a?(Hash)
48
+ raise ProtocolParseError, 'The messages property must be a JSON object.'
49
+ end
50
+
51
+ @name = name
52
+ @namespace = namespace
53
+ type_names = {}
54
+ @types = parse_types(types, type_names)
55
+ @messages = parse_messages(messages, type_names)
56
+ @md5 = Digest::MD5.digest(to_s)
57
+ end
58
+
59
+ def to_s
60
+ Yajl.dump to_hash
61
+ end
62
+
63
+ def ==(other)
64
+ to_hash == Yajl.load(other.to_s)
65
+ end
66
+
67
+ private
68
+ def parse_types(types, type_names)
69
+ type_objects = []
70
+ types.collect do |type|
71
+ # FIXME adding type.name to type_names is not defined in the
72
+ # spec. Possible bug in the python impl and the spec.
73
+ type_object = Schema.real_parse(type, type_names)
74
+ unless VALID_TYPE_SCHEMA_TYPES.include?(type_object.type)
75
+ msg = "Type #{type} not an enum, record, fixed or error."
76
+ raise ProtocolParseError, msg
77
+ end
78
+ type_object
79
+ end
80
+ end
81
+
82
+ def parse_messages(messages, names)
83
+ message_objects = {}
84
+ messages.each do |name, body|
85
+ if message_objects.has_key?(name)
86
+ raise ProtocolParseError, "Message name \"#{name}\" repeated."
87
+ elsif !body.is_a?(Hash)
88
+ raise ProtocolParseError, "Message name \"#{name}\" has non-object body #{body.inspect}"
89
+ end
90
+
91
+ request = body['request']
92
+ response = body['response']
93
+ errors = body['errors']
94
+ message_objects[name] = Message.new(name, request, response, errors, names)
95
+ end
96
+ message_objects
97
+ end
98
+
99
+ def to_hash
100
+ hsh = {'protocol' => name}
101
+ hsh['namespace'] = namespace if namespace
102
+ hsh['types'] = types.map{|t| Yajl.load(t.to_s) } if types
103
+
104
+ if messages
105
+ hsh['messages'] = messages.collect_hash{|k,t| [k, Yajl.load(t.to_s)] }
106
+ end
107
+
108
+ hsh
109
+ end
110
+
111
+ class Message
112
+ attr_reader :name, :response_from_names, :request, :response, :errors
113
+ def initialize(name, request, response, errors=nil, names=nil)
114
+ @name = name
115
+ @response_from_names = false
116
+
117
+ @request = parse_request(request, names)
118
+ @response = parse_response(response, names)
119
+ @errors = parse_errors(errors, names) if errors
120
+ end
121
+
122
+ def to_s
123
+ hsh = {'request' => Yajl.load(request.to_s)}
124
+ if response_from_names
125
+ hsh['response'] = response.fullname
126
+ else
127
+ hsh['response'] = Yajl.load(response.to_s)
128
+ end
129
+
130
+ if errors
131
+ hsh['errors'] = Yajl.load(errors.to_s)
132
+ end
133
+ Yajl.dump hsh
134
+ end
135
+
136
+ def parse_request(request, names)
137
+ unless request.is_a?(Array)
138
+ raise ProtocolParseError, "Request property not an Array: #{request.inspect}"
139
+ end
140
+ Schema::RecordSchema.new(nil, nil, request, names, 'request')
141
+ end
142
+
143
+ def parse_response(response, names)
144
+ if response.is_a?(String) && names[response]
145
+ @response_from_names = true
146
+ names[response]
147
+ else
148
+ Schema.real_parse(response, names)
149
+ end
150
+ end
151
+
152
+ def parse_errors(errors, names)
153
+ unless errors.is_a?(Array)
154
+ raise ProtocolParseError, "Errors property not an Array: #{errors}"
155
+ end
156
+ Schema.real_parse(errors, names)
157
+ end
158
+ end
159
+ end
160
+ end