arya-pandemic 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +99 -0
- data/Rakefile +14 -0
- data/lib/pandemic.rb +40 -0
- data/lib/pandemic/client_side/cluster_connection.rb +129 -0
- data/lib/pandemic/client_side/config.rb +33 -0
- data/lib/pandemic/client_side/connection.rb +31 -0
- data/lib/pandemic/client_side/connection_proxy.rb +15 -0
- data/lib/pandemic/client_side/pandemize.rb +17 -0
- data/lib/pandemic/connection_pool.rb +117 -0
- data/lib/pandemic/mutex_counter.rb +24 -0
- data/lib/pandemic/server_side/client.rb +86 -0
- data/lib/pandemic/server_side/config.rb +55 -0
- data/lib/pandemic/server_side/handler.rb +27 -0
- data/lib/pandemic/server_side/peer.rb +203 -0
- data/lib/pandemic/server_side/request.rb +72 -0
- data/lib/pandemic/server_side/server.rb +231 -0
- data/lib/pandemic/util.rb +26 -0
- data/pandemic.gemspec +31 -0
- metadata +91 -0
data/README.markdown
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
# Pandemic
|
2
|
+
Pandemic is a map-reduce framework. You give it the map, process, and reduce methods and it handles the rest. It works both in Ruby 1.8 and Ruby 1.9, and performs better on 1.9.
|
3
|
+
|
4
|
+
## Usage
|
5
|
+
### Server
|
6
|
+
|
7
|
+
require 'rubygems'
|
8
|
+
require 'pandemic'
|
9
|
+
|
10
|
+
class Handler < Pandemic::ServerSide::Handler
|
11
|
+
def process(body)
|
12
|
+
body.reverse
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
pandemic_server = epidemic!
|
17
|
+
pandemic_server.handler = Handler.new
|
18
|
+
pandemic_server.start.join
|
19
|
+
|
20
|
+
In this example, the handler doesn't define the map or reduce methods, and the defaults are used. The default for each is as follows:
|
21
|
+
|
22
|
+
* map: Send the full request body to every connected node
|
23
|
+
* process: Return the body (do nothing)
|
24
|
+
* reduce: Concatenate all the responses
|
25
|
+
|
26
|
+
### Client
|
27
|
+
|
28
|
+
require 'rubygems'
|
29
|
+
require 'pandemic'
|
30
|
+
|
31
|
+
class TextFlipper
|
32
|
+
include Pandemize
|
33
|
+
def flip(str)
|
34
|
+
pandemic.request(str)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
### Config
|
40
|
+
Both the server and client have config files:
|
41
|
+
|
42
|
+
# pandemic_server.yml
|
43
|
+
servers:
|
44
|
+
- host1:4000
|
45
|
+
- host2:4000
|
46
|
+
response_timeout: 0.5
|
47
|
+
|
48
|
+
Each value for the server list is the _host:port_ that a node can bind to. The servers value can be a hash or an array of hashes, but I'll get to that later. The response timeout is how long to wait for responses from nodes before returning to the client.
|
49
|
+
|
50
|
+
# pandemic_client.yml
|
51
|
+
servers:
|
52
|
+
- host1:4000
|
53
|
+
- host2:4000
|
54
|
+
max_connections_per_server: 10
|
55
|
+
min_connections_per_server: 1
|
56
|
+
The min/max connection settings control how many connections the client opens to each node. If you're using Rails, then just use 1 for both min/max since it's single threaded.
|
57
|
+
|
58
|
+
### More Config
|
59
|
+
There are three ways to start a server:
|
60
|
+
|
61
|
+
* ruby server.rb -i 0
|
62
|
+
* ruby server.rb -i machine1hostname
|
63
|
+
* ruby server.rb -a localhost:4000
|
64
|
+
|
65
|
+
The first refers to the index in the servers array:
|
66
|
+
|
67
|
+
servers:
|
68
|
+
- host1:4000 # started with ruby server.rb -i 0
|
69
|
+
- host2:4000 # started with ruby server.rb -i 1
|
70
|
+
|
71
|
+
The second refers to the index in the servers _hash_. This can be particularly useful if you use the hostname as the key.
|
72
|
+
|
73
|
+
servers:
|
74
|
+
machine1: host1:4000 # started with ruby server.rb -i machine1
|
75
|
+
machine2: host2:4000 # started with ruby server.rb -i machine2
|
76
|
+
|
77
|
+
The third is to specify the host and port explicitly. Ensure that the host and port you specify are actually in the config; otherwise the other nodes won't be able to communicate with it.
|
78
|
+
|
79
|
+
You can also set node-specific configuration options.
|
80
|
+
|
81
|
+
servers:
|
82
|
+
- host1:4000:
|
83
|
+
database: pandemic_node_1
|
84
|
+
host: localhost
|
85
|
+
username: foobar
|
86
|
+
password: f00bar
|
87
|
+
- host2:4000:
|
88
|
+
database: pandemic_node_2
|
89
|
+
host: localhost
|
90
|
+
username: fizzbuzz
|
91
|
+
password: f1zzbuzz
|
92
|
+
|
93
|
+
And you can access these additional options using _config.get(keys)_ in your handler:
|
94
|
+
|
95
|
+
class Handler < Pandemic::ServerSide::Handler
|
96
|
+
def initialize
|
97
|
+
@dbh = Mysql.real_connect(*config.get('host', 'username', 'password', 'database'))
|
98
|
+
end
|
99
|
+
end
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'echoe'
|
4
|
+
|
5
|
+
Echoe.new('pandemic', '0.2') do |p|
|
6
|
+
p.description = "Distribute MapReduce to any of the workers and it will spread, like a pandemic."
|
7
|
+
p.url = ""
|
8
|
+
p.author = "Arya Asemanfar"
|
9
|
+
p.email = "aryaasemanfar@gmail.com"
|
10
|
+
p.ignore_pattern = ["tmp/*", "script/*", 'config.yml']
|
11
|
+
p.development_dependencies = []
|
12
|
+
end
|
13
|
+
|
14
|
+
Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
|
data/lib/pandemic.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'socket'
|
3
|
+
require 'fastthread' if RUBY_VERSION < '1.9'
|
4
|
+
require 'thread'
|
5
|
+
require 'monitor'
|
6
|
+
require 'yaml'
|
7
|
+
require 'digest/md5'
|
8
|
+
require 'logger'
|
9
|
+
|
10
|
+
require 'pandemic/util'
|
11
|
+
require 'pandemic/connection_pool'
|
12
|
+
require 'pandemic/mutex_counter'
|
13
|
+
|
14
|
+
require 'pandemic/server_side/config'
|
15
|
+
require 'pandemic/server_side/client'
|
16
|
+
require 'pandemic/server_side/server'
|
17
|
+
require 'pandemic/server_side/peer'
|
18
|
+
require 'pandemic/server_side/request'
|
19
|
+
require 'pandemic/server_side/handler'
|
20
|
+
|
21
|
+
require 'pandemic/client_side/config'
|
22
|
+
require 'pandemic/client_side/cluster_connection'
|
23
|
+
require 'pandemic/client_side/connection'
|
24
|
+
require 'pandemic/client_side/connection_proxy'
|
25
|
+
require 'pandemic/client_side/pandemize'
|
26
|
+
|
27
|
+
# TODO:
|
28
|
+
# - IO timeouts/robustness
|
29
|
+
# - documentation
|
30
|
+
# - PING/PONG?
|
31
|
+
|
32
|
+
$logger = Logger.new(STDOUT)
|
33
|
+
$logger.level = Logger::DEBUG
|
34
|
+
$logger.datetime_format = "%Y-%m-%d %H:%M:%S "
|
35
|
+
|
36
|
+
# Convenience entry point for server scripts: delegates to
# Pandemic::ServerSide::Server.boot and returns whatever that call returns.
# NOTE(review): Server.boot is defined in server_side/server.rb (not visible
# here); the README usage treats the result as the server object — confirm.
def epidemic!
|
37
|
+
Pandemic::ServerSide::Server.boot
|
38
|
+
end
|
39
|
+
|
40
|
+
::Pandemize = Pandemic::ClientSide::Pandemize
|
@@ -0,0 +1,129 @@
|
|
1
|
+
module Pandemic
  module ClientSide
    # Client-side pool of connections to the servers listed in Config.servers.
    #
    # Connections are tracked twice: in flat lists (@connections / @available)
    # used when the caller does not care which node answers, and grouped by
    # server key (@grouped_connections / @grouped_available) used when a
    # request is pinned to one node through #[]. The server key is the index
    # of the server in Config.servers. Both views must always be updated
    # together, otherwise one connection can be handed to two threads.
    class ClusterConnection
      # Raised when every eligible connection stayed busy for the whole
      # Config.connection_wait_timeout.
      class NotEnoughConnectionsTimeout < Exception; end
      # Raised when there is no connection at all to wait for.
      class NoNodesAvailable < Exception; end
      # Raised when the node closes or resets the socket mid-request.
      class LostConnectionToNode < Exception; end

      include Util

      # Loads the client config and eagerly opens
      # Config.min_connections_per_server connections to each server.
      # Connections that fail to come up are silently skipped.
      def initialize
        Config.load
        @connections = []
        @available = []
        @grouped_connections = Hash.new { |hash, key| hash[key] = [] }
        @grouped_available = Hash.new { |hash, key| hash[key] = [] }
        @mutex = Monitor.new
        @connection_proxies = {}
        @queue = @mutex.new_cond # TODO: there should be a queue for each group

        Config.servers.each_with_index do |_server_addr, key|
          @connection_proxies[key] = ConnectionProxy.new(key, self)
          Config.min_connections_per_server.times do
            connection = create_connection(key)
            # create_connection returns nil once the per-server maximum is
            # reached (e.g. min configured larger than max).
            next unless connection && connection.alive?

            @connections << connection
            @available << connection
            @grouped_connections[key] << connection
            @grouped_available[key] << connection
          end
        end
      end

      # Returns the ConnectionProxy bound to the server with the given key
      # (its index in Config.servers), or nil for an unknown key.
      def [](key)
        @connection_proxies[key]
      end

      # Sends +body+ to a node and returns the response body.
      #
      # body - the request payload (String).
      # key  - optional server key; nil means "any node".
      #
      # The wire format is "<byte length>\n<payload>" in both directions.
      # Raises LostConnectionToNode if the node hangs up or resets the socket,
      # and whatever checkout_connection raises when no connection is free.
      def request(body, key = nil)
        with_connection(key) do |socket|
          begin
            # The peer reads a byte count, so send bytes, not characters
            # (String#size counts characters on Ruby 1.9+).
            size = body.respond_to?(:bytesize) ? body.bytesize : body.size
            socket.write("#{size}\n#{body}")
            socket.flush
            response_size = socket.gets
            # gets returns nil on EOF, i.e. the node closed the connection
            raise LostConnectionToNode unless response_size

            socket.read(response_size.strip.to_i)
          rescue Errno::ECONNRESET
            raise LostConnectionToNode
          end
        end
      end

      private

      # Checks a connection out, yields its socket to the block, and always
      # checks the connection back in afterwards. Returns the block's value.
      def with_connection(key, &block)
        connection = nil
        begin
          connection = checkout_connection(key)
          block.call(connection.socket)
        ensure
          checkin_connection(connection) if connection
        end
      end

      # Returns an idle connection for +key+ (any server when key is nil),
      # opening a new one if allowed, otherwise waiting for a checkin.
      #
      # Raises NotEnoughConnectionsTimeout when connections exist but none
      # became free within Config.connection_wait_timeout seconds, and
      # NoNodesAvailable when there is nothing to wait for.
      def checkout_connection(key)
        connection = nil
        select_from = key.nil? ? @available : @grouped_available[key]
        all_connections = key.nil? ? @connections : @grouped_connections[key]
        # An explicit deadline is used because on Ruby 1.9+ the monitor
        # condition's wait(timeout) returns true even when it timed out, so
        # its return value cannot be used to detect the timeout.
        deadline = Time.now + Config.connection_wait_timeout
        @mutex.synchronize do
          loop do
            if select_from.size > 0
              connection = select_from.pop
              # Remove the connection from the *other* availability view too.
              # With a nil key the group is the connection's own key, not nil.
              if key.nil?
                @grouped_available[connection.key].delete(connection)
              else
                @available.delete(connection)
              end
              break
            elsif (connection = create_connection(key)) && connection.alive?
              @connections << connection
              # create_connection may have picked the server itself, so file
              # the connection under the key it was actually created with.
              @grouped_connections[connection.key] << connection
              break
            elsif all_connections.size > 0
              remaining = deadline - Time.now
              raise NotEnoughConnectionsTimeout if remaining <= 0

              @queue.wait(remaining)
            else
              raise NoNodesAvailable
            end
          end
        end
        connection
      end

      # Returns a connection to both availability lists and wakes one waiter.
      def checkin_connection(connection)
        @mutex.synchronize do
          @available.unshift(connection)
          @grouped_available[connection.key].unshift(connection)
          @queue.signal
        end
      end

      # Opens a new Connection to the server identified by +key+. When key is
      # nil, the known server group with the fewest connections is chosen.
      #
      # Returns nil when no server group is known yet or when the chosen
      # server already has Config.max_connections_per_server connections.
      # The returned Connection may be dead; callers must check #alive?.
      def create_connection(key)
        if key.nil?
          # find a key where we can add more connections
          min, min_key = nil, nil
          @grouped_connections.each do |group_key, list|
            if min.nil? || list.size < min
              min_key = group_key
              min = list.size
            end
          end
          return nil if min_key.nil?

          key = min_key
        end
        return nil if @grouped_connections[key].size >= Config.max_connections_per_server

        host, port = host_port(Config.servers[key])
        Connection.new(host, port, key)
      end

      # TODO: a thread to manage killing and reviving connections
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Pandemic
  module ClientSide
    # Process-wide client configuration, read once from a YAML file.
    #
    # Recognised keys (all optional):
    #   servers                    - list (or hash) of "host:port" strings
    #   max_connections_per_server - Integer, default 1
    #   min_connections_per_server - Integer, default 1
    #   connection_wait_timeout    - seconds (Float), default 1
    class Config
      class << self
        # Guards .load so concurrent callers parse the file only once.
        @@load_mutex = Mutex.new
        attr_writer :config_path
        attr_accessor :loaded
        attr_accessor :servers, :max_connections_per_server, :min_connections_per_server,
                      :connection_wait_timeout

        # Reads the YAML file at config_path and populates the settings.
        # Does nothing if the config has already been loaded; set
        # +loaded+ back to false to force a reload.
        def load
          @@load_mutex.synchronize do
            return if loaded

            # An empty file parses to a falsy value rather than a Hash;
            # treat it as "no settings" so the defaults below apply.
            yaml = YAML.load_file(config_path) || {}

            @servers = yaml['servers'] || []
            # this is just so if we copy/paste from server's yml to client's yml, it will still work
            @servers = @servers.values if @servers.is_a?(Hash)
            @servers.sort! # so it's consistent across all clients

            @max_connections_per_server = (yaml['max_connections_per_server'] || 1).to_i
            @min_connections_per_server = (yaml['min_connections_per_server'] || 1).to_i
            @connection_wait_timeout = (yaml['connection_wait_timeout'] || 1).to_f
            self.loaded = true
          end
        end

        # Path of the YAML file; defaults to "pandemic_client.yml" relative
        # to the current working directory when none was assigned.
        def config_path
          @config_path || "pandemic_client.yml"
        end
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Pandemic
  module ClientSide
    # A single TCP connection from a client to one Pandemic node.
    #
    # key    - the server key this connection belongs to (stored as given).
    # socket - the underlying TCPSocket, or nil if connecting failed.
    class Connection
      attr_reader :key, :socket

      # Connects to host:port immediately. A refused or timed-out connect
      # does not raise; it leaves the connection not #alive?.
      def initialize(host, port, key)
        @host, @port, @key = host, port, key
        connect
      end

      # Truthy when a socket was opened and has not been closed locally.
      def alive?
        @socket && !@socket.closed?
      end

      private

      # Opens the socket, disables Nagle's algorithm where the platform
      # supports it, and announces this peer as a client with the
      # "CLIENT\n" handshake line. Sets @socket to nil on
      # ETIMEDOUT / ECONNREFUSED.
      def connect
        @socket = begin
          connection = TCPSocket.new(@host, @port)
          if connection && !connection.closed?
            # const_defined? works with both the String constants of Ruby 1.8
            # and the Symbol constants of 1.9+, unlike constants.include?('...')
            # which never matches on 1.9+.
            if Socket.const_defined?(:TCP_NODELAY)
              connection.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_NODELAY, 1)
            end
            connection.write("CLIENT\n")
            connection
          else
            nil
          end
        rescue Errno::ETIMEDOUT, Errno::ECONNREFUSED
          nil
        end
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Pandemic
  module ClientSide
    # Thin handle that pins requests to one server of a cluster.
    # It remembers a server key and forwards #request to the cluster
    # together with that key.
    class ConnectionProxy
      # Strip the inherited public interface so the proxy exposes almost
      # nothing but #request; double-underscore methods and object_id stay.
      instance_methods.each do |name|
        next if name =~ /^__/ || name =~ /object_id/

        undef_method(name)
      end

      # key     - server key passed along with every request.
      # cluster - object responding to request(body, key).
      def initialize(key, cluster)
        @key = key
        @cluster = cluster
      end

      # Sends +body+ through the cluster to the pinned server and returns
      # the cluster's result.
      def request(body)
        @cluster.request(body, @key)
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Pandemic
  module ClientSide
    # Mixin that gives a class one shared ClusterConnection.
    # Including it creates the connection right away and stores it on the
    # including class; instances reach it through #pandemic.
    module Pandemize
      # Hook run on include: builds the class-level connection (unless the
      # class already holds one) and exposes it as Klass.pandemize_connection.
      def self.included(klass)
        unless klass.instance_variable_get(:@pandemize_connection)
          cluster = Pandemic::ClientSide::ClusterConnection.new
          klass.instance_variable_set(:@pandemize_connection, cluster)
        end

        class << klass
          attr_reader :pandemize_connection
        end
      end

      # The connection stored on the receiver's own class.
      def pandemic
        self.class.pandemize_connection
      end
    end
  end
end
|
@@ -0,0 +1,117 @@
|
|
1
|
+
module Pandemic
  # Generic, monitor-guarded pool of connections.
  #
  # The pool does not know how to build connections itself: register a
  # factory with `create_connection { ... }` and, optionally, a teardown
  # with `destroy_connection { |conn| ... }`. Use #with_connection to borrow
  # a connection for the duration of a block.
  #
  # Options:
  #   :max_connections - upper bound enforced by #with_connection (default 10)
  #   :timeout         - seconds to wait for a free connection (default 3)
  class ConnectionPool
    # Raised when no connection became free within the pool's timeout.
    class TimedOutWaitingForConnectionException < Exception; end
    # Raised when a connection is needed but no factory block was registered.
    class CreateConnectionUndefinedException < Exception; end

    include Util

    def initialize(options = {})
      @mutex = Monitor.new
      @queue = @mutex.new_cond
      @available = []
      @connections = []
      @max_connections = options[:max_connections] || 10
      @timeout = options[:timeout] || 3
    end

    # Creates one more connection and makes it available.
    # Does nothing if the factory returns nil/false.
    def add_connection!
      # bang because we're ignoring the max connections
      conn = create_connection
      if conn
        @mutex.synchronize do
          @connections << conn
          @available << conn
        end
      end
    end

    # With a block: registers the block as the connection factory.
    # Without a block: invokes the factory and returns its result.
    # Raises CreateConnectionUndefinedException if no factory was registered.
    def create_connection(&block)
      if block.nil?
        if @create_connection
          @create_connection.call
        else
          raise CreateConnectionUndefinedException.new("You must specify a block to create connections")
        end
      else
        @create_connection = block
      end
    end

    # With a block: registers the block as the teardown handler.
    # Without a block: tears +connection+ down using the handler, or, when
    # none was registered, closes it if it is still open.
    def destroy_connection(connection = nil, &block)
      if block.nil?
        if @destroy_connection
          @destroy_connection.call(connection)
        else
          if connection && !connection.closed?
            # default behavior is this
            connection.close
          end
        end
      else
        @destroy_connection = block
      end
    end

    # True when the pool currently tracks at least one connection.
    def connected?
      @mutex.synchronize { @connections.size > 0 }
    end

    # Destroys every connection. Idle ones go immediately; checked-out ones
    # are destroyed as they are checked back in, so this blocks until all
    # borrowers have returned their connections. A concurrent second call
    # returns at once.
    def disconnect
      @mutex.synchronize do
        return if @disconnecting

        @disconnecting = true
        begin
          destroy_available_connections
          while @connections.size > 0 && @queue.wait
            destroy_available_connections
          end
        ensure
          # Reset even if a teardown raised; otherwise every later
          # disconnect would silently return early.
          @disconnecting = false
        end
      end
    end

    # Borrows a connection, yields it, and always returns it to the pool.
    # Returns the block's value. Raises whatever #checkout raises.
    def with_connection(&block)
      connection = nil
      begin
        connection = checkout
        block.call(connection)
      ensure
        checkin(connection) if connection
      end
    end

    private

    # Destroys all currently idle connections and forgets them.
    # Must be called with @mutex held.
    def destroy_available_connections
      @available.each do |conn|
        destroy_connection(conn)
        @connections.delete(conn)
      end
      @available = []
    end

    # Returns an idle connection, creating one while under the maximum,
    # otherwise waiting for a checkin.
    # Raises TimedOutWaitingForConnectionException after @timeout seconds.
    def checkout
      connection = nil
      # An explicit deadline is used because on Ruby 1.9+ the monitor
      # condition's wait(timeout) returns true even when it timed out.
      deadline = Time.now + @timeout
      @mutex.synchronize do
        loop do
          if @available.size > 0
            connection = @available.pop
            break
          elsif @connections.size < @max_connections && (connection = create_connection)
            @connections << connection
            break
          else
            remaining = deadline - Time.now
            raise TimedOutWaitingForConnectionException if remaining <= 0

            @queue.wait(remaining)
          end
        end
      end
      connection
    end

    # Puts a connection back and wakes one thread waiting on the pool.
    def checkin(connection)
      @mutex.synchronize do
        @available.unshift(connection)
        @queue.signal
      end
    end
  end
end
|