arya-pandemic 0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,99 @@
1
+ # Pandemic
2
+ Pandemic is a map-reduce framework. You give it the map, process, and reduce methods and it handles the rest. It works both in Ruby 1.8 and Ruby 1.9, and performs better on 1.9.
3
+
4
+ ## Usage
5
+ ### Server
6
+
7
+ require 'rubygems'
8
+ require 'pandemic'
9
+
10
+ class Handler < Pandemic::ServerSide::Handler
11
+ def process(body)
12
+ body.reverse
13
+ end
14
+ end
15
+
16
+ pandemic_server = epidemic!
17
+ pandemic_server.handler = Handler.new
18
+ pandemic_server.start.join
19
+
20
+ In this example, the handler doesn't define the map or reduce methods, and the defaults are used. The default for each is as follows:
21
+
22
+ * map: Send the full request body to every connected node
23
+ * process: Return the body (do nothing)
24
+ * reduce: Concatenate all the responses
25
+
26
+ ### Client
27
+
28
+ require 'rubygems'
29
+ require 'pandemic'
30
+
31
+ class TextFlipper
32
+ include Pandemize
33
+ def flip(str)
34
+ pandemic.request(str)
35
+ end
36
+ end
37
+
38
+
39
+ ### Config
40
+ Both the server and client have config files:
41
+
42
+ # pandemic_server.yml
43
+ servers:
44
+ - host1:4000
45
+ - host2:4000
46
+ response_timeout: 0.5
47
+
48
+ Each value for the server list is the _host:port_ that a node can bind to. The servers value can be a hash or an array of hashes, but I'll get to that later. The response timeout is how long to wait for responses from nodes before returning to the client.
49
+
50
+ # pandemic_client.yml
51
+ servers:
52
+ - host1:4000
53
+ - host2:4000
54
+ max_connections_per_server: 10
55
+ min_connections_per_server: 1
56
+ The min/max connection settings control how many connections are kept open to each node. If you're using Rails, just use 1 for both min and max, since it's single-threaded.
57
+
58
+ ### More Config
59
+ There are three ways to start a server:
60
+
61
+ * ruby server.rb -i 0
62
+ * ruby server.rb -i machine1hostname
63
+ * ruby server.rb -a localhost:4000
64
+
65
+ The first refers to the index in the servers array:
66
+
67
+ servers:
68
+ - host1:4000 # started with ruby server.rb -i 0
69
+ - host2:4000 # started with ruby server.rb -i 1
70
+
71
+ The second refers to the key in the servers _hash_. This can be particularly useful if you use the hostname as the key.
72
+
73
+ servers:
74
+ machine1: host1:4000 # started with ruby server.rb -i machine1
75
+ machine2: host2:4000 # started with ruby server.rb -i machine2
76
+
77
+ The third is to specify the host and port explicitly. Ensure that the host and port you specify are actually in the config; otherwise the other nodes won't be able to communicate with it.
78
+
79
+ You can also set node-specific configuration options.
80
+
81
+ servers:
82
+ - host1:4000:
83
+ database: pandemic_node_1
84
+ host: localhost
85
+ username: foobar
86
+ password: f00bar
87
+ - host2:4000:
88
+ database: pandemic_node_2
89
+ host: localhost
90
+ username: fizzbuzz
91
+ password: f1zzbuzz
92
+
93
+ And you can access these additional options using _config.get(keys)_ in your handler:
94
+
95
+ class Handler < Pandemic::ServerSide::Handler
96
+ def initialize
97
+ @dbh = Mysql.real_connect(*config.get('host', 'username', 'password', 'database'))
98
+ end
99
+ end
@@ -0,0 +1,14 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'echoe'
4
+
5
# Gem packaging metadata for pandemic 0.2, declared through Echoe.
Echoe.new('pandemic', '0.2') do |gem|
  gem.description = "Distribute MapReduce to any of the workers and it will spread, like a pandemic."
  gem.url = ""
  gem.author = "Arya Asemanfar"
  gem.email = "aryaasemanfar@gmail.com"
  gem.ignore_pattern = ["tmp/*", "script/*", 'config.yml']
  gem.development_dependencies = []
end

# Load every extra rake task that lives next to this file, in sorted order.
tasks_dir = "#{File.dirname(__FILE__)}/tasks"
Dir["#{tasks_dir}/*.rake"].sort.each do |task_file|
  load task_file
end
@@ -0,0 +1,40 @@
1
+ require 'rubygems'
2
+ require 'socket'
3
+ require 'fastthread' if RUBY_VERSION < '1.9'
4
+ require 'thread'
5
+ require 'monitor'
6
+ require 'yaml'
7
+ require 'digest/md5'
8
+ require 'logger'
9
+
10
+ require 'pandemic/util'
11
+ require 'pandemic/connection_pool'
12
+ require 'pandemic/mutex_counter'
13
+
14
+ require 'pandemic/server_side/config'
15
+ require 'pandemic/server_side/client'
16
+ require 'pandemic/server_side/server'
17
+ require 'pandemic/server_side/peer'
18
+ require 'pandemic/server_side/request'
19
+ require 'pandemic/server_side/handler'
20
+
21
+ require 'pandemic/client_side/config'
22
+ require 'pandemic/client_side/cluster_connection'
23
+ require 'pandemic/client_side/connection'
24
+ require 'pandemic/client_side/connection_proxy'
25
+ require 'pandemic/client_side/pandemize'
26
+
27
+ # TODO:
28
+ # - IO timeouts/robustness
29
+ # - documentation
30
+ # - PING/PONG?
31
+
32
# Global logger shared by the library: writes to STDOUT at DEBUG level with
# second-resolution timestamps.
$logger = Logger.new(STDOUT)
$logger.datetime_format = "%Y-%m-%d %H:%M:%S "
$logger.level = Logger::DEBUG
35
+
36
# Convenience entry point for server scripts: boots the server through
# Pandemic::ServerSide::Server.boot and returns whatever +boot+ returns.
def epidemic!
  server_class = Pandemic::ServerSide::Server
  server_class.boot
end
39
+
40
# Top-level shortcut so client classes can simply write `include Pandemize`.
::Pandemize = ::Pandemic::ClientSide::Pandemize
@@ -0,0 +1,129 @@
1
module Pandemic
  module ClientSide
    # Client-side pool of connections to the nodes listed in Config.servers.
    #
    # Connections are tracked twice: in flat lists (@connections/@available)
    # and grouped by server index (@grouped_connections/@grouped_available).
    # Both views are guarded by a single Monitor and must be kept in sync.
    class ClusterConnection
      # Raised when every connection stayed busy for the whole
      # Config.connection_wait_timeout.
      class NotEnoughConnectionsTimeout < StandardError; end
      # Raised when there is no connection at all to pick from.
      class NoNodesAvailable < StandardError; end
      # Raised when the node drops the connection in the middle of a request.
      class LostConnectionToNode < StandardError; end

      include Util

      # Loads the client config and opens Config.min_connections_per_server
      # connections to every configured server. Servers that cannot be
      # reached are skipped.
      def initialize
        Config.load
        @connections = []
        @available = []
        @grouped_connections = Hash.new { |hash, key| hash[key] = [] }
        @grouped_available = Hash.new { |hash, key| hash[key] = [] }
        @mutex = Monitor.new
        @connection_proxies = {}
        @queue = @mutex.new_cond # TODO: there should be a queue for each group

        Config.servers.each_index do |key|
          @connection_proxies[key] = ConnectionProxy.new(key, self)
          Config.min_connections_per_server.times do
            connection = create_connection(key)
            # create_connection returns nil once the per-server maximum is
            # reached (e.g. min configured higher than max).
            next unless connection && connection.alive?
            @connections << connection
            @available << connection
            @grouped_connections[key] << connection
            @grouped_available[key] << connection
          end
        end
      end

      # Returns the ConnectionProxy bound to the server at index +key+
      # (nil for an unknown index).
      def [](key)
        @connection_proxies[key]
      end

      # Sends +body+ to a node and returns the response body.
      #
      # body - the request payload (String).
      # key  - server index to target, or nil to use any available node.
      #
      # Raises LostConnectionToNode, NotEnoughConnectionsTimeout or
      # NoNodesAvailable.
      def request(body, key = nil)
        with_connection(key) do |socket|
          begin
            # The response is read back with IO#read, which counts bytes, so
            # the request length prefix is sent in bytes as well.
            socket.write("#{byte_length(body)}\n#{body}")
            socket.flush
            response_size = socket.gets
            # A nil size line means the node closed the connection.
            raise LostConnectionToNode if response_size.nil?
            socket.read(response_size.strip.to_i)
          rescue Errno::ECONNRESET
            raise LostConnectionToNode
          end
        end
      end

      private

      # Length of +body+ in bytes. String#size counts characters on Ruby 1.9+;
      # String#bytesize is missing on old 1.8 releases, where size is bytes.
      def byte_length(body)
        body.respond_to?(:bytesize) ? body.bytesize : body.size
      end

      # Checks a connection out, yields its socket to the block and returns
      # the block's result. The connection goes back to the pool afterwards,
      # unless the block reported it lost, in which case it is dropped.
      def with_connection(key, &block)
        connection = nil
        lost = false
        begin
          connection = checkout_connection(key)
          block.call(connection.socket)
        rescue LostConnectionToNode
          lost = true
          raise
        ensure
          if connection
            lost ? discard_connection(connection) : checkin_connection(connection)
          end
        end
      end

      # Returns an exclusive connection for +key+ (any node when nil).
      # Prefers an idle connection, then tries to open a new one, then waits
      # for a check-in until Config.connection_wait_timeout has elapsed.
      def checkout_connection(key)
        # ConditionVariable#wait does not report whether it timed out, so the
        # deadline is tracked here.
        deadline = Time.now + Config.connection_wait_timeout
        @mutex.synchronize do
          select_from = key.nil? ? @available : @grouped_available[key]
          all_connections = key.nil? ? @connections : @grouped_connections[key]
          loop do
            unless select_from.empty?
              connection = select_from.pop
              # Remove it from the other availability index too; the group is
              # always the connection's own key, never the (possibly nil)
              # requested key.
              if key.nil?
                @grouped_available[connection.key].delete(connection)
              else
                @available.delete(connection)
              end
              return connection
            end

            connection = create_connection(key)
            if connection && connection.alive?
              @connections << connection
              @grouped_connections[connection.key] << connection
              return connection
            end

            raise NoNodesAvailable if all_connections.empty?
            remaining = deadline - Time.now
            raise NotEnoughConnectionsTimeout if remaining <= 0
            @queue.wait(remaining)
          end
        end
      end

      # Puts +connection+ back into both availability indexes and wakes one
      # waiting thread.
      def checkin_connection(connection)
        @mutex.synchronize do
          @available.unshift(connection)
          @grouped_available[connection.key].unshift(connection)
          @queue.signal
        end
      end

      # Forgets a connection whose node went away so it is never handed out
      # again, then closes its socket (best effort).
      def discard_connection(connection)
        @mutex.synchronize do
          @connections.delete(connection)
          @grouped_connections[connection.key].delete(connection)
          # A slot was freed: let a waiter try to open a replacement.
          @queue.signal
        end
        begin
          socket = connection.socket
          socket.close if socket && !socket.closed?
        rescue IOError, SystemCallError
          # The socket is already unusable; nothing else to clean up.
        end
      end

      # Opens a new connection to the server at +key+. With a nil key the
      # known server groups are tried from least to most loaded and the first
      # live connection is returned. Returns nil when nothing could be opened;
      # for an explicit key the result may be a dead connection, so callers
      # check alive?.
      def create_connection(key)
        return connect_to(key) unless key.nil?

        candidates = @grouped_connections.keys.sort_by { |node| @grouped_connections[node].size }
        candidates.each do |node|
          connection = connect_to(node)
          return connection if connection && connection.alive?
        end
        nil
      end

      # Connects to the server at index +key+, or returns nil when that
      # server already has Config.max_connections_per_server connections.
      def connect_to(key)
        return nil if @grouped_connections[key].size >= Config.max_connections_per_server
        host, port = host_port(Config.servers[key])
        Connection.new(host, port, key)
      end

      #TODO: a thread to manage killing and reviving connections
    end
  end
end
@@ -0,0 +1,33 @@
1
module Pandemic
  module ClientSide
    # Process-wide client configuration, read once from a YAML file
    # ("pandemic_client.yml" unless Config.config_path= was called).
    class Config
      class << self
        # Serialises concurrent calls to load.
        @@load_mutex = Mutex.new

        attr_accessor :loaded
        attr_writer :config_path
        attr_accessor :servers, :max_connections_per_server, :min_connections_per_server,
                      :connection_wait_timeout

        # Reads the YAML config and populates the accessors. Only the first
        # call does any work; later calls return immediately.
        #
        # Defaults: no servers, 1 max and 1 min connection per server, and a
        # connection wait timeout of 1 second.
        def load
          @@load_mutex.synchronize do
            return if loaded
            # YAML.load_file returns a false value for an empty file.
            yaml = YAML.load_file(config_path) || {}

            servers = yaml['servers'] || []
            # this is just so if we copy/paste from server's yml to client's yml, it will still work
            servers = servers.values if servers.is_a?(Hash)
            # Entries carrying node-specific options are one-key hashes
            # ({"host:port" => options}); the client only needs the address,
            # and hashes cannot be sorted.
            servers = servers.map { |entry| entry.is_a?(Hash) ? entry.keys.first : entry }
            @servers = servers.sort # so it's consistent across all clients

            @max_connections_per_server = (yaml['max_connections_per_server'] || 1).to_i
            @min_connections_per_server = (yaml['min_connections_per_server'] || 1).to_i
            @connection_wait_timeout = (yaml['connection_wait_timeout'] || 1).to_f
            self.loaded = true
          end
        end

        # Path of the YAML file to load; defaults to "pandemic_client.yml".
        def config_path
          @config_path || "pandemic_client.yml"
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
module Pandemic
  module ClientSide
    # A single client TCP connection to one node.
    #
    # key    - the server index this connection belongs to.
    # socket - the underlying TCPSocket, or nil when connecting failed.
    class Connection
      attr_reader :key, :socket

      # Connects to host:port immediately. A timed-out or refused connection
      # does not raise; it leaves the connection not alive?.
      def initialize(host, port, key)
        @host, @port, @key = host, port, key
        connect
      end

      # True-ish while a socket exists and has not been closed locally.
      def alive?
        @socket && !@socket.closed?
      end

      private

      # Opens the socket, disables Nagle's algorithm where the platform
      # exposes TCP_NODELAY, and sends the "CLIENT\n" greeting.
      def connect
        @socket = begin
          connection = TCPSocket.new(@host, @port)
          if connection && !connection.closed?
            # Socket.constants holds Symbols on Ruby 1.9+, so a String
            # include? check never matches there; defined? works everywhere.
            if defined?(Socket::TCP_NODELAY)
              connection.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_NODELAY, 1)
            end
            connection.write("CLIENT\n")
            connection
          else
            nil
          end
        rescue Errno::ETIMEDOUT, Errno::ECONNREFUSED
          nil
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module Pandemic
  module ClientSide
    # Handle for one specific node of a cluster: every request made through
    # it is sent with the proxy's server key.
    #
    # The class is stripped of its inherited public instance methods; only
    # names starting with "__" or containing "object_id" are kept.
    class ConnectionProxy
      instance_methods.each do |name|
        next if name =~ /^__/ || name =~ /object_id/
        undef_method(name)
      end

      # key     - server index the proxy is bound to.
      # cluster - object whose request(body, key) performs the call.
      def initialize(key, cluster)
        @key = key
        @cluster = cluster
      end

      # Sends +body+ to this proxy's node and returns the cluster's result.
      def request(body)
        @cluster.request(body, @key)
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Pandemic
  module ClientSide
    # Mixin that gives a class access to a shared ClusterConnection through
    # the +pandemic+ instance method.
    module Pandemize
      # Creates the ClusterConnection as soon as the module is included and
      # exposes it as klass.pandemize_connection.
      def self.included(klass)
        klass.class_eval do
          @pandemize_connection ||= Pandemic::ClientSide::ClusterConnection.new

          # The connection is stored on the including class only, so
          # subclasses fall back to their superclass instead of getting nil.
          def self.pandemize_connection
            return @pandemize_connection if @pandemize_connection
            superclass.pandemize_connection if superclass.respond_to?(:pandemize_connection)
          end
        end
      end

      # Returns the ClusterConnection shared by this object's class.
      def pandemic
        self.class.pandemize_connection
      end
    end
  end
end
@@ -0,0 +1,117 @@
1
module Pandemic
  # Generic, thread-safe pool of connections.
  #
  # The pool does not know how to open a connection: register a factory by
  # calling create_connection with a block. A custom teardown can be
  # registered the same way through destroy_connection; by default a
  # connection is closed with #close.
  class ConnectionPool
    # Raised by with_connection when no connection became free in time.
    class TimedOutWaitingForConnectionException < StandardError; end
    # Raised when a connection is needed but no factory block was registered.
    class CreateConnectionUndefinedException < StandardError; end

    include Util

    # options - :max_connections (default 10) caps connections opened on
    #           demand; :timeout (default 3) is how many seconds a checkout
    #           waits for a free connection.
    def initialize(options = {})
      @mutex = Monitor.new
      @queue = @mutex.new_cond
      @available = []
      @connections = []
      @max_connections = options[:max_connections] || 10
      @timeout = options[:timeout] || 3
    end

    # Opens one more connection and makes it available.
    # Bang because the max connections limit is ignored here.
    def add_connection!
      conn = create_connection
      return unless conn
      @mutex.synchronize do
        @connections << conn
        @available << conn
      end
    end

    # With a block: registers the block as the connection factory.
    # Without a block: calls the factory and returns the new connection.
    #
    # Raises CreateConnectionUndefinedException when no factory is registered.
    def create_connection(&block)
      if block
        @create_connection = block
      elsif @create_connection
        @create_connection.call
      else
        raise CreateConnectionUndefinedException, "You must specify a block to create connections"
      end
    end

    # With a block: registers the block as the teardown hook.
    # Without a block: tears +connection+ down, using the hook if one is
    # registered and otherwise closing the connection unless already closed.
    def destroy_connection(connection = nil, &block)
      if block
        @destroy_connection = block
      elsif @destroy_connection
        @destroy_connection.call(connection)
      elsif connection && !connection.closed?
        # default behavior is this
        connection.close
      end
    end

    # True when the pool holds at least one connection.
    def connected?
      @mutex.synchronize { @connections.size > 0 }
    end

    # Destroys every connection. Idle ones go immediately; checked-out ones
    # are destroyed as they are checked back in, so this blocks until all
    # have been returned.
    def disconnect
      @mutex.synchronize do
        return if @disconnecting
        @disconnecting = true
        begin
          destroy_available
          # Every check-in signals @queue; wake up and destroy what came back.
          while @connections.size > 0 && @queue.wait
            destroy_available
          end
        ensure
          # Reset even if a teardown raised, so disconnect can run again.
          @disconnecting = false
        end
      end
    end

    # Checks a connection out, yields it, and always checks it back in.
    # Returns the block's result.
    #
    # Raises TimedOutWaitingForConnectionException when no connection became
    # available within the configured timeout.
    def with_connection(&block)
      connection = nil
      begin
        connection = checkout
        block.call(connection)
      ensure
        checkin(connection) if connection
      end
    end

    private

    # Destroys all idle connections and forgets them. Callers hold @mutex.
    def destroy_available
      @available.each do |conn|
        destroy_connection(conn)
        @connections.delete(conn)
      end
      @available = []
    end

    # Returns an idle connection, or a freshly created one while below
    # max_connections, or waits for a check-in until the timeout elapses.
    def checkout
      # ConditionVariable#wait does not report whether it timed out, so the
      # deadline is tracked here.
      deadline = Time.now + @timeout
      @mutex.synchronize do
        loop do
          return @available.pop if @available.size > 0

          if @connections.size < @max_connections && (connection = create_connection)
            @connections << connection
            return connection
          end

          remaining = deadline - Time.now
          raise TimedOutWaitingForConnectionException if remaining <= 0
          @queue.wait(remaining)
        end
      end
    end

    # Makes +connection+ available again and wakes one waiting thread.
    def checkin(connection)
      @mutex.synchronize do
        @available.unshift(connection)
        @queue.signal
      end
    end
  end
end