adhd 0.0.1 → 0.1.0
- data/README.rdoc +62 -2
- data/VERSION +1 -1
- data/adhd.gemspec +10 -8
- data/bin/adhd +23 -8
- data/bin/adhd_cleanup +57 -0
- data/lib/adhd/adhd_rest_server.rb +229 -0
- data/lib/adhd/config.yml +1 -1
- data/lib/adhd/models/content_doc.rb +17 -0
- data/lib/adhd/models/content_shard.rb +97 -0
- data/lib/adhd/models/node_doc.rb +139 -0
- data/lib/adhd/models/shard_range.rb +202 -0
- data/lib/adhd/node_manager.rb +260 -0
- data/lib/adhd/reactor.rb +194 -12
- data/test/test_adhd.rb +0 -11
- metadata +11 -7
- data/lib/adhd.rb +0 -120
- data/lib/adhd/models.rb +0 -388
- data/lib/adhd/node.rb +0 -13
- data/models.rb +0 -19
data/lib/adhd/node_manager.rb
NEW
@@ -0,0 +1,260 @@
+require 'rubygems'
+require 'couchrest'
+require 'ruby-debug'
+# require File.dirname(__FILE__) + '/models'
+require File.dirname(__FILE__) + '/models/node_doc'
+require File.dirname(__FILE__) + '/models/content_doc'
+require File.dirname(__FILE__) + '/models/shard_range'
+require File.dirname(__FILE__) + '/models/content_shard'
+
+module Adhd
+  class NodeManager
+
+    attr_accessor :our_node, :ndb, :srdb
+
+    def initialize(config)
+      @config = config
+      @couch_server = CouchRest.new("http://#{config.node_url}:#{config.couchdb_server_port}")
+      # @couch_server.default_database = "#{config.node_name}_node_db"
+      @couch_db = @couch_server.database!("#{config.node_name}_node_db") # CouchRest::Database.new(@couch_server, "#{config.node_name}_node_db")
+      sync_with_buddy_node if config.buddy_server_url && config.buddy_server_db_name
+      @our_node = initialize_node
+      build_node_admin_databases
+      set_as_management_node_if_necessary
+      build_shards(10, 2)
+      build_node_content_databases
+      sync_databases
+    end
+
+    def event_handler(ev)
+      puts ev
+    end
+
+    # Sync the db with our buddy
+    #
+    def sync_with_buddy_node
+      begin
+        buddy_server = CouchRest.new("#{@config.buddy_server_url}")
+        buddy_db = buddy_server.database!(@config.buddy_server_db_name + "_node_db")
+        @couch_db.replicate_from(buddy_db)
+      rescue
+        puts "Could not buddy up with node #{@config.buddy_server_db_name}"
+      end
+    end
+
+    # Retrieve our own node record from CouchDB by our name.
+    #
+    # If there are other nodes with the same name, kill their records!
+    #
+    def initialize_node
+      puts "Initialize node #{@config.node_name}"
+      Node.use_database @couch_db
+      puts "DB #{@couch_db}, node #{@config.node_name}"
+      node_candidates = Node.by_name(:key => @config.node_name)
+      # node_candidates = @couch_db.view("by_name", {:key => @config.node_name})
+      node = node_candidates.pop
+      node = Node.new if node.nil?
+      node_candidates.each do |other_me|
+        other_me.destroy # destroy other records
+      end
+      # Update our very own record
+      node.name = @config.node_name
+      node.url = "http://#{@config.node_url}:#{@config.couchdb_server_port}"
+      node.status = "RUNNING"
+      node.save
+      node # Return our own node record
+    end
+
+    # We check if we are the first node. If we are the first node, we set
+    # ourselves up as the management node.
+    #
+    def set_as_management_node_if_necessary
+      all_nodes = Node.by_name
+      if all_nodes.length == 1
+        @our_node.is_management = 300
+        @our_node.save
+      end
+    end
+
+    def build_node_admin_databases
+      @conn_manager = ConnectionBank.new
+
+      # Let's build a nice NodeDB
+      @ndb = NodeDB.new(@our_node)
+      conn_node = UpdateNotifierConnection.new(@config.node_url,
+                                               @config.couchdb_server_port,
+                                               @our_node.name + "_node_db", # NOTE: Sooo ugly!
+                                               Proc.new {|data| handle_node_update data})
+      @conn_manager.add_connection(conn_node)
+
+
+      # Let's build a nice ShardDB
+      @srdb = ShardRangeDB.new(@ndb)
+
+      # Listen to the shard db and in case something changes re-build the DB.
+      # Changes to the shards should be infrequent and tolerable.
+      conn_shard = UpdateNotifierConnection.new(@config.node_url,
+                                                @config.couchdb_server_port,
+                                                @our_node.name + "_shard_db", # NOTE: Sooo ugly!
+                                                Proc.new {|data| build_node_content_databases})
+      @conn_manager.add_connection(conn_shard)
+
+    end
+
+    def handle_node_update update
+      # A node was added, removed, or changed status.
+      # If we are the admin, when a node joins we should allocate
+      # some shards to it.
+
+      # Only the head management node deals with node changes
+      return if @ndb.head_management_node && !(@ndb.head_management_node.name == @our_node.name)
+
+      # Given the shard_db and the node_db we should work out a new allocation
+      node_list = Node.by_name
+      shard_list = ShardRange.by_range_start
+      if node_list && shard_list
+        assign_nodes_to_shards(node_list, shard_list, 2)
+      end
+    end
+
+
+    def build_node_content_databases
+      # Get all content shard databases
+      # NOTE: we will have to refresh those when we are re-assigned shards
+      @contentdbs = {} if !@contentdbs
+      current_shards = @srdb.get_content_shards
+
+      # Add the new shards
+      current_shards.each_key do |cs|
+        if !(@contentdbs.has_key?(cs)) # Make sure we do not know of this shard
+          shard_db = current_shards[cs]
+          conn = UpdateNotifierConnection.new(@config.node_url,
+                                              @config.couchdb_server_port,
+                                              @our_node.name + "_" + shard_db.this_shard.shard_db_name + "_content_db", # NOTE: Sooo ugly!
+                                              Proc.new { |data| shard_db.sync })
+          @conn_manager.add_connection(conn)
+
+          # Store both the shard object and the update notifier
+          @contentdbs[cs] = [shard_db, conn]
+        end
+      end
+
+      # Delete what we do not need
+      @contentdbs.each_key do |cs|
+        if !(current_shards.has_key?(cs))
+          # Delete this shard from our DB
+          remove_content_shard @contentdbs[cs][0], @contentdbs[cs][1]
+          # Remove that key
+          @contentdbs.delete cs
+        end
+      end
+    end
+
+    def remove_content_shard content_shard, connection
+      # Kill the connection listening for updates on this shard
+      connection.kill
+      content_shard.sync
+      # TODO: test if the sync happened
+      # content_shard.this_shard_db.delete!
+      # TODO: run a sync with the current master to ensure that
+      #       any changes have been pushed. Then DELETE the database
+      #       to save space.
+    end
+
+    def run
+      # Enters the event machine loop
+      @conn_manager.run_all
+    end
+
+    def build_shards(number_of_shards, number_of_replicators)
+      if @our_node.is_management
+
+        if ShardRange.by_range_start.length == 0
+          puts "Creating new ranges"
+          @srdb.build_shards(number_of_shards)
+        end
+
+        # Populate the shards with some nodes at random
+        node_names = []
+        all_nodes = Node.by_name
+        all_nodes.each do |anode|
+          node_names << anode.name
+        end
+
+        ShardRange.by_range_start.each do |s|
+          if !s.node_list or s.node_list.length == 0
+            node_names.shuffle!
+            s.node_list = node_names[0..(number_of_replicators-1)]
+            s.master_node = node_names[0]
+            s.save
+          end
+        end
+      end
+    end
+
+    def sync_databases
+      @ndb.sync # SYNC
+      @srdb.sync # SYNC
+
+      @contentdbs.each_key do |cs|
+        @contentdbs[cs][0].sync
+      end
+    end
+
+    def sync_admin
+      @ndb.sync # SYNC
+      @srdb.sync # SYNC
+    end
+
+  end
+end
+
+# -------- Management node logic -------------------
+
+require 'md5'
+
+def assign_nodes_to_shards(node_list, shard_range_list, replication_factor)
+  # This is an automatic way to allocate shards to nodes that just
+  # arrive in the network, as well as re-allocate shards if nodes
+  # become unavailable or leave the network.
+
+  # NOTE: How to build skynet (Part III)
+  #
+  # The invariant we try to impose on the list of nodes that form a shard
+  # is that there should be at least replication_factor available nodes
+  # in it. At the same time we try to keep the list stable as nodes
+  # join and leave. To achieve this we hash in sequence the name of
+  # each node with the name of the shard. We sort this list by hash, and
+  # choose the first n nodes such that at least replication_factor nodes
+  # are available. Then we choose the first available node as the master
+  # for that shard.
+
+  shard_range_list.each do |shard_range|
+    # Sort all nodes using consistent hashing
+    sorted_nodes = node_list.sort_by {|node| MD5.new("#{node.name}||#{shard_range.shard_db_name}").to_s}
+    avail = 0
+    master = nil
+    shard_node_list = []
+    sorted_nodes.each do |node|
+      shard_node_list << node
+      if node.status == "RUNNING"
+        master = node if !master # Choose the first available node as the master
+        avail += 1
+        break if avail == replication_factor # We have enough available nodes
+      end
+    end
+
+    # Now put this list in the shard_range and save it,
+    # but only if there were changes.
+    new_master = master.name if master
+    new_node_list = shard_node_list.map {|node| node.name}
+
+    if !(new_master == shard_range.master_node) or !(new_node_list == shard_range.node_list)
+      shard_range.master_node = master.name if master
+      shard_range.node_list = shard_node_list.map {|node| node.name}
+      shard_range.save
+    end
+  end
+
+end
+
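For reference, a minimal sketch of how a NodeManager might be wired up. The field names are the ones the class reads from its config (node_name, node_url, couchdb_server_port, buddy_server_url, buddy_server_db_name); the OpenStruct config, the sample values, and the EM.run wrapper are illustrative assumptions, since the real wiring is presumably done by bin/adhd from lib/adhd/config.yml and may differ.

    require 'rubygems'
    require 'eventmachine'
    require 'ostruct'
    require 'adhd/reactor'       # defines ConnectionBank / UpdateNotifierConnection
    require 'adhd/node_manager'

    # Hypothetical config values; the gem's own config.yml is the real source.
    config = OpenStruct.new(
      :node_name            => "node1",
      :node_url             => "127.0.0.1",
      :couchdb_server_port  => 5984,
      :buddy_server_url     => nil,   # no buddy node: skip the initial replication
      :buddy_server_db_name => nil
    )

    # Creates the node record, admin databases, shards and content databases.
    manager = Adhd::NodeManager.new(config)

    EM.run do
      manager.run   # starts the ConnectionBank's _changes listeners
    end
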
data/lib/adhd/reactor.rb
CHANGED
@@ -4,7 +4,7 @@ require 'eventmachine'
 module Adhd
 
   # This module gets mixed into the EventMachine reactor loop, and sends events
-  # to our Adhd::Node which is hanging around waiting for
+  # to our Adhd::Node which is hanging around waiting for stuff to happen.
   #
   # Currently it just illustrates the fact that we can fire up an EventMachine
   # and have this reactor get mixed in as glue code to our Node.
@@ -29,29 +29,211 @@ module Adhd
 
   end
 
-  # A
-  #
-  #
+  # A notifier client that makes a long-running request to a CouchDB instance
+  # on a socket and continually sends any update notifications that it
+  # receives back to its @node.
   #
-  module
+  module DbUpdateNotifier
 
-    def initialize(
-      puts "Db update
-      @
+    def initialize(db_name, conn_obj)
+      puts "Db update notifier start..."
+      @db_name = db_name
+      @conn_obj = conn_obj
+      @buffer = ""
+      conn_obj.connection_inside = self # We tell the outer object who we are
     end
 
     # Makes a long-running request to a CouchDB instance's _changes URL.
     #
     def post_init
-
+      # NOTE: full domain name needed for virtual hosting
+      req = "GET #{@conn_obj.base_url}/#{@db_name}/_changes?feed=continuous&heartbeat=5000\r\n\r\n"
+      puts req
+      send_data req
     end
 
-    # Shoots update notifications from CouchDB to the @
+    # Shoots update notifications from CouchDB to the @conn.
     #
-    def receive_data data
-
+    def receive_data data
+      # puts "received_data: #{data}"
+      # puts "||#{data}||length=#{data.length}||#{data.dump}||"
+
+      @buffer += data # Add the data to the current buffer
+      updates = []
+      if @buffer =~ /(\{[^\n]+\}\n)/
+        updates += $~.to_a
+        # Trim the buffer to $~.end(0)
+        @buffer = @buffer[$~.end(0)..-1]
+      end
+
+      # Regexp for JSON updates is /\{[^\n]+\}\n/
+      updates.each do |json_event|
+        @conn_obj.event_handler(json_event) unless data == "\n"
+      end
+    end
+
+    #def close_connection
+    #  @conn_obj.close_handler(data)
+    #end
+
+  end
+
+
+  # Note: Some of manos's thoughts on how to manage our connections and events.
+  # We should build a class called connection_manager that we ask to build
+  # and listen to connections, as well as route events. Its job is to
+  # re-open them if they are closed or time out, mark nodes as UNAVAILABLE,
+  # and notify us back when data (i.e. an update) arrives. It would also be
+  # nifty if each connection were associated with a predicate: once this
+  # predicate is false we can simply close the connection. For example upon
+  # being given control of a different content shard, or a different master
+  # for the shard.
+
+  # In practice we will have two types of connections: Replicate and Notify.
+
+  class UpdateNotifierConnection
+    attr_accessor :db_name, :base_url, :connection_inside, :name
+
+    def initialize(node_url, couchdb_server_port, db_name, sync_block)
+      @node_url = node_url
+      @couchdb_server_port = couchdb_server_port
+      @db_name = db_name
+      @sync_block = sync_block
+      @status = "NOTRUNNING"
+      @base_url = "http://#{@node_url}:#{@couchdb_server_port}"
+      @name = @base_url + "/" + @db_name
+      @keep_alive = true
+    end
+
+    def kill
+      @keep_alive = false
+    end
+
+    def start
+      puts "Register the connection for #{@db_name}"
+      EM.connect @node_url, @couchdb_server_port, Adhd::DbUpdateNotifier, @db_name, self
+      @status = "RUNNING"
+    end
+
+    def event_handler data
+      # puts "||#{data}||nn"
+      puts "Run a crazy sync on db #{@db_name}"
+      #@db_obj_for_sync.sync
+      @sync_block.call(data)
+    end
+
+    def close_handler(reason)
+      puts "Closed abnormally #{reason}"
+      @status = "NOTRUNNING"
+    end
+
+    def down_for_good(reason)
+      if reason
+        puts "Closed for good #{reason}"
+      end
+    end
+
+    def keep_alive?
+      # Returns the truth value of the predicate
+      @keep_alive
+    end
+
+    def keep_alive_or_kill!
+      if !keep_alive?
+        # Schedule this connection for close
+        connection_inside.close_connection_after_writing
+        @status = "NOTRUNNING"
+      end
+    end
+
+    def should_start?
+      !(@status == "RUNNING")
+    end
+
+    def is_closed?
+      (@status == "NOTRUNNING")
     end
 
+
   end
+
+  class Connection
+    #def on_teardown(&block)
+    #  # Set the handler to be called when a connection is dead
+    #  block(self) # Run the teardown handler
+    #end
+
+    def initialize
+
+    end
+
+    def should_start?
+      !(@status == "RUNNING")
+    end
+
+    def is_closed?
+      (@status == "NOTRUNNING")
+    end
+
+  end
+
+  class ConnectionBank
+    # Manage a bunch of connections for us
+    def initialize
+      @our_connections = []
+    end
+
+    def add_connection(conn)
+      # Make sure we have no duplicates
+      @our_connections.each do |c|
+        if conn.name == c.name
+          return
+        end
+      end
+
+      # If it is happy to run, add it to the list and start it!
+      if conn.keep_alive?
+        @our_connections << conn
+        # Register the teardown handler for when the end comes...
+        # conn.on_teardown(|c| { rerun(c) })
+      end
+    end
+
+    def rerun(conn)
+      # When a connection is down, we check to see if it wants to be kept
+      # alive, and restart it; otherwise we remove it from the list.
+      if conn.keep_alive?
+        begin
+          conn.start
+        rescue Exception => e
+          conn.down_for_good(e)
+        end
+      else
+        # It seems we have died of natural causes
+        # XXX: is it true that Ruby does not throw an exception for EOF?
+        #      Otherwise we will never see this
+        conn.keep_alive_or_kill!
+        @our_connections.delete(conn)
+        conn.down_for_good(nil)
+      end
+
+    end
+
+    def run_all
+      # Go through all connections and run them all
+      # Run within EM.run loop
+      # puts "Connection bank runs all... (#{@our_connections.length} connections)"
+      @our_connections.each do |c|
+        if c.is_closed? or !c.keep_alive?
+          puts "Actually rerun #{c.db_name}..."
+
+          rerun(c)
+        end
+      end
+
+    end
+
+  end
+
 end
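The connection classes above can also be driven on their own. A rough sketch, assuming the adhd lib is on the load path; the database name, the puts callback, and the periodic timer are illustrative assumptions, not part of the gem:

    require 'rubygems'
    require 'eventmachine'
    require 'adhd/reactor'

    EM.run do
      bank = Adhd::ConnectionBank.new

      # Follow the _changes feed of a (hypothetical) node DB and hand each
      # JSON update line to the Proc, much as NodeManager does for its
      # node, shard and content databases.
      conn = Adhd::UpdateNotifierConnection.new(
        "127.0.0.1", 5984, "node1_node_db",
        Proc.new { |json_event| puts "update: #{json_event}" })
      bank.add_connection(conn)

      bank.run_all                              # start every registered connection
      EM.add_periodic_timer(5) { bank.run_all } # restart any feed that has dropped
    end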