adhd 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +62 -2
- data/VERSION +1 -1
- data/adhd.gemspec +10 -8
- data/bin/adhd +23 -8
- data/bin/adhd_cleanup +57 -0
- data/lib/adhd/adhd_rest_server.rb +229 -0
- data/lib/adhd/config.yml +1 -1
- data/lib/adhd/models/content_doc.rb +17 -0
- data/lib/adhd/models/content_shard.rb +97 -0
- data/lib/adhd/models/node_doc.rb +139 -0
- data/lib/adhd/models/shard_range.rb +202 -0
- data/lib/adhd/node_manager.rb +260 -0
- data/lib/adhd/reactor.rb +194 -12
- data/test/test_adhd.rb +0 -11
- metadata +11 -7
- data/lib/adhd.rb +0 -120
- data/lib/adhd/models.rb +0 -388
- data/lib/adhd/node.rb +0 -13
- data/models.rb +0 -19
data/lib/adhd/node_manager.rb
ADDED
@@ -0,0 +1,260 @@
+require 'rubygems'
+require 'couchrest'
+require 'ruby-debug'
+# require File.dirname(__FILE__) + '/models'
+require File.dirname(__FILE__) + '/models/node_doc'
+require File.dirname(__FILE__) + '/models/content_doc'
+require File.dirname(__FILE__) + '/models/shard_range'
+require File.dirname(__FILE__) + '/models/content_shard'
+
+module Adhd
+  class NodeManager
+
+    attr_accessor :our_node, :ndb, :srdb
+
+    def initialize(config)
+      @config = config
+      @couch_server = CouchRest.new("http://#{config.node_url}:#{config.couchdb_server_port}")
+      # @couch_server.default_database = "#{config.node_name}_node_db"
+      @couch_db = @couch_server.database!("#{config.node_name}_node_db") # CouchRest::Database.new(@couch_server, "#{config.node_name}_node_db")
+      sync_with_buddy_node if config.buddy_server_url && config.buddy_server_db_name
+      @our_node = initialize_node
+      build_node_admin_databases
+      set_as_management_node_if_necessary
+      build_shards(10, 2)
+      build_node_content_databases
+      sync_databases
+    end
+
+    def event_handler(ev)
+      puts ev
+    end
+
+    # Sync the db with our buddy
+    #
+    def sync_with_buddy_node
+      begin
+        buddy_server = CouchRest.new("#{@config.buddy_server_url}")
+        buddy_db = buddy_server.database!(@config.buddy_server_db_name + "_node_db")
+        @couch_db.replicate_from(buddy_db)
+      rescue
+        puts "Could not buddy up with node #{@config.buddy_server_db_name}"
+      end
+    end
+
+    # Retrieve our own node record from CouchDB by our name.
+    #
+    # If there are other nodes with the same name, kill their records!
+    #
+    def initialize_node
+      puts "Initialize node #{@config.node_name}"
+      Node.use_database @couch_db
+      puts "DB #{@couch_db}, node #{@config.node_name}"
+      node_candidates = Node.by_name(:key => @config.node_name)
+      # node_candidates = @couch_db.view("by_name", {:key => @config.node_name})
+      node = node_candidates.pop
+      node = Node.new if node.nil?
+      node_candidates.each do |other_me|
+        other_me.destroy # destroy other records
+      end
+      # Update our very own record
+      node.name = @config.node_name
+      node.url = "http://#{@config.node_url}:#{@config.couchdb_server_port}"
+      node.status = "RUNNING"
+      node.save
+      node # Return our node record
+    end
+
+    # We check if we are the first node. If we are the first node, we set
+    # ourselves up as the management node.
+    #
+    def set_as_management_node_if_necessary
+      all_nodes = Node.by_name
+      if all_nodes.length == 1
+        @our_node.is_management = 300
+        @our_node.save
+      end
+    end
+
+    def build_node_admin_databases
+      @conn_manager = ConnectionBank.new
+
+      # Let's build a nice NodeDB
+      @ndb = NodeDB.new(@our_node)
+      conn_node = UpdateNotifierConnection.new(@config.node_url,
+                                               @config.couchdb_server_port,
+                                               @our_node.name + "_node_db", # NOTE: Sooo ugly!
+                                               Proc.new {|data| handle_node_update data})
+      @conn_manager.add_connection(conn_node)
+
+
+      # Let's build a nice ShardDB
+      @srdb = ShardRangeDB.new(@ndb)
+
+      # Listen to the shard db and in case something changes re-build the DB.
+      # Changes to the shards should be infrequent and tolerable.
+      conn_shard = UpdateNotifierConnection.new(@config.node_url,
+                                                @config.couchdb_server_port,
+                                                @our_node.name + "_shard_db", # NOTE: Sooo ugly!
+                                                Proc.new {|data| build_node_content_databases})
+      @conn_manager.add_connection(conn_shard)
+
+    end
+
+    def handle_node_update update
+      # A node was added, removed or changed status.
+      # If we are the admin, when a node joins we should allocate
+      # some shards to it.
+
+      # Only the head management node deals with node changes
+      return if @ndb.head_management_node && ! (@ndb.head_management_node.name == @our_node.name)
+
+      # Given the shard_db and the node_db we should work out a new allocation
+      node_list = Node.by_name
+      shard_list = ShardRange.by_range_start
+      if node_list && shard_list
+        assign_nodes_to_shards(node_list, shard_list, 2)
+      end
+    end
+
+
+    def build_node_content_databases
+      # Get all content shard databases
+      # NOTE: we will have to refresh those when we are re-assigned shards
+      @contentdbs = {} if !@contentdbs
+      current_shards = @srdb.get_content_shards
+
+      # Add the new shards
+      current_shards.each_key do |cs|
+        if !(@contentdbs.has_key?(cs)) # Make sure we do not know of this shard
+          shard_db = current_shards[cs]
+          conn = UpdateNotifierConnection.new(@config.node_url,
+                                              @config.couchdb_server_port,
+                                              @our_node.name + "_" + shard_db.this_shard.shard_db_name + "_content_db", # NOTE: Sooo ugly!
+                                              Proc.new { |data| shard_db.sync })
+          @conn_manager.add_connection(conn)
+
+          # Store both the shard object and the update notifier
+          @contentdbs[cs] = [shard_db, conn]
+        end
+      end
+
+      # Delete what we do not need
+      @contentdbs.each_key do |cs|
+        if !(current_shards.has_key?(cs))
+          # Delete this shard from our DB
+          remove_content_shard @contentdbs[cs][0], @contentdbs[cs][1]
+          # Remove that key
+          @contentdbs.delete cs
+        end
+      end
+    end
+
+    def remove_content_shard content_shard, connection
+      # Kill the connection listening for updates on this shard
+      connection.kill
+      content_shard.sync
+      # TODO: test if the sync happened
+      # content_shard.this_shard_db.delete!
+      # TODO: run a sync with the current master to ensure that
+      #       any changes have been pushed, THEN delete the database
+      #       to save space
+    end
+
+    def run
+      # Enters the event machine loop
+      @conn_manager.run_all
+    end
+
+    def build_shards(number_of_shards, number_of_replicators)
+      if @our_node.is_management
+
+        if ShardRange.by_range_start.length == 0
+          puts "Creating new ranges"
+          @srdb.build_shards(number_of_shards)
+        end
+
+        # Populate the shards with some nodes at random
+        node_names = []
+        all_nodes = Node.by_name
+        all_nodes.each do |anode|
+          node_names << anode.name
+        end
+
+        ShardRange.by_range_start.each do |s|
+          if !s.node_list or s.node_list.length == 0
+            node_names.shuffle!
+            s.node_list = node_names[0..(number_of_replicators-1)]
+            s.master_node = node_names[0]
+            s.save
+          end
+        end
+      end
+    end
+
+    def sync_databases
+      @ndb.sync # SYNC
+      @srdb.sync # SYNC
+
+      @contentdbs.each_key do |cs|
+        @contentdbs[cs][0].sync
+      end
+    end
+
+    def sync_admin
+      @ndb.sync # SYNC
+      @srdb.sync # SYNC
+    end
+
+  end
+end
+
+# -------- Management node logic -------------------
+
+require 'md5'
+
+def assign_nodes_to_shards(node_list, shard_range_list, replication_factor)
+  # This is an automatic way to allocate shards to nodes that just
+  # arrive in the network, as well as to re-allocate shards if nodes
+  # become unavailable or leave the network.
+
+  # NOTE: How to build skynet (Part III)
+  #
+  # The invariant we try to impose on the list of nodes belonging to a shard
+  # is that there should be at least replication_factor available nodes
+  # in it. At the same time we try to keep the list stable over nodes
+  # joining and leaving. To achieve this we hash in sequence the name of
+  # each node with the name of the shard. We sort this list by hash, and
+  # choose the first n nodes such that at least replication_factor nodes
+  # are available. Then we choose the first available node as the master
+  # for that shard.
+
+  shard_range_list.each do |shard_range|
+    # Sort all nodes using consistent hashing
+    sorted_nodes = node_list.sort_by {|node| MD5.new("#{node.name}||#{shard_range.shard_db_name}").to_s}
+    avail = 0
+    master = nil
+    shard_node_list = []
+    sorted_nodes.each do |node|
+      shard_node_list << node
+      if node.status == "RUNNING"
+        master = node if !master # Choose the first available node to be the master
+        avail += 1
+        break if avail == replication_factor # We have enough available nodes
+      end
+    end
+
+    # Now put this list in the shard_range and save it,
+    # but only if there were changes
+    new_master = master.name if master
+    new_node_list = shard_node_list.map {|node| node.name}
+
+    if !(new_master == shard_range.master_node) or !(new_node_list == shard_range.node_list)
+      shard_range.master_node = master.name if master
+      shard_range.node_list = shard_node_list.map {|node| node.name}
+      shard_range.save
+    end
+  end

+end
+
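The comment block in assign_nodes_to_shards above describes the allocation rule only in prose, so here is a standalone sketch (not part of the gem) that works it through once. The node and shard names are invented, plain hashes stand in for the Node and ShardRange documents, and Digest::MD5 replaces the older md5 library the gem requires.

# Sketch: per-shard node ordering by hashing "node_name||shard_db_name",
# as described in the assign_nodes_to_shards comment block above.
require 'digest/md5'

nodes  = [
  { :name => "node_a", :status => "RUNNING" },
  { :name => "node_b", :status => "DOWN" },
  { :name => "node_c", :status => "RUNNING" },
]
shards = ["shard_0", "shard_1"]
replication_factor = 2

shards.each do |shard|
  # Order the nodes for this shard by the hash of the combined names
  sorted = nodes.sort_by { |n| Digest::MD5.hexdigest("#{n[:name]}||#{shard}") }
  chosen = []
  avail  = 0
  master = nil
  sorted.each do |n|
    chosen << n[:name]
    if n[:status] == "RUNNING"
      master ||= n[:name]              # first available node becomes the master
      avail += 1
      break if avail == replication_factor # stop once enough nodes are available
    end
  end
  puts "#{shard}: nodes=#{chosen.inspect} master=#{master}"
end

Because each node's position in the ordering depends only on the node/shard name pair, a node joining or leaving disturbs relatively few assignments, which is the stability property the comment is after.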
data/lib/adhd/reactor.rb
CHANGED
@@ -4,7 +4,7 @@ require 'eventmachine'
 module Adhd
 
   # This module gets mixed into the EventMachine reactor loop, and sends events
-  # to our Adhd::Node which is hanging around waiting for
+  # to our Adhd::Node which is hanging around waiting for stuff to happen.
   #
   # Currently it just illustrates the fact that we can fire up an EventMachine
   # and have this reactor get mixed in as glue code to our Node.
@@ -29,29 +29,211 @@
 
   end
 
-  # A
-  #
-  #
+  # A notifier client that makes a long-running request to a CouchDB instance
+  # on a socket and continually sends any update notifications that it
+  # receives back to its @node.
   #
-  module
+  module DbUpdateNotifier
 
-    def initialize(
-      puts "Db update
-      @
+    def initialize(db_name, conn_obj)
+      puts "Db update notifier start..."
+      @db_name = db_name
+      @conn_obj = conn_obj
+      @buffer = ""
+      conn_obj.connection_inside = self # We tell the outer object who we are
     end
 
     # Makes a long-running request to a CouchDB instance's _changes URL.
     #
     def post_init
-
+      # NOTE: full domain name needed for virtual hosting
+      req = "GET #{@conn_obj.base_url}/#{@db_name}/_changes?feed=continuous&heartbeat=5000\r\n\r\n"
+      puts req
+      send_data req
     end
 
-    # Shoots update notifications from CouchDB to the @
+    # Shoots update notifications from CouchDB to the @conn.
     #
-    def receive_data data
-
+    def receive_data data
+      # puts "received_data: #{data}"
+      # puts "||#{data}||length=#{data.length}||#{data.dump}||"
+
+      @buffer += data # Add the data to the current buffer
+      updates = []
+      if @buffer =~ /(\{[^\n]+\}\n)/
+        updates += $~.to_a
+        # Trim the buffer to $~.end(0)
+        @buffer = @buffer[$~.end(0)..-1]
+      end
+
+      # Regexp for JSON updates is /\{[\n]\}+/
+      updates.each do |json_event|
+        @conn_obj.event_handler(json_event) unless data == "\n"
+      end
+    end
+
+    #def close_connection
+    #  @conn_obj.close_handler(data)
+    #end
+
+  end
+
+
+  # Note: Some of manos's thoughts on how to manage our connections and events.
+  # We should build a class called connection_manager that we ask to build
+  # and listen to connections, as well as route events. Its job is to
+  # re-open them if they are closed or time out, mark nodes as UNAVAILABLE
+  # and notify us back when data (i.e. an update) arrives. It would also be
+  # nifty if each connection was associated with a predicate: once this
+  # predicate is false we can simply close the connection. For example upon
+  # being given control of a different content shard, or a different master
+  # for the shard.
+
+  # In practice we will have two types of connections: Replicate and Notify.
+
+  class UpdateNotifierConnection
+    attr_accessor :db_name, :base_url, :connection_inside, :name
+
+    def initialize(node_url, couchdb_server_port, db_name, sync_block)
+      @node_url = node_url
+      @couchdb_server_port = couchdb_server_port
+      @db_name = db_name
+      @sync_block = sync_block
+      @status = "NOTRUNNING"
+      @base_url = "http://#{@node_url}:#{@couchdb_server_port}"
+      @name = @base_url + "/" + @db_name
+      @keep_alive = true
+    end
+
+    def kill
+      @keep_alive = false
+    end
+
+    def start
+      puts "Register the connection for #{@db_name}"
+      EM.connect @node_url, @couchdb_server_port, Adhd::DbUpdateNotifier, @db_name, self
+      @status = "RUNNING"
+    end
+
+    def event_handler data
+      # puts "||#{data}||nn"
+      puts "Run a crazy sync on db #{@db_name}"
+      #@db_obj_for_sync.sync
+      @sync_block.call(data)
+    end
+
+    def close_handler
+      puts "Closed abnormally #{reason}"
+      @status = "NOTRUNNING"
+    end
+
+    def down_for_good(reason)
+      if reason
+        puts "Closed for good #{reason}"
+      end
+    end
+
+    def keep_alive?
+      # Returns the truth value of the predicate
+      @keep_alive
+    end
+
+    def keep_alive_or_kill!
+      if ! keep_alive?
+        # Schedule this connection for close
+        connection_inside.close_connection_after_writing
+        @status = "NOTRUNNING"
+      end
+    end
+
+    def should_start?
+      !(@status == "RUNNING")
+    end
+
+    def is_closed?
+      (@status == "NOTRUNNING")
     end
 
+
   end
+
+  class Connection
+    #def on_teardown(&block)
+    #  # Set the handler to be called when a connection is dead
+    #  block(self) # Run the teardown handler
+    #end
+
+    def initialize
+
+    end
+
+    def should_start?
+      !(@status == "RUNNING")
+    end
+
+    def is_closed?
+      (@status == "NOTRUNNING")
+    end
+
+  end
+
+  class ConnectionBank
+    # Manage a bunch of connections for us
+    def initialize
+      @our_connections = []
+    end
+
+    def add_connection(conn)
+      # Make sure we have no duplicates
+      @our_connections.each do |c|
+        if conn.name == c.name
+          return
+        end
+      end
+
+      # If it is happy to run, add it to the list and start it!
+      if conn.keep_alive?
+        @our_connections << conn
+        # Register the teardown handler for when the end comes...
+        # conn.on_teardown(|c| { rerun(c) })
+      end
+    end
+
+    def rerun(conn)
+      # When a connection is down, we check to see if it wants to be kept
+      # alive, and restart it; otherwise we remove it from the list.
+      if conn.keep_alive?
+        begin
+          conn.start
+        rescue Exception => e
+          conn.down_for_good(e)
+        end
+      else
+        # It seems we have died of natural causes
+        # XXX: is it true that Ruby does not throw an exception for EOF?
+        # Otherwise we will never see this
+        conn.keep_alive_or_kill!
+        @our_connections.delete(conn)
+        conn.down_for_good(nil)
+      end
+
+    end
+
+    def run_all
+      # Go through all connections and run them all
+      # Run within EM.run loop
+      # puts "Connection bank runs all... (#{@our_connections.length} connections)"
+      @our_connections.each do |c|
+        if c.is_closed? or !c.keep_alive?
+          puts "Actually rerun #{c.db_name}..."
+
+          rerun(c)
+        end
+      end
+
+    end
+
+  end
+
 end
 
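For orientation, here is a minimal usage sketch (not part of the gem) of the connection classes added to reactor.rb: an UpdateNotifierConnection is registered with a ConnectionBank, and a periodic timer inside the EventMachine loop calls run_all so that connections that are not yet running, or have dropped, get (re)started. The host, port and database name are invented, and the require path assumes the gem's lib directory is on the load path.

# Sketch: watch a hypothetical CouchDB database's continuous _changes feed
# using the classes from data/lib/adhd/reactor.rb shown above.
require 'rubygems'
require 'eventmachine'
require 'adhd/reactor' # assumes the adhd gem is installed / on $LOAD_PATH

EM.run do
  bank = Adhd::ConnectionBank.new

  # Print every change notification line the feed sends back.
  conn = Adhd::UpdateNotifierConnection.new("127.0.0.1", 5984,
                                            "example_node_db",
                                            Proc.new { |data| puts "change: #{data}" })
  bank.add_connection(conn)

  # run_all starts closed connections and restarts dropped ones.
  EM.add_periodic_timer(1) { bank.run_all }
end

In the gem itself this wiring is done by NodeManager (node_manager.rb above), which registers its node, shard and content-shard notifier connections with a ConnectionBank and triggers them via run_all from its run method.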