adhd 0.0.1 → 0.1.0

@@ -0,0 +1,260 @@
+ require 'rubygems'
+ require 'couchrest'
+ require 'ruby-debug'
+ # require File.dirname(__FILE__) + '/models'
+ require File.dirname(__FILE__) + '/models/node_doc'
+ require File.dirname(__FILE__) + '/models/content_doc'
+ require File.dirname(__FILE__) + '/models/shard_range'
+ require File.dirname(__FILE__) + '/models/content_shard'
+
+ module Adhd
+   class NodeManager
+
+     attr_accessor :our_node, :ndb, :srdb
+
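+     # Boot sequence: connect to the local CouchDB, optionally pull state
+     # from a buddy node, register our own node record, build the admin
+     # databases and their notifiers, claim the management role if we are
+     # alone, create shards, open our content shards and run a first sync.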
+     def initialize(config)
+       @config = config
+       @couch_server = CouchRest.new("http://#{config.node_url}:#{config.couchdb_server_port}")
+       # @couch_server.default_database = "#{config.node_name}_node_db"
+       @couch_db = @couch_server.database!("#{config.node_name}_node_db") # CouchRest::Database.new(@couch_server, "#{config.node_name}_node_db")
+       sync_with_buddy_node if config.buddy_server_url && config.buddy_server_db_name
+       @our_node = initialize_node
+       build_node_admin_databases
+       set_as_management_node_if_necessary
+       build_shards(10, 2)
+       build_node_content_databases
+       sync_databases
+     end
+
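+     # Debugging hook: currently just prints whatever event reaches us.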
+     def event_handler(ev)
+       puts ev
+     end
+
+     # Sync the db with our buddy node
+     #
+     def sync_with_buddy_node
+       begin
+         buddy_server = CouchRest.new("#{@config.buddy_server_url}")
+         buddy_db = buddy_server.database!(@config.buddy_server_db_name + "_node_db")
+         @couch_db.replicate_from(buddy_db)
+       rescue
+         puts "Could not buddy up with node #{@config.buddy_server_db_name}"
+       end
+     end
+
+     # Retrieve our own node record from CouchDB by our name.
+     #
+     # If there are other nodes with the same name, kill their records!
+     #
+     def initialize_node
+       puts "Initialize node #{@config.node_name}"
+       Node.use_database @couch_db
+       puts "DB #{@couch_db}, node #{@config.node_name}"
+       node_candidates = Node.by_name(:key => @config.node_name)
+       # node_candidates = @couch_db.view("by_name", {:key => @config.node_name})
+       node = node_candidates.pop
+       node = Node.new if node.nil?
+       node_candidates.each do |other_me|
+         other_me.destroy # Destroy any other records claiming our name
+       end
+       # Update our very own record
+       node.name = @config.node_name
+       node.url = "http://#{@config.node_url}:#{@config.couchdb_server_port}"
+       node.status = "RUNNING"
+       node.save
+       node # Return our node record; the caller stores it in @our_node
+     end
+
+     # Check if we are the first node. If we are the first node, we set
+     # ourselves up as the management node.
+     #
+     def set_as_management_node_if_necessary
+       all_nodes = Node.by_name
+       if all_nodes.length == 1
+         @our_node.is_management = 300
+         @our_node.save
+       end
+     end
+
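+     # Build the node and shard admin databases, and register update
+     # notifier connections on both: node changes feed handle_node_update,
+     # shard changes trigger a rebuild of our content databases.
+     #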
+     def build_node_admin_databases
+       @conn_manager = ConnectionBank.new
+
+       # Let's build a nice NodeDB
+       @ndb = NodeDB.new(@our_node)
+       conn_node = UpdateNotifierConnection.new(@config.node_url,
+                     @config.couchdb_server_port,
+                     @our_node.name + "_node_db", # NOTE: Sooo ugly!
+                     Proc.new {|data| handle_node_update data})
+       @conn_manager.add_connection(conn_node)
+
+       # Let's build a nice ShardDB
+       @srdb = ShardRangeDB.new(@ndb)
+
+       # Listen to the shard db and in case something changes re-build the
+       # content DBs. Changes to the shards should be infrequent and tolerable.
+       conn_shard = UpdateNotifierConnection.new(@config.node_url,
+                      @config.couchdb_server_port,
+                      @our_node.name + "_shard_db", # NOTE: Sooo ugly!
+                      Proc.new {|data| build_node_content_databases})
+       @conn_manager.add_connection(conn_shard)
+     end
+
+     def handle_node_update update
+       # A node was added or removed, or its status changed. If we are
+       # the admin, when a node joins we should allocate some shards to it.
+
+       # Only the head management node deals with node changes
+       return if @ndb.head_management_node && @ndb.head_management_node.name != @our_node.name
+
+       # Given the shard db and the node db we should work out a new allocation
+       node_list = Node.by_name
+       shard_list = ShardRange.by_range_start
+       if node_list && shard_list
+         assign_nodes_to_shards(node_list, shard_list, 2)
+       end
+     end
+
+     def build_node_content_databases
+       # Get all content shard databases
+       # NOTE: we will have to refresh those when we are re-assigned shards
+       @contentdbs = {} if !@contentdbs
+       current_shards = @srdb.get_content_shards
+
+       # Add the new shards
+       current_shards.each_key do |cs|
+         if !(@contentdbs.has_key?(cs)) # Make sure we do not know of this shard
+           shard_db = current_shards[cs]
+           conn = UpdateNotifierConnection.new(@config.node_url,
+                    @config.couchdb_server_port,
+                    @our_node.name + "_" + shard_db.this_shard.shard_db_name + "_content_db", # NOTE: Sooo ugly!
+                    Proc.new { |data| shard_db.sync })
+           @conn_manager.add_connection(conn)
+
+           # Store both the shard object and the update notifier
+           @contentdbs[cs] = [shard_db, conn]
+         end
+       end
+
+       # Delete what we do not need
+       @contentdbs.each_key do |cs|
+         if !(current_shards.has_key?(cs))
+           # Delete this shard from our DB
+           remove_content_shard @contentdbs[cs][0], @contentdbs[cs][1]
+           # Remove that key
+           @contentdbs.delete cs
+         end
+       end
+     end
+
+     def remove_content_shard content_shard, connection
+       # Kill the connection listening for updates on this shard
+       connection.kill
+       content_shard.sync
+       # TODO: test if the sync happened
+       # content_shard.this_shard_db.delete!
+       # TODO: run a sync with the current master to ensure that any
+       #       changes have been pushed, then DELETE the database to
+       #       save space
+     end
+
+     def run
+       # Enter the event machine loop
+       @conn_manager.run_all
+     end
+
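+     # If we are the management node, create the initial shard ranges
+     # (when none exist yet) and seed each empty shard with a random set
+     # of number_of_replicators nodes, the first of which becomes master.
+     #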
+     def build_shards(number_of_shards, number_of_replicators)
+       if @our_node.is_management
+         if ShardRange.by_range_start.length == 0
+           puts "Creating new ranges"
+           @srdb.build_shards(number_of_shards)
+         end
+
+         # Populate the shards with some nodes at random
+         node_names = []
+         all_nodes = Node.by_name
+         all_nodes.each do |anode|
+           node_names << anode.name
+         end
+
+         ShardRange.by_range_start.each do |s|
+           if !s.node_list or s.node_list.length == 0
+             node_names.shuffle!
+             s.node_list = node_names[0..(number_of_replicators-1)]
+             s.master_node = node_names[0]
+             s.save
+           end
+         end
+       end
+     end
+
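+     # Sync the node and shard admin databases, then every content shard
+     # database we currently hold.
+     #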
+     def sync_databases
+       @ndb.sync # SYNC
+       @srdb.sync # SYNC
+
+       @contentdbs.each_key do |cs|
+         @contentdbs[cs][0].sync
+       end
+     end
+
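+     # Sync only the administrative databases (nodes and shards).
+     #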
+     def sync_admin
+       @ndb.sync # SYNC
+       @srdb.sync # SYNC
+     end
+
+   end
+ end
+
+ # -------- Management node logic -------------------
+
+ require 'md5'
+
+ def assign_nodes_to_shards(node_list, shard_range_list, replication_factor)
+   # This is an automatic way to allocate shards to nodes that just
+   # arrive in the network, as well as re-allocate shards if nodes
+   # become unavailable or leave the network.
+
+   # NOTE: How to build skynet (Part III)
+   #
+   # The invariant we try to impose on the list of nodes that are part
+   # of a shard is that there should be at least replication_factor
+   # available nodes in it. At the same time we try to keep the list
+   # stable as nodes join and leave. To achieve this we hash in sequence
+   # the name of each node with the name of the shard. We sort this list
+   # by hash, and choose the first n nodes such that at least
+   # replication_factor nodes are available. Then we choose the first
+   # available node as the master for that shard.
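+   #
+   # For example, with nodes n1..n4 and replication_factor 2, a shard's
+   # hash-sorted list might come out as [n3, n1, n4, n2] (illustrative
+   # ordering, not real MD5 values). If n3 is not RUNNING we keep
+   # walking: the stored node list becomes [n3, n1, n4] and n1, as the
+   # first available node, becomes the master.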
+
+   shard_range_list.each do |shard_range|
+     # Sort all nodes using consistent hashing
+     sorted_nodes = node_list.sort_by {|node| MD5.new("#{node.name}||#{shard_range.shard_db_name}").to_s}
+     avail = 0
+     master = nil
+     shard_node_list = []
+     sorted_nodes.each do |node|
+       shard_node_list << node
+       if node.status == "RUNNING"
+         master = node if !master # Choose the first available node as master
+         avail += 1
+         break if avail == replication_factor # We have enough available nodes
+       end
+     end
+
+     # Now put this list in the shard_range and save it,
+     # but only if there were changes
+     new_master = master.name if master
+     new_node_list = shard_node_list.map {|node| node.name}
+
+     if new_master != shard_range.master_node or new_node_list != shard_range.node_list
+       shard_range.master_node = new_master if master
+       shard_range.node_list = new_node_list
+       shard_range.save
+     end
+   end
+
+ end
+
@@ -4,7 +4,7 @@ require 'eventmachine'
  module Adhd
 
    # This module gets mixed into the EventMachine reactor loop, and sends events
-   # to our Adhd::Node which is hanging around waiting for shit to happen.
+   # to our Adhd::Node which is hanging around waiting for stuff to happen.
    #
    # Currently it just illustrates the fact that we can fire up an EventMachine
    # and have this reactor get mixed in as glue code to our Node.
@@ -29,29 +29,211 @@ module Adhd
 
    end
 
-   # A reactor that makes a long-running request to a CouchDB instance (using
-   # Comet technology) and continually sends any update notifications that it
-   # gets back to its @node.
+   # A notifier client that makes a long-running request to a CouchDB instance
+   # on a socket and continually sends any update notifications that it
+   # receives back to its @node.
    #
-   module DbUpdateReactor
+   module DbUpdateNotifier
 
-     def initialize(node)
-       puts "Db update reactor start..."
-       @node = node
+     def initialize(db_name, conn_obj)
+       puts "Db update notifier start..."
+       @db_name = db_name
+       @conn_obj = conn_obj
+       @buffer = ""
+       conn_obj.connection_inside = self # We tell the outer object who we are
      end
 
      # Makes a long-running request to a CouchDB instance's _changes URL.
      #
      def post_init
-       send_data "GET http://192.168.1.104:5984/bar_node_db/_changes?feed=continuous&heartbeat=5000\r\n\r\n"
+       # NOTE: full domain name needed for virtual hosting
+       req = "GET #{@conn_obj.base_url}/#{@db_name}/_changes?feed=continuous&heartbeat=5000\r\n\r\n"
+       puts req
+       send_data req
      end
 
-     # Shoots update notifications from CouchDB to the @node.
+     # Shoots update notifications from CouchDB to the @conn.
      #
-     def receive_data data
-       @node.event_handler(data)
+     def receive_data data
+       # puts "received_data: #{data}"
+       # puts "||#{data}||length=#{data.length}||#{data.dump}||"
+
+       @buffer += data # Add the new data to the current buffer
+
+       # Each update is a one-line JSON object terminated by "\n"; heartbeat
+       # newlines never match the pattern and are trimmed away with the match.
+       updates = []
+       while @buffer =~ /(\{[^\n]+\}\n)/
+         updates << $1 # Keep only the captured JSON line, not the whole MatchData
+         @buffer = @buffer[$~.end(0)..-1] # Trim the processed part off the buffer
+       end
+
+       updates.each do |json_event|
+         @conn_obj.event_handler(json_event)
+       end
+     end
+
+     #def close_connection
+     #  @conn_obj.close_handler(data)
+     #end
+
+   end
+
+   # Note: Some of manos's thoughts on how to manage our connections and
+   # events. We should build a class called connection_manager that we ask
+   # to build and listen to connections, as well as route events. Its job
+   # is to re-open them if they are closed or time out, mark nodes as
+   # UNAVAILABLE and notify us back when data (i.e. an update) arrives. It
+   # would also be nifty if each connection was associated with a
+   # predicate: once this predicate is false we can simply close the
+   # connection, for example upon being given control of a different
+   # content shard, or a different master for the shard.
+
+   # In practice we will have two types of connections: Replicate and Notify.
+
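+   # Wraps one EventMachine DbUpdateNotifier connection to a single
+   # database, tracks whether it is RUNNING, and hands every update
+   # notification to the sync block supplied by the NodeManager.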
+   class UpdateNotifierConnection
+     attr_accessor :db_name, :base_url, :connection_inside, :name
+
+     def initialize(node_url, couchdb_server_port, db_name, sync_block)
+       @node_url = node_url
+       @couchdb_server_port = couchdb_server_port
+       @db_name = db_name
+       @sync_block = sync_block
+       @status = "NOTRUNNING"
+       @base_url = "http://#{@node_url}:#{@couchdb_server_port}"
+       @name = @base_url + "/" + @db_name
+       @keep_alive = true
+     end
+
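+     # Mark this connection for closing; the ConnectionBank reaps it on
+     # its next sweep via keep_alive_or_kill!.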
+     def kill
+       @keep_alive = false
+     end
+
+     def start
+       puts "Register the connection for #{@db_name}"
+       EM.connect @node_url, @couchdb_server_port, Adhd::DbUpdateNotifier, @db_name, self
+       @status = "RUNNING"
+     end
+
+     def event_handler data
+       # puts "||#{data}||nn"
+       puts "Run a crazy sync on db #{@db_name}"
+       # @db_obj_for_sync.sync
+       @sync_block.call(data)
+     end
+
+     def close_handler(reason = nil)
+       puts "Closed abnormally #{reason}"
+       @status = "NOTRUNNING"
+     end
+
+     def down_for_good(reason)
+       if reason
+         puts "Closed for good #{reason}"
+       end
+     end
+
+     def keep_alive?
+       # Returns the truth value of the predicate
+       @keep_alive
+     end
+
+     def keep_alive_or_kill!
+       if ! keep_alive?
+         # Schedule this connection for close
+         connection_inside.close_connection_after_writing
+         @status = "NOTRUNNING"
+       end
+     end
+
+     def should_start?
+       !(@status == "RUNNING")
+     end
+
+     def is_closed?
+       (@status == "NOTRUNNING")
      end
 
+
    end
+
+   class Connection
+     #def on_teardown(&block)
+     #  # Set the handler to be called when a connection is dead
+     #  block(self) # Run the teardown handler
+     #end
+
+     def initialize
+     end
+
+     def should_start?
+       !(@status == "RUNNING")
+     end
+
+     def is_closed?
+       (@status == "NOTRUNNING")
+     end
+
+   end
+
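+   # Keeps every live connection in one place: adds new ones, restarts
+   # dropped ones that still want to live, and discards the rest. Meant
+   # to be driven from inside the EventMachine loop.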
+   class ConnectionBank
+     # Manage a bunch of connections for us
+     def initialize
+       @our_connections = []
+     end
+
+     def add_connection(conn)
+       # Make sure we have no duplicates
+       @our_connections.each do |c|
+         if conn.name == c.name
+           return
+         end
+       end
+
+       # If it is happy to run, add it to the list; run_all starts it later
+       if conn.keep_alive?
+         @our_connections << conn
+         # Register the teardown handler for when the end comes...
+         # conn.on_teardown(|c| { rerun(c) })
+       end
+     end
+
+     def rerun(conn)
+       # When a connection is down, we check to see if it wants to be kept
+       # alive, and restart it; otherwise we remove it from the list.
+       if conn.keep_alive?
+         begin
+           conn.start
+         rescue Exception => e
+           conn.down_for_good(e)
+         end
+       else
+         # It seems we have died of natural causes
+         # XXX: is it true that Ruby does not throw an exception for EOF?
+         #      Otherwise we will never see this
+         conn.keep_alive_or_kill!
+         @our_connections.delete(conn)
+         conn.down_for_good(nil)
+       end
+     end
+
+     def run_all
+       # Go through all connections and restart any that are closed but
+       # still want to live. Run within the EM.run loop.
+       # puts "Connection bank runs all... (#{@our_connections.length} connections)"
+       @our_connections.each do |c|
+         if c.is_closed? or !c.keep_alive?
+           puts "Actually rerun #{c.db_name}..."
+           rerun(c)
+         end
+       end
+     end
+
+   end
+
  end