wakame 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/LICENSE +202 -0
- data/Rakefile +11 -12
- data/VERSION +1 -1
- data/app_generators/wakame/templates/bin/wakame-agent +1 -1
- data/app_generators/wakame/templates/bin/wakame-master +1 -1
- data/app_generators/wakame/templates/config/cluster.rb +32 -24
- data/app_generators/wakame/templates/config/init.d/centos/wakame-agent +40 -0
- data/app_generators/wakame/templates/config/init.d/centos/wakame-master +40 -0
- data/app_generators/wakame/templates/config/init.d/wakame-master +1 -1
- data/lib/ext/eventmachine.rb +5 -5
- data/lib/wakame.rb +12 -0
- data/lib/wakame/action.rb +10 -11
- data/lib/wakame/actions/deploy_application.rb +61 -0
- data/lib/wakame/actions/deploy_config.rb +1 -3
- data/lib/wakame/actions/freeze_cluster.rb +15 -0
- data/lib/wakame/actions/launch_cluster.rb +1 -3
- data/lib/wakame/actions/launch_vm.rb +1 -1
- data/lib/wakame/actions/migrate_service.rb +4 -3
- data/lib/wakame/actions/notify_child_changed.rb +3 -6
- data/lib/wakame/actions/notify_parent_changed.rb +4 -7
- data/lib/wakame/actions/propagate_resource.rb +1 -3
- data/lib/wakame/actions/propagate_service.rb +1 -3
- data/lib/wakame/actions/register_agent.rb +43 -0
- data/lib/wakame/actions/reload_service.rb +2 -2
- data/lib/wakame/actions/shutdown_cluster.rb +4 -6
- data/lib/wakame/actions/shutdown_vm.rb +27 -6
- data/lib/wakame/actions/start_service.rb +40 -32
- data/lib/wakame/actions/stop_service.rb +8 -10
- data/lib/wakame/actions/unfreeze_cluster.rb +15 -0
- data/lib/wakame/actor.rb +2 -5
- data/lib/wakame/actor/deploy.rb +110 -0
- data/lib/wakame/actor/monitor.rb +14 -0
- data/lib/wakame/actor/s3fs.rb +45 -0
- data/lib/wakame/actor/service_monitor.rb +0 -17
- data/lib/wakame/actor/system.rb +5 -1
- data/lib/wakame/agent.rb +29 -179
- data/lib/wakame/agent_manager.rb +11 -0
- data/lib/wakame/agent_managers/actor_manager.rb +101 -0
- data/lib/wakame/agent_managers/monitor_manager.rb +48 -0
- data/lib/wakame/command.rb +4 -7
- data/lib/wakame/command/actor.rb +9 -12
- data/lib/wakame/command/agent_status.rb +2 -2
- data/lib/wakame/command/control_service.rb +66 -0
- data/lib/wakame/command/deploy_application.rb +18 -0
- data/lib/wakame/command/deploy_config.rb +16 -0
- data/lib/wakame/command/launch_cluster.rb +1 -1
- data/lib/wakame/command/launch_vm.rb +1 -1
- data/lib/wakame/command/propagate_resource.rb +1 -1
- data/lib/wakame/command/propagate_service.rb +5 -3
- data/lib/wakame/command/reload_service.rb +1 -1
- data/lib/wakame/command/shutdown_cluster.rb +1 -1
- data/lib/wakame/command/shutdown_vm.rb +37 -11
- data/lib/wakame/command/start_service.rb +1 -1
- data/lib/wakame/command/status.rb +6 -4
- data/lib/wakame/command/stop_service.rb +1 -1
- data/lib/wakame/configuration.rb +5 -0
- data/lib/wakame/event.rb +85 -33
- data/lib/wakame/event_dispatcher.rb +2 -2
- data/lib/wakame/initializer.rb +97 -31
- data/lib/wakame/master.rb +23 -346
- data/lib/wakame/master_manager.rb +11 -0
- data/lib/wakame/master_managers/action_manager.rb +321 -0
- data/lib/wakame/master_managers/agent_monitor.rb +166 -0
- data/lib/wakame/master_managers/cluster_manager.rb +176 -0
- data/lib/wakame/master_managers/command_queue.rb +133 -0
- data/lib/wakame/models/agent_pool.rb +113 -0
- data/lib/wakame/models/application_repository.rb +34 -0
- data/lib/wakame/models/object_store.rb +32 -0
- data/lib/wakame/models/service_cluster_pool.rb +36 -0
- data/lib/wakame/monitor.rb +3 -6
- data/lib/wakame/monitor/agent.rb +9 -6
- data/lib/wakame/monitor/service.rb +56 -29
- data/lib/wakame/runner/administrator_command.rb +210 -24
- data/lib/wakame/runner/agent.rb +2 -0
- data/lib/wakame/runner/master.rb +2 -1
- data/lib/wakame/service.rb +140 -130
- data/lib/wakame/status_db.rb +101 -121
- data/lib/wakame/util.rb +26 -15
- data/tests/setup_master.rb +1 -0
- data/tests/test_master.rb +0 -2
- data/tests/test_model_agent_pool.rb +21 -0
- data/tests/test_service.rb +14 -8
- data/tests/test_status_db.rb +2 -0
- data/tests/test_util.rb +12 -1
- data/wakame_generators/resource/templates/apache_app/apache_app.rb +20 -11
- data/wakame_generators/resource/templates/apache_app/conf/vh/aaa.test.conf +1 -1
- data/wakame_generators/resource/templates/apache_lb/apache_lb.rb +7 -7
- data/wakame_generators/resource/templates/apache_lb/conf/system-lb.conf +6 -4
- data/wakame_generators/resource/templates/apache_www/apache_www.rb +15 -13
- data/wakame_generators/resource/templates/ec2_elastic_ip/ec2_elastic_ip.rb +17 -17
- data/wakame_generators/resource/templates/ec2_elb/ec2_elb.rb +22 -15
- data/wakame_generators/resource/templates/mongodb/conf/mongodb.conf +95 -0
- data/wakame_generators/resource/templates/mongodb/init.d/mongodb +244 -0
- data/wakame_generators/resource/templates/mongodb/mongodb.rb +64 -0
- data/wakame_generators/resource/templates/mysql_master/mysql_master.rb +17 -21
- data/wakame_generators/resource/templates/nginx/conf/nginx.conf +4 -0
- data/wakame_generators/resource/templates/nginx/conf/vh/ec2_elb_common.conf +19 -0
- data/wakame_generators/resource/templates/nginx/init.d/nginx +6 -0
- data/wakame_generators/resource/templates/nginx/init.d/spawn-fcgi +46 -0
- data/wakame_generators/resource/templates/nginx/nginx.rb +15 -10
- data/wakame_generators/resource/templates/nginx_passenger/conf/nginx-passenger.conf +39 -0
- data/wakame_generators/resource/templates/nginx_passenger/conf/vh/ec2_elb_common.conf +10 -0
- data/wakame_generators/resource/templates/nginx_passenger/init.d/nginx-passenger +70 -0
- data/wakame_generators/resource/templates/nginx_passenger/nginx_passenger.rb +71 -0
- data/wakame_generators/resource/templates/s3fs/s3fs.rb +24 -0
- metadata +195 -74
- data/lib/wakame/action_manager.rb +0 -303
- data/lib/wakame/command/clone_service.rb +0 -12
- data/lib/wakame/command_queue.rb +0 -135
- data/lib/wakame/vm_manipulator.rb +0 -187
@@ -0,0 +1,321 @@
|
|
1
|
+
|
2
|
+
require 'thread'
|
3
|
+
require 'timeout'
|
4
|
+
|
5
|
+
|
6
|
+
module Wakame
|
7
|
+
# Error type for cancelling an action.
# NOTE(review): not raised or rescued anywhere in the visible code - confirm its users.
class CancelActionError < StandardError; end
|
8
|
+
# Raised into the worker threads of a job's running actions by
# ActionManager#cancel_action and rescued in ActionManager#run_action,
# where the affected action is marked as :canceled.
class CancelBroadcast < StandardError; end
|
9
|
+
# Error type related to the global lock.
# NOTE(review): not raised or rescued anywhere in the visible code - confirm its users.
class GlobalLockError < StandardError; end
|
10
|
+
|
11
|
+
module MasterManagers
|
12
|
+
# Runs Action objects as "jobs".
#
# A job is a tree of actions identified by a job ID. The manager keeps a
# context Hash for every job that has not completed yet (see
# #create_job_context), schedules each action through EventMachine
# (EM.next_tick + EM.defer) and fires job/action lifecycle events through ED
# (presumably an alias of EventDispatcher - confirm).
class ActionManager
  include MasterManager

  # active_jobs:: Hash of job_id => job context Hash for unfinished jobs.
  # lock_queue::  the LockQueue created in #initialize; #process_job_complete
  #               calls quit(job_id) on it when a job ends.
  attr_reader :active_jobs, :lock_queue

  # The Wakame::Master singleton.
  def master
    Wakame::Master.instance
  end

  # Shortcut to master.command_queue.
  def command_queue
    master.command_queue
  end

  # Shortcut to master.agent_monitor.
  def agent_monitor
    master.agent_monitor
  end

  def initialize()
    @active_jobs = {}
    # NOTE(review): nothing in this class appends to @job_history.
    @job_history = []
    @lock_queue = LockQueue.new
  end

  # Manager life-cycle hook; nothing to set up here.
  def init
  end

  # Manager life-cycle hook; nothing to tear down here.
  def terminate
  end

  # Cancels the job +job_id+ by raising CancelBroadcast in the thread of every
  # action of the job that is :running on a live thread other than the caller's.
  #
  # Logs a warning and returns when the job is unknown; returns silently when
  # the job context already has :complete_at set.
  def cancel_action(job_id)
    job_context = @active_jobs[job_id]
    if job_context.nil?
      Wakame.log.warn("JOB ID #{job_id} was not running.")
      return
    end

    return if job_context[:complete_at]

    root_act = job_context[:root_action]

    # Depth-first walk over the action tree.
    walk_subactions = proc { |a|
      if a.status == :running && (a.target_thread && a.target_thread.alive?) && a.target_thread != Thread.current
        Wakame.log.debug "Raising CancelBroadcast exception: #{a.class} #{a.target_thread}(#{a.target_thread.status}), current=#{Thread.current}"
        # Broadcast the special exception to all
        a.target_thread.raise(CancelBroadcast, "It's broadcasted from #{a.class}")
      end
      a.subactions.each { |n|
        walk_subactions.call(n)
      }
    }

    # Thread.critical= only exists on Ruby 1.8. Use it when available so the
    # whole broadcast is delivered without being interrupted, and skip it on
    # newer interpreters where calling it would raise NoMethodError.
    has_critical = Thread.respond_to?(:critical=)
    begin
      Thread.critical = true if has_critical
      walk_subactions.call(root_act)
    ensure
      Thread.critical = false if has_critical
      # IMPORTANT: Ensure the worker thread to handle the exception.
      Thread.pass
    end
  end

  # Creates a new job with +action+ as its root action and schedules it.
  # When a block is given it is wrapped in an Action::ProcAction, which then
  # replaces +action+.
  #
  # Returns the new job ID.
  # Raises ArgumentError when the resulting action is not an Action.
  def trigger_action(action=nil, &blk)
    if blk
      action = Action::ProcAction.new(blk)
    end

    raise ArgumentError unless action.is_a?(Action)
    context = create_job_context(action)
    action.action_manager = self
    action.job_id = context[:job_id]

    run_action(action)
    action.job_id
  end

  # Schedules +action+, which must already carry the job_id of an active job.
  #
  # On the next EM tick the job is marked as started (first action only, fires
  # Event::JobStart) and the action body is handed to EM.defer. The deferred
  # proc runs action.run, records the completion status (:succeeded,
  # :canceled or :failed), fires the matching Action* event and finally calls
  # #process_job_complete via StatusDB.pass.
  #
  # Raises ArgumentError when +action+ is not an Action and RuntimeError when
  # its job is not in #active_jobs.
  def run_action(action)
    raise ArgumentError unless action.is_a?(Action)
    job_context = @active_jobs[action.job_id]
    raise "The job session is killed.: job_id=#{action.job_id}" if job_context.nil?

    EM.next_tick {
      begin
        if job_context[:start_at].nil?
          job_context[:start_at] = Time.new
          ED.fire_event(Event::JobStart.new(action.job_id))
        end

        EM.defer proc {
          res = nil
          begin
            action.bind_thread(Thread.current)
            action.status = :running
            Wakame.log.debug("Start action : #{action.class.to_s} #{action.parent_action.nil? ? '' : ('sub-action of ' + action.parent_action.class.to_s)}")
            ED.fire_event(Event::ActionStart.new(action))
            begin
              action.run
              action.completion_status = :succeeded
              Wakame.log.debug("Complete action : #{action.class.to_s}")
              ED.fire_event(Event::ActionComplete.new(action))
            end
          rescue CancelBroadcast => e
            Wakame.log.info("Received cancel signal: #{e}")
            action.completion_status = :canceled
            begin
              action.on_canceled
            rescue => hook_error
              # Use a separate variable: rescuing into `e` would replace the
              # cancel exception that is reported below.
              Wakame.log.error(hook_error)
            end
            ED.fire_event(Event::ActionFailed.new(action, e))
            res = e
          rescue => e
            Wakame.log.error("Failed action : #{action.class.to_s} due to #{e}")
            Wakame.log.error(e)
            action.completion_status = :failed
            begin
              action.on_failed
            rescue => hook_error
              # Same as above: keep `e` pointing at the original failure so the
              # event, the parent notification and the job result report it.
              Wakame.log.error(hook_error)
            end
            ED.fire_event(Event::ActionFailed.new(action, e))
            # Escalate the cancelation event to parents.
            unless action.parent_action.nil?
              action.parent_action.notify(e)
            end
            # Force to cancel the current job when the root action ignored the elevated exception.
            if action === job_context[:root_action]
              Wakame.log.warn("The escalated exception (#{e.class}) has reached to the root action (#{action.class}). Forcing to cancel the current job #{job_context[:job_id]}")
              cancel_action(job_context[:job_id]) #rescue Wakame.log.error($!)
            end
            res = e
          ensure
            action.status = :complete
            action.bind_thread(nil)
          end

          StatusDB.pass {
            process_job_complete(action, res)
          }
        }
      rescue => e
        Wakame.log.error(e)
      end
    }
  end

  private

  # Assigns a fresh job ID (Wakame.gen_id) to +root_action+, registers a new
  # context Hash for it in @active_jobs and returns that Hash.
  #
  # Raises ArgumentError when +root_action+ is not an Action.
  def create_job_context(root_action)
    raise ArgumentError unless root_action.is_a?(Action)
    root_action.job_id = job_id = Wakame.gen_id

    @active_jobs[job_id] = {
      :job_id=>job_id,
      :create_at=>Time.now,
      :start_at=>nil,
      :complete_at=>nil,
      :completion_status=>nil,
      :root_action=>root_action,
      :notes=>{}
    }
  end

  # Called after every action finishes. Does nothing until all actions of the
  # job report status :complete; then releases the job's locks, records the
  # outcome in the job context, fires JobSuccess/JobFailed followed by
  # JobComplete and removes the job from @active_jobs.
  #
  # +res+ is the exception the finishing action ended with, or nil.
  def process_job_complete(action, res)
    job_id = action.job_id
    job_context = @active_jobs[job_id] || return

    actary = []
    job_context[:root_action].walk_subactions {|a| actary << a }

    actary.all? { |act| act.status == :complete } || return
    @lock_queue.quit(job_id)

    if res.is_a?(Exception)
      job_context[:exception]=res
    end

    job_context[:complete_at]=Time.now

    # The job succeeds only when every action in the tree succeeded.
    if actary.all? { |act| act.completion_status == :succeeded }
      end_status = :succeeded
    else
      end_status = :failed
    end
    job_context[:completion_status] = end_status

    case end_status
    when :succeeded
      ED.fire_event(Event::JobSuccess.new(action.job_id))
    when :failed
      ED.fire_event(Event::JobFailed.new(action.job_id, res))
    end
    ED.fire_event(Event::JobComplete.new(action.job_id, end_status))

    @active_jobs.delete(job_id)
  end

end
|
213
|
+
|
214
|
+
# FIFO lock table keyed by resource name.
#
# Every resource has an ordered list of job IDs. A job that reserved one or
# more resources is :runnable only while it is first in line for all of them.
# Waiting threads block on a per-thread Queue and are woken whenever a job
# releases its reservations with #quit.
class LockQueue
  def initialize()
    @locks = {}            # resource name (String) => [job ids in arrival order]
    @id2res = {}           # job id => { resource name => 1 }
    @self_m = ::Mutex.new  # guards @locks and @id2res

    @queue_by_thread = {}  # waiting Thread => ::Queue used to wake it up
    @qbt_m = ::Mutex.new   # guards @queue_by_thread
  end

  # Appends +id+ to the waiting line of +resource+ (identified by
  # resource.to_s). Does nothing when +id+ already holds or reserves it.
  def set(resource, id)
    @self_m.synchronize {
      # Ths Job ID already holds/reserves the lock regarding the resource.
      return if @id2res.has_key?(id) && @id2res[id].has_key?(resource.to_s)

      @locks[resource.to_s] ||= []
      @id2res[id] ||= {}

      @id2res[id][resource.to_s]=1
      @locks[resource.to_s] << id
    }
    Wakame.log.debug("#{self.class}: set(#{resource.to_s}, #{id})" + "\n#{self.inspect}")
  end

  # Drops every reservation of every job.
  def reset()
    @self_m.synchronize {
      # Hash#keys ignores a block, so iterate over the values explicitly to
      # empty each waiting line.
      @locks.each_value { |ids| ids.clear }
      @id2res.clear
    }
  end

  # Returns the state of job +id+:
  # :pass::     the job reserved nothing
  # :runnable:: the job is first in line for all of its resources
  # :wait::     another job is ahead on at least one resource
  def test(id)
    @self_m.synchronize {
      reslist = @id2res[id]
      return :pass if reslist.nil? || reslist.empty?

      if reslist.keys.all? { |r| id == @locks[r.to_s][0] }
        return :runnable
      else
        return :wait
      end
    }
  end

  # Blocks the calling thread until job +id+ no longer has to wait, or until
  # +id+ itself is released through #quit.
  #
  # Raises Timeout::Error after +tout+ seconds (30 minutes by default).
  def wait(id, tout=60*30)
    wakeup = ::Queue.new
    @qbt_m.synchronize { @queue_by_thread[Thread.current] = wakeup }

    # Timeout.timeout replaces the bare Kernel#timeout, which newer Ruby
    # versions no longer provide.
    Timeout.timeout(tout) {
      while test(id) == :wait
        # The entry may be removed by a concurrent #quit, hence the nil guard.
        Wakame.log.debug("#{self.class}: Job #{id} waits for locked resouces: #{(@id2res[id] || {}).keys.join(', ')}")
        break if id == wakeup.deq
      end
    }
  ensure
    @qbt_m.synchronize { @queue_by_thread.delete(Thread.current) }
  end

  # Releases every reservation of job +id+ and, when it had any, wakes up all
  # waiting threads so that they re-evaluate their state.
  def quit(id)
    # Look up, unlink and forget the job inside one critical section so that
    # a concurrent #set/#quit/#reset cannot interleave between the steps.
    released = @self_m.synchronize {
      reslist = @id2res.delete(id)
      if reslist.nil? || reslist.empty?
        false
      else
        reslist.each_key { |r|
          ids = @locks[r.to_s]
          ids.delete_if { |i| i == id } if ids
        }
        @locks.delete_if { |k, v| v.nil? || v.empty? }
        true
      end
    }

    if released
      @qbt_m.synchronize {
        @queue_by_thread.each { |t, q| q.enq(id) }
      }
    end

    Wakame.log.debug("#{self.class}: quit(#{id})" + "\n#{self.inspect}")
  end

  # Placeholder; intentionally does nothing.
  def clear_resource(resource)
  end

  # Renders the lock table as an ASCII table with one row per resource: the
  # resource name followed by the job IDs in line. Returns "" when there are
  # no resources.
  def inspect
    rows = @locks.collect { |res, ids| ([res] + ids).collect { |cell| cell.to_s } }
    return "" if rows.empty?

    # Size the table by the longest row so that no waiting job is cut off.
    ncols = rows.collect { |row| row.size }.max
    widths = (0...ncols).collect { |col|
      rows.collect { |row| row[col].to_s.length }.max
    }
    # Trailing columns without any visible content are not rendered.
    widths.pop while !widths.empty? && widths.last == 0

    textrows = rows.collect { |row|
      buf = String.new
      widths.each_with_index { |width, col|
        buf << "|" + row[col].to_s.ljust(width)
      }
      buf << "|"
    }

    border = "+" + ("-" * [textrows[0].length - 2, 0].max) + "+"
    ([border] + textrows + [border]).join("\n")
  end
end
|
320
|
+
end
|
321
|
+
end
|
@@ -0,0 +1,166 @@
|
|
1
|
+
|
2
|
+
module Wakame
|
3
|
+
module MasterManagers
|
4
|
+
# Master-side manager that tracks agents.
#
# #init installs a periodic timer that flags/unregisters agents whose last
# ping is too old, and subscribes to the 'registry', 'ping' and 'agent_event'
# message feeds of the master plus the Event::AgentUnMonitored event.
class AgentMonitor
  include MasterManager
  include ThreadImmutable

  # Sets the timeouts, starts the periodic agent check and registers all
  # subscribers.
  def init
    @agent_timeout = 301.to_f                 # seconds without ping => STATUS_TIMEOUT
    @agent_kill_timeout = @agent_timeout * 2  # seconds without ping => unregister
    @gc_period = 20.to_f                      # timer interval in seconds

    # GC event trigger for agent timer & status
    @agent_timeout_timer = EM::PeriodicTimer.new(@gc_period) {
      StatusDB.pass {
        #Wakame.log.debug("Started agent GC : agents.size=#{@registered_agents.size}")
        self.agent_pool.dataset.all.each { |row|
          agent = Service::Agent.find(row[:agent_id])
          # find can return nil (the ping handler below checks for it too);
          # skip rows whose agent record is gone.
          next if agent.nil?
          #next if agent.status == Service::Agent::STATUS_OFFLINE

          diff_time = Time.now - agent.last_ping_at_time
          #Wakame.log.debug "AgentMonitor GC : #{agent_id}: #{diff_time}"
          if diff_time > @agent_timeout.to_f
            agent.update_monitor_status(Service::Agent::STATUS_TIMEOUT)
          end

          if diff_time > @agent_kill_timeout.to_f
            agent_pool.unregister(agent)
          end
        }

        #Wakame.log.debug("Finished agent GC")
      }
    }


    # NOTE(review): the three subscribers below eval() the received message.
    # That executes arbitrary Ruby if a message can come from an untrusted
    # sender - consider a data-only serialization format instead.
    master.add_subscriber('registry') { |data|
      data = eval(data)
      # Skip messages produced before this master started.
      next if Time.parse(data[:responded_at]) < master.started_at

      StatusDB.pass {
        agent_id = data[:agent_id]

        agent = agent_pool.agent_find_or_create(agent_id)

        case data[:class_type]
        when 'Wakame::Packets::Register'
          agent.update_status(Service::Agent::STATUS_REGISTERRING)
          agent_pool.register_as_observed(agent)

          agent.root_path = data[:root_path]

          agent.save
          master.action_manager.trigger_action(Actions::RegisterAgent.new(agent))
        when 'Wakame::Packets::UnRegister'
          agent_pool.unregister(agent)
        end
      }

    }

    master.add_subscriber('ping') { |data|
      ping = eval(data)
      # Skip the old ping responses before starting master node.
      next if Time.parse(ping[:responded_at]) < master.started_at

      # Variable update function for the common members
      set_report_values = proc { |agent|
        agent.last_ping_at = ping[:responded_at]

        agent.renew_reported_services(ping[:services])
        agent.save

        agent.update_monitor_status(Service::Agent::STATUS_ONLINE)
      }

      StatusDB.pass {
        agent = Service::Agent.find(ping[:agent_id])
        if agent.nil?
          # First ping of an unknown agent: create it and start observing it.
          agent = Service::Agent.new
          agent.id = ping[:agent_id]

          set_report_values.call(agent)

          agent_pool.register_as_observed(agent)
        else
          set_report_values.call(agent)
        end

        EventDispatcher.fire_event(Event::AgentPong.new(agent))
      }
    }

    master.add_subscriber('agent_event') { |data|
      response = eval(data)
      # Skip messages produced before this master started.
      next if Time.parse(response[:responded_at]) < master.started_at

      case response[:class_type]
      when 'Wakame::Packets::StatusCheckResult'
        StatusDB.pass {
          svc_inst = Service::ServiceInstance.find(response[:svc_id])
          if svc_inst
            svc_inst.monitor_status = response[:status]
            svc_inst.save
          else
            # The agent monitors a service the master does not know about:
            # push the expected monitor configuration back to the agent.
            Wakame.log.error("#{self.class}: Unknown service ID: #{response[:svc_id]}")
            agent = Service::Agent.find(response[:agent_id])
            correct_svc_monitor_mismatch(agent)
          end
        }
      when 'Wakame::Packets::ServiceStatusChanged'
        StatusDB.pass {
          svc_inst = Service::ServiceInstance.find(response[:svc_id])
          if svc_inst
            response_time = Time.parse(response[:responded_at])
            svc_inst.update_monitor_status(response[:new_status], response_time, response[:fail_message])
          end
        }
      when 'Wakame::Packets::ActorResponse'
        case response[:status]
        when Actor::STATUS_RUNNING
          EventDispatcher.fire_event(Event::ActorProgress.new(response[:agent_id], response[:token], 0))
        when Actor::STATUS_FAILED
          EventDispatcher.fire_event(Event::ActorComplete.new(response[:agent_id], response[:token], response[:status], nil))
        else
          EventDispatcher.fire_event(Event::ActorComplete.new(response[:agent_id], response[:token], response[:status], response[:opts][:return_value]))
        end
      else
        Wakame.log.warn("#{self.class}: Unhandled agent response: #{response[:class_type]}")
      end
    }

    EventDispatcher.subscribe(Event::AgentUnMonitored) { |event|
      StatusDB.pass {
        agent = Service::Agent.find(event.agent.id)
        # The record may already be gone; nothing to terminate in that case.
        agent.terminate unless agent.nil?
      }
    }
  end

  # Stops the periodic agent check. Safe to call when #init never ran.
  def terminate
    @agent_timeout_timer.cancel if @agent_timeout_timer
  end

  # The Models::AgentPool singleton.
  def agent_pool
    Models::AgentPool.instance
  end


  private

  # Re-sends the monitor configuration to +agent+ through the
  # '/monitor/reload' actor request: every live monitor of its cloud host when
  # the agent is mapped, otherwise an empty '/service' configuration.
  # Logs a warning and returns when +agent+ is nil.
  def correct_svc_monitor_mismatch(agent)
    if agent.nil?
      Wakame.log.warn("#{self.class}: Unknown agent. Skipped to refresh the monitoring setting")
      return
    end

    if agent.mapped?
      agent.cloud_host.live_monitors.each { |path, conf|
        Wakame.log.debug("#{self.class}: Refreshing monitoring setting on #{agent.id}: #{path} => #{conf.inspect}")
        agent.actor_request('/monitor/reload', path, conf).request
      }
    else
      Wakame.log.debug("#{self.class}: Resetting monitoring setting on #{agent.id}")
      agent.actor_request('/monitor/reload', '/service', {}).request
    end

  end

end
|
165
|
+
end
|
166
|
+
end
|