wakame 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +20 -0
- data/README.rdoc +63 -0
- data/Rakefile +86 -0
- data/VERSION +1 -0
- data/app_generators/wakame/templates/README +0 -0
- data/app_generators/wakame/templates/Rakefile +18 -0
- data/app_generators/wakame/templates/bin/wakame-agent +9 -0
- data/app_generators/wakame/templates/bin/wakame-master +9 -0
- data/app_generators/wakame/templates/bin/wakameadm +9 -0
- data/app_generators/wakame/templates/cluster/resources/apache_app/apache_app.rb +54 -0
- data/app_generators/wakame/templates/cluster/resources/apache_app/conf/apache2.conf +46 -0
- data/app_generators/wakame/templates/cluster/resources/apache_app/conf/envvars-app +7 -0
- data/app_generators/wakame/templates/cluster/resources/apache_app/conf/sites-app.conf +23 -0
- data/app_generators/wakame/templates/cluster/resources/apache_app/conf/system-app.conf +67 -0
- data/app_generators/wakame/templates/cluster/resources/apache_app/init.d/apache2-app +192 -0
- data/app_generators/wakame/templates/cluster/resources/apache_lb/apache_lb.rb +56 -0
- data/app_generators/wakame/templates/cluster/resources/apache_lb/conf/apache2.conf +46 -0
- data/app_generators/wakame/templates/cluster/resources/apache_lb/conf/envvars-lb +6 -0
- data/app_generators/wakame/templates/cluster/resources/apache_lb/conf/sites-lb.conf +54 -0
- data/app_generators/wakame/templates/cluster/resources/apache_lb/conf/system-lb.conf +75 -0
- data/app_generators/wakame/templates/cluster/resources/apache_lb/init.d/apache2-lb +192 -0
- data/app_generators/wakame/templates/cluster/resources/apache_www/apache_www.rb +50 -0
- data/app_generators/wakame/templates/cluster/resources/apache_www/conf/apache2.conf +47 -0
- data/app_generators/wakame/templates/cluster/resources/apache_www/conf/envvars-www +7 -0
- data/app_generators/wakame/templates/cluster/resources/apache_www/conf/sites-www.conf +23 -0
- data/app_generators/wakame/templates/cluster/resources/apache_www/conf/system-www.conf +63 -0
- data/app_generators/wakame/templates/cluster/resources/apache_www/init.d/apache2-www +192 -0
- data/app_generators/wakame/templates/cluster/resources/ec2_elastic_ip/ec2_elastic_ip.rb +39 -0
- data/app_generators/wakame/templates/cluster/resources/mysql_master/conf/my.cnf +154 -0
- data/app_generators/wakame/templates/cluster/resources/mysql_master/init.d/mysql +185 -0
- data/app_generators/wakame/templates/cluster/resources/mysql_master/mysql_master.rb +174 -0
- data/app_generators/wakame/templates/config/boot.rb +85 -0
- data/app_generators/wakame/templates/config/cluster.rb +64 -0
- data/app_generators/wakame/templates/config/environments/common.rb +0 -0
- data/app_generators/wakame/templates/config/environments/ec2.rb +3 -0
- data/app_generators/wakame/templates/config/environments/stand_alone.rb +0 -0
- data/app_generators/wakame/templates/config/init.d/wakame-agent +72 -0
- data/app_generators/wakame/templates/config/init.d/wakame-master +73 -0
- data/app_generators/wakame/wakame_generator.rb +124 -0
- data/bin/wakame +18 -0
- data/contrib/imagesetup.sh +77 -0
- data/lib/ext/eventmachine.rb +86 -0
- data/lib/ext/shellwords.rb +172 -0
- data/lib/ext/uri.rb +15 -0
- data/lib/wakame/action.rb +156 -0
- data/lib/wakame/actions/destroy_instances.rb +39 -0
- data/lib/wakame/actions/launch_cluster.rb +31 -0
- data/lib/wakame/actions/migrate_service.rb +65 -0
- data/lib/wakame/actions/propagate_instances.rb +95 -0
- data/lib/wakame/actions/reload_service.rb +21 -0
- data/lib/wakame/actions/scaleout_when_high_load.rb +44 -0
- data/lib/wakame/actions/shutdown_cluster.rb +22 -0
- data/lib/wakame/actions/shutdown_vm.rb +19 -0
- data/lib/wakame/actions/start_service.rb +64 -0
- data/lib/wakame/actions/stop_service.rb +49 -0
- data/lib/wakame/actions/util.rb +71 -0
- data/lib/wakame/actor/daemon.rb +37 -0
- data/lib/wakame/actor/service_monitor.rb +21 -0
- data/lib/wakame/actor/system.rb +46 -0
- data/lib/wakame/actor.rb +33 -0
- data/lib/wakame/agent.rb +226 -0
- data/lib/wakame/amqp_client.rb +219 -0
- data/lib/wakame/command/action_status.rb +62 -0
- data/lib/wakame/command/actor.rb +23 -0
- data/lib/wakame/command/clone_service.rb +12 -0
- data/lib/wakame/command/launch_cluster.rb +15 -0
- data/lib/wakame/command/migrate_service.rb +21 -0
- data/lib/wakame/command/propagate_service.rb +24 -0
- data/lib/wakame/command/shutdown_cluster.rb +15 -0
- data/lib/wakame/command/status.rb +81 -0
- data/lib/wakame/command.rb +31 -0
- data/lib/wakame/command_queue.rb +44 -0
- data/lib/wakame/configuration.rb +93 -0
- data/lib/wakame/daemonize.rb +96 -0
- data/lib/wakame/event.rb +232 -0
- data/lib/wakame/event_dispatcher.rb +154 -0
- data/lib/wakame/graph.rb +79 -0
- data/lib/wakame/initializer.rb +162 -0
- data/lib/wakame/instance_counter.rb +78 -0
- data/lib/wakame/logger.rb +12 -0
- data/lib/wakame/manager/commands.rb +134 -0
- data/lib/wakame/master.rb +369 -0
- data/lib/wakame/monitor/agent.rb +50 -0
- data/lib/wakame/monitor/service.rb +183 -0
- data/lib/wakame/monitor.rb +69 -0
- data/lib/wakame/packets.rb +160 -0
- data/lib/wakame/queue_declare.rb +14 -0
- data/lib/wakame/rule.rb +116 -0
- data/lib/wakame/rule_engine.rb +202 -0
- data/lib/wakame/runner/administrator_command.rb +112 -0
- data/lib/wakame/runner/agent.rb +81 -0
- data/lib/wakame/runner/master.rb +93 -0
- data/lib/wakame/scheduler.rb +251 -0
- data/lib/wakame/service.rb +914 -0
- data/lib/wakame/template.rb +189 -0
- data/lib/wakame/trigger.rb +66 -0
- data/lib/wakame/triggers/instance_count_update.rb +45 -0
- data/lib/wakame/triggers/load_history.rb +107 -0
- data/lib/wakame/triggers/maintain_ssh_known_hosts.rb +43 -0
- data/lib/wakame/triggers/process_command.rb +34 -0
- data/lib/wakame/triggers/shutdown_unused_vm.rb +16 -0
- data/lib/wakame/util.rb +569 -0
- data/lib/wakame/vm_manipulator.rb +186 -0
- data/lib/wakame.rb +59 -0
- data/tasks/ec2.rake +127 -0
- data/tests/cluster.json +3 -0
- data/tests/conf/a +1 -0
- data/tests/conf/b +1 -0
- data/tests/conf/c +1 -0
- data/tests/setup_agent.rb +39 -0
- data/tests/setup_master.rb +28 -0
- data/tests/test_actor.rb +54 -0
- data/tests/test_agent.rb +218 -0
- data/tests/test_amqp_client.rb +94 -0
- data/tests/test_graph.rb +36 -0
- data/tests/test_master.rb +167 -0
- data/tests/test_monitor.rb +47 -0
- data/tests/test_rule_engine.rb +127 -0
- data/tests/test_scheduler.rb +123 -0
- data/tests/test_service.rb +60 -0
- data/tests/test_template.rb +67 -0
- data/tests/test_uri_amqp.rb +19 -0
- data/tests/test_util.rb +71 -0
- data/wakame_generators/resource/resource_generator.rb +54 -0
- data/wakame_generators/resource/templates/apache_app/apache_app.rb +60 -0
- data/wakame_generators/resource/templates/apache_app/conf/apache2.conf +46 -0
- data/wakame_generators/resource/templates/apache_app/conf/envvars-app +7 -0
- data/wakame_generators/resource/templates/apache_app/conf/sites-app.conf +23 -0
- data/wakame_generators/resource/templates/apache_app/conf/system-app.conf +67 -0
- data/wakame_generators/resource/templates/apache_app/init.d/apache2-app +192 -0
- data/wakame_generators/resource/templates/apache_lb/apache_lb.rb +67 -0
- data/wakame_generators/resource/templates/apache_lb/conf/apache2.conf +46 -0
- data/wakame_generators/resource/templates/apache_lb/conf/envvars-lb +6 -0
- data/wakame_generators/resource/templates/apache_lb/conf/sites-lb.conf +54 -0
- data/wakame_generators/resource/templates/apache_lb/conf/system-lb.conf +75 -0
- data/wakame_generators/resource/templates/apache_lb/init.d/apache2-lb +192 -0
- data/wakame_generators/resource/templates/apache_www/apache_www.rb +56 -0
- data/wakame_generators/resource/templates/apache_www/conf/apache2.conf +47 -0
- data/wakame_generators/resource/templates/apache_www/conf/envvars-www +7 -0
- data/wakame_generators/resource/templates/apache_www/conf/sites-www.conf +23 -0
- data/wakame_generators/resource/templates/apache_www/conf/system-www.conf +63 -0
- data/wakame_generators/resource/templates/apache_www/init.d/apache2-www +192 -0
- data/wakame_generators/resource/templates/ec2_elastic_ip/ec2_elastic_ip.rb +39 -0
- data/wakame_generators/resource/templates/mysql_master/conf/my.cnf +154 -0
- data/wakame_generators/resource/templates/mysql_master/init.d/mysql +185 -0
- data/wakame_generators/resource/templates/mysql_master/mysql_master.rb +119 -0
- metadata +289 -0
@@ -0,0 +1,369 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
|
5
|
+
require 'wakame'
|
6
|
+
require 'wakame/packets'
|
7
|
+
require 'wakame/service'
|
8
|
+
require 'wakame/queue_declare'
|
9
|
+
require 'wakame/vm_manipulator'
|
10
|
+
|
11
|
+
module Wakame
|
12
|
+
|
13
|
+
# Tracks the agents that talk to the master node.
#
# Agents are kept in two hashes keyed by agent id:
# * +registered_agents+   - agents that sent a Register packet.
# * +unregistered_agents+ - agents that were only seen through a ping, or that
#   sent an UnRegister packet.
#
# The constructor subscribes to the master's 'registry', 'ping' and
# 'agent_event' queues and starts a periodic timer that marks silent agents
# as timed out and eventually drops them.
class AgentMonitor
  include ThreadImmutable
  attr_reader :registered_agents, :unregistered_agents, :master, :gc_period

  # master:: the Master instance whose queues are subscribed to.
  def initialize(master)
    bind_thread
    @master = master
    @registered_agents = {}
    @unregistered_agents = {}
    # Seconds without a ping before an agent is flagged STATUS_TIMEOUT.
    @agent_timeout = 31.to_f
    # Seconds without a ping before an agent is dropped from the registry.
    @agent_kill_timeout = @agent_timeout * 2
    # Interval (seconds) between two runs of the timeout check below.
    @gc_period = 20.to_f

    # GC event trigger for agent timer & status
    calc_agent_timeout = proc {
      #Wakame.log.debug("Started agent GC : agents.size=#{@registered_agents.size}")
      kill_list = []
      registered_agents.each { |agent_id, agent|
        next if agent.status == Service::Agent::STATUS_OFFLINE
        diff_time = Time.now - agent.last_ping_at
        #Wakame.log.debug "AgentMonitor GC : #{agent_id}: #{diff_time}"
        if diff_time > @agent_timeout.to_f
          agent.status = Service::Agent::STATUS_TIMEOUT
        end

        if diff_time > @agent_kill_timeout.to_f
          kill_list << agent_id
        end
      }

      # Removal happens after the iteration so the hash is never modified
      # while it is being traversed.
      kill_list.each { |agent_id|
        agent = registered_agents.delete(agent_id)
        ED.fire_event(Event::AgentUnMonitored.new(agent)) unless agent.nil?
      }
      #Wakame.log.debug("Finished agent GC")
    }

    @agent_timeout_timer = EventMachine::PeriodicTimer.new(@gc_period, calc_agent_timeout)

    master.add_subscriber('registry') { |data|
      # NOTE(review): eval on a message payload executes whatever the sender
      # put on the queue; confirm the broker is trusted or switch the packet
      # format to something that is parsed rather than evaluated.
      data = eval(data)

      agent_id = data[:agent_id]
      case data[:type]
      when 'Wakame::Packets::Register'
        register_agent(data)
      when 'Wakame::Packets::UnRegister'
        unregister_agent(agent_id)
      end
    }

    master.add_subscriber('ping') { |data|
      # NOTE(review): same eval concern as the 'registry' subscriber.
      ping = eval(data)
      # Skip the old ping responses before starting master node.
      next if Time.parse(ping[:responded_at]) < master.started_at

      # Variable update function for the common members
      set_report_values = proc { |agent|
        agent.status = Service::Agent::STATUS_ONLINE
        agent.uptime = 0
        agent.last_ping_at = Time.parse(ping[:responded_at])

        agent.attr = ping[:attrs]

        agent.services.clear
        # The ping is read as a Hash everywhere else in this handler, so the
        # service list is looked up by key as well.
        (ping[:services] || {}).each { |svc_id, i|
          agent.services[svc_id] = master.service_cluster.instances[svc_id]
        }
      }

      pinged = agent(ping[:agent_id])
      if pinged.nil?
        # First contact through a ping: remember the agent as unregistered
        # until its Register packet arrives.
        pinged = Service::Agent.new(ping[:agent_id])
        set_report_values.call(pinged)
        unregistered_agents[ping[:agent_id]] = pinged
      else
        set_report_values.call(pinged)
      end

      ED.fire_event(Event::AgentPong.new(pinged))
    }

    master.add_subscriber('agent_event') { |data|
      # NOTE(review): same eval concern as the 'registry' subscriber.
      response = eval(data)
      #p response
      case response[:type]
      when 'Wakame::Packets::ServiceStatusChanged'
        svc_inst = Service::ServiceInstance.instance_collection[response[:svc_id]]
        if svc_inst
          response_time = Time.parse(response[:responded_at])
          svc_inst.update_status(response[:new_status], response_time, response[:fail_message])

          # tmp_event = Event::ServiceStatusChanged.new(response[:svc_id], svc_inst.property, response[:status], response[:previous_status])
          # tmp_event.time = response_time
          # ED.fire_event(tmp_event)

          # if response[:previous_status] != Service::STATUS_ONLINE && response[:new_status] == Service::STATUS_ONLINE
          #   tmp_event = Event::ServiceOnline.new(tmp_event.instance_id, svc_inst.property)
          #   tmp_event.time = response_time
          #   ED.fire_event(tmp_event)
          # elsif response[:previous_status] != Service::STATUS_OFFLINE && response[:new_status] == Service::STATUS_OFFLINE
          #   tmp_event = Event::ServiceOffline.new(tmp_event.instance_id, svc_inst.property)
          #   tmp_event.time = response_time
          #   ED.fire_event(tmp_event)
          # elsif response[:previous_status] != Service::STATUS_FAIL && response[:new_status] == Service::STATUS_FAIL
          #   tmp_event = Event::ServiceFailed.new(tmp_event.instance_id, svc_inst.property, response[:fail_message])
          #   tmp_event.time = response_time
          #   ED.fire_event(tmp_event)
          # end
        end
      when 'Wakame::Packets::ActorResponse'
        case response[:status]
        when Actor::STATUS_RUNNING
          ED.fire_event(Event::ActorProgress.new(response[:agent_id], response[:token], 0))
        else
          ED.fire_event(Event::ActorComplete.new(response[:agent_id], response[:token], response[:status]))
        end
      else
        Wakame.log.warn("#{self.class}: Unhandled agent response: #{response[:type]}")
      end
    }

  end

  # Looks an agent up by id, registered agents first.
  # Returns nil when the id is unknown to both hashes.
  def agent(agent_id)
    registered_agents[agent_id] || unregistered_agents[agent_id]
  end

  # Handles a Register packet.
  #
  # data:: Hash read through the keys :agent_id and :root_path.
  #
  # An agent that is not registered yet is either promoted from the
  # unregistered hash or created, and Event::AgentMonitored is fired.
  # The agent's root_path is refreshed in every case.
  def register_agent(data)
    agent_id = data[:agent_id]
    agent = registered_agents[agent_id]
    if agent.nil?
      # Move the reference from the unregistered group when it is there,
      # otherwise this is the very first time the agent shows up.
      agent = unregistered_agents.delete(agent_id) || Service::Agent.new(agent_id)
      registered_agents[agent_id] = agent
      Wakame.log.debug("The Agent has been registered: #{data.inspect}")
      #Wakame.log.debug(unregistered_agents)
      ED.fire_event(Event::AgentMonitored.new(agent))
    end
    agent.root_path = data[:root_path]
  end

  # Handles an UnRegister packet: moves the agent back to the unregistered
  # hash and fires Event::AgentUnMonitored. Unknown ids are ignored.
  def unregister_agent(agent_id)
    agent = registered_agents.delete(agent_id)
    if agent
      unregistered_agents[agent_id] = agent
      ED.fire_event(Event::AgentUnMonitored.new(agent))
    end
  end


  #     def bind_agent(service_instance, &filter)
  #       agent_id, agent = @agents.find { |agent_id, agent|
  #
  #         next false if agent.has_service_type?(service_instance.property.class)
  #         filter.call(agent)
  #       }
  #       return nil if agent.nil?
  #       service_instance.bind_agent(agent)
  #       agent
  #     end

  #     def unbind_agent(service_instance)
  #       service_instance.unbind_agent
  #     end

  # Returns the master local agent object.
  # Raises RuntimeError while that agent has not registered yet.
  def master_local
    agent = registered_agents[@master.master_local_agent_id]
    Wakame.log.debug("#{agent} = registered_agents[#{@master.master_local_agent_id}]")
    raise "Master does not identify the master local agent yet." if agent.nil?
    agent
  end

  # Yields every registered agent whose status is STATUS_ONLINE.
  def each_online(&blk)
    registered_agents.each { |k, v|
      next if v.status != Service::Agent::STATUS_ONLINE
      blk.call(v)
    }
  end

  # Returns {:registered=>[...], :unregistered=>[...]} where each element is
  # the dump_status result of one agent.
  def dump_status
    res = {:registered=>[], :unregistered=>[]}

    @registered_agents.each { |key, a|
      res[:registered] << a.dump_status
    }
    @unregistered_agents.each { |key, a|
      res[:unregistered] << a.dump_status
    }
    res
  end
end
|
220
|
+
|
221
|
+
# The master process object. It connects to the message broker, declares the
# queues agents publish to, and owns the command queue, the agent monitor and
# the service cluster once the connection callback has run.
class Master
  include Wakame::AMQPClient
  include Wakame::QueueDeclare

  define_queue 'agent_event', 'agent_event'
  define_queue 'ping', 'ping'
  define_queue 'registry', 'registry'

  attr_reader :command_queue, :agent_monitor, :configuration, :service_cluster, :started_at
  # Agent id of the agent running on the same host as the master;
  # assigned by determine_agent_id during pre_setup.
  attr_reader :master_local_agent_id

  # opts:: passed through untouched to +connect+.
  def initialize(opts={})
    pre_setup

    # post_setup runs inside the block handed to connect.
    connect(opts) do
      post_setup
    end
    Wakame.log.info("Started master process : WAKAME_ROOT=#{Wakame.config.root_path} WAKAME_ENV=#{Wakame.config.environment}")
  end


  #     def send_agent_command(command, agent_id=nil)
  #       raise TypeError unless command.is_a? Packets::RequestBase
  #       EM.next_tick {
  #         if agent_id
  #           publish_to('agent_command', "agent_id.#{agent_id}", Marshal.dump(command))
  #         else
  #           publish_to('agent_command', '*', Marshal.dump(command))
  #         end
  #       }
  #     end

  # Builds (but does not send) an actor request addressed to +agent_id+.
  # Returns an ActorRequest wrapping a freshly generated token.
  def actor_request(agent_id, path, *args)
    packet = Wakame::Packets::ActorRequest.new(agent_id, Util.gen_id, path, *args)
    ActorRequest.new(self, packet)
  end

  # Attributes of the master local agent, as held by the agent monitor.
  def attr
    agent_monitor.master_local.attr
  end

  # Shuts the command queue down.
  def cleanup
    @command_queue.shutdown
  end

  private

  # Picks the master local agent id: the EC2 instance id when running in the
  # :EC2 environment, the stand-alone constant otherwise.
  def determine_agent_id
    @master_local_agent_id =
      if Wakame.config.environment == :EC2
        VmManipulator::EC2::MetadataService.query_metadata_uri('instance-id')
      else
        VmManipulator::StandAlone::INSTANCE_ID
      end
  end

  # Work done before the broker connection is opened.
  def pre_setup
    determine_agent_id
    @started_at = Time.now

    EM.barrier do
      Wakame.log.debug("Binding thread info to EventDispatcher.")
      EventDispatcher.instance.bind_thread(Thread.current)
    end
  end

  # Work done from the connect callback; requires a running reactor.
  def post_setup
    raise 'has to be put in EM.run context' unless EM.reactor_running?
    @command_queue = CommandQueue.new(self)
    @agent_monitor = AgentMonitor.new(self)

    @service_cluster = Util.new_(Wakame.config.cluster_class, self)
  end

end
|
299
|
+
|
300
|
+
|
301
|
+
require 'timeout'

# Handle for one actor request sent from the master to an agent.
#
# Typical use: build it through Master#actor_request, call #request to
# publish the packet, then #wait_completion to block until the matching
# Event::ActorComplete arrives.
class ActorRequest
  attr_reader :master

  # master:: object used to publish the packet (must respond to publish_to).
  # packet:: a Wakame::Packets::ActorRequest; anything else raises TypeError.
  def initialize(master, packet)
    raise TypeError unless packet.is_a?(Wakame::Packets::ActorRequest)

    @master = master
    @packet = packet
    @requested = false
    @event_ticket = nil
    # The completion status is handed from the event callback to the waiting
    # thread through this queue.
    @wait_lock = ::Queue.new
  end


  # Subscribes to the completion event and publishes the packet.
  # Returns self. Raises RuntimeError when called a second time.
  def request
    raise "The request has already been sent." if @requested

    @event_ticket = ED.subscribe(Event::ActorComplete) { |event|
      if event.token == @packet.token

        # Any of status except RUNNING are accomplishment of the actor request.
        Wakame.log.debug("#{self.class}: The actor request has been completed: token=#{self.token}, status=#{event.status}")
        ED.unsubscribe(@event_ticket)
        @wait_lock.enq(event.status)
      end
    }
    Wakame.log.debug("#{self.class}: Send the actor request: #{@packet.path}@#{@packet.agent_id}, token=#{self.token}")
    master.publish_to('agent_command', "agent_id.#{@packet.agent_id}", @packet.marshal)
    @requested = true
    self
  end


  # Token that identifies this request in the responses.
  def token
    @packet.token
  end

  # Not implemented. Raises RuntimeError before #request, NotImplementedError after.
  def progress
    check_requested?
    raise NotImplementedError
  end

  # Not implemented. Raises RuntimeError before #request, NotImplementedError after.
  def cancel
    check_requested?
    raise NotImplementedError

    #master.publish_to('agent_command', "agent_id.#{@packet.agent_id}", Wakame::Packets::ActorCancel.new(@packet.agent_id, ).marshal)
    #ED.unsubscribe(@event_ticket)
  end

  # Blocks until the completion status is delivered.
  #
  # tout:: maximum number of seconds to wait (default 30 minutes).
  #
  # Raises RuntimeError when #request has not been called or when the actor
  # reported Actor::STATUS_FAILED, and Timeout::Error when +tout+ elapses.
  def wait_completion(tout=60*30)
    check_requested?
    # Timeout.timeout is called explicitly: the bare Kernel-level +timeout+
    # helper is deprecated and missing on current Ruby versions.
    Timeout.timeout(tout) {
      Wakame.log.debug("#{self.class}: Waiting a response from the actor: #{@packet.path}@#{@packet.agent_id}, token=#{@packet.token}")
      ret_status = @wait_lock.deq
      Wakame.log.debug("#{self.class}: A response (status=#{ret_status}) back from the actor: #{@packet.path}@#{@packet.agent_id}, token=#{@packet.token}")
      if ret_status == Actor::STATUS_FAILED
        raise RuntimeError, "Failed status has been returned: Actor Request #{token}"
      end
    }
  end
  alias :wait :wait_completion

  private

  # Guard shared by the methods that only make sense after #request.
  def check_requested?
    raise "The request has not been sent yet." unless @requested
  end
end
|
369
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
|
2
|
+
# Monitor that reports the state of the local agent to the master by
# publishing Ping packets on the 'ping' exchange.
class Wakame::Monitor::Agent
  include Wakame::Monitor

  def initialize
    @status = STATUS_ONLINE
  end

  # Publishes one Ping packet.
  #
  # hash:: result of #check; read through :attrs, :actors, :monitors and :services.
  def send_ping(hash)
    publish_to('ping', Wakame::Packets::Ping.new(agent, hash[:attrs], hash[:actors], hash[:monitors], hash[:services]).marshal)
  end

  # Sends an immediate ping and then starts a timer that repeats it every
  # 10 seconds. +path+ is not used here.
  def setup(path)
    # Send the first ping signal as soon as possible since the ping contains vital information to construct the Agent object on master node.
    send_ping(check())

    # Setup periodical ping publisher.
    @timer = CheckerTimer.new(10) {
      send_ping(check())
    }
    @timer.start
  end


  # Collects the data carried by a ping.
  #
  # Returns a Hash with
  # :attrs::    local VM attributes fetched through VmManipulator,
  # :monitors:: [{:class=>"..."}] for every registered monitor,
  # :actors::   [{:class=>"..."}] for every registered actor,
  # :services:: {svc_id=>{:status=>...}} for every checker of the '/service' monitor.
  def check
    # Loaded up front because both branches below reference
    # Wakame::VmManipulator, not only the EC2 one.
    require 'wakame/vm_manipulator'

    # NOTE(review): other code in this project reads the environment via
    # Wakame.config.environment; confirm Wakame.environment is defined.
    if Wakame.environment == :EC2
      attrs = Wakame::VmManipulator::EC2::MetadataService.fetch_local_attrs
    else
      attrs = Wakame::VmManipulator::StandAlone.fetch_local_attrs
    end

    res = {:attrs=>attrs, :monitors=>[], :actors=>[], :services=>{}}
    EM.barrier {
      agent.monitor_registry.monitors.each { |key, m|
        res[:monitors] << {:class=>m.class.to_s}
      }
      agent.actor_registry.actors.each { |key, a|
        res[:actors] << {:class=>a.class.to_s}
      }

      svcmon = agent.monitor_registry.find_monitor('/service')
      svcmon.checkers.each { |svc_id, a|
        res[:services][svc_id]={:status=>a.status}
      }
    }

    res
  end
end
|
@@ -0,0 +1,183 @@
|
|
1
|
+
|
2
|
+
require 'open4'
|
3
|
+
require 'wakame'
|
4
|
+
|
5
|
+
# Monitor that watches services running on the agent. Each watched service
# gets one checker object (see ServiceChecker and its subclasses) stored in
# +checkers+ under the service id; status changes are reported on the
# 'agent_event' exchange.
class Wakame::Monitor::Service

  # Base class of the checkers. It owns a timer that runs #check every
  # 3 seconds in the EventMachine thread pool and reports status transitions
  # through the service monitor. Subclasses override #check.
  class ServiceChecker
    #include Wakame::Packets::Agent
    attr_reader :timer, :svc_id
    attr_accessor :last_checked_at, :status

    # svc_id::  id of the watched service.
    # svc_mon:: the Wakame::Monitor::Service used to send events.
    def initialize(svc_id, svc_mon)
      @svc_id = svc_id
      @service_monitor = svc_mon
      @status = Wakame::Service::STATUS_OFFLINE
      @timer = Wakame::Monitor::CheckerTimer.new(3) {
        self.signal_checker
      }
    end

    # Starts the timer and announces MonitoringStarted; no-op when already running.
    def start
      if !@timer.running?
        @timer.start
        @service_monitor.send_event(Wakame::Packets::MonitoringStarted.new(@service_monitor.agent, self.svc_id))
        Wakame.log.debug("#{self.class}: Started the checker")
      end
    end

    # Stops the timer and announces MonitoringStopped; no-op when not running.
    def stop
      if @timer.running?
        @timer.stop
        @service_monitor.send_event(Wakame::Packets::MonitoringStopped.new(@service_monitor.agent, self.svc_id))
        Wakame.log.debug("#{self.class}: Stopped the checker")
      end
    end

    # Hook for subclasses. Expected to return Wakame::Service::STATUS_ONLINE
    # or STATUS_OFFLINE; this default returns nil, which signal_checker logs
    # as an unknown response.
    def check
    end

    protected

    # Runs #check through EventMachine.defer and feeds the outcome to
    # update_status from the completion callback. An exception raised by
    # #check is logged and turned into STATUS_FAIL.
    def signal_checker
      EventMachine.defer proc {
        res = begin
                self.last_checked_at = Time.now
                self.check
              rescue => e
                Wakame.log.error("#{self.class}: #{e}")
                Wakame.log.error(e)
                e
              end
        Thread.pass
        res
      }, proc { |res|

        case res
        when Exception
          update_status(Wakame::Service::STATUS_FAIL)
        when Wakame::Service::STATUS_ONLINE, Wakame::Service::STATUS_OFFLINE
          update_status(res)
        else
          Wakame.log.error("#{self.class}: Unknown response type from the checker: #{self.svc_id}, ")
        end
      }
    end

    # Stores +new_status+ and sends ServiceStatusChanged, but only when the
    # status actually differs from the previous one.
    def update_status(new_status)
      prev_status = self.status
      if prev_status != new_status
        self.status = new_status
        @service_monitor.send_event(Wakame::Packets::ServiceStatusChanged.new(@service_monitor.agent, self.svc_id, prev_status, new_status))
      end
    end
  end

  # Checker that considers the service online when the pid stored in a pid
  # file belongs to a process known to +ps+.
  class PidFileChecker < ServiceChecker
    # pidpath:: path of the pid file.
    def initialize(svc_id, svc_mon, pidpath)
      super(svc_id, svc_mon)
      @pidpath = pidpath
    end

    def check
      return Wakame::Service::STATUS_OFFLINE unless File.exist?(@pidpath)
      #cmdstat = ::Open4.popen4("ps -p \"`cat '#{@pidpath}'`\" > /dev/null"){}
      #cmdstat.exitstatus == 0 ? Wakame::Service::STATUS_ONLINE : Wakame::Service::STATUS_OFFLINE

      # The path is interpolated into a shell command line.
      cmdres = system("ps -p \"`cat '#{@pidpath}'`\" > /dev/null")
      # system() returns true or false.
      cmdres ? Wakame::Service::STATUS_ONLINE : Wakame::Service::STATUS_OFFLINE
    end
  end

  # Checker that runs a command and maps exit status 0 to online, anything
  # else to offline. Whatever the command prints is forwarded to the master
  # as a MonitoringOutput packet.
  class CommandChecker < ServiceChecker
    attr_reader :command

    # cmdstr:: command line handed to Open4.popen4.
    def initialize(svc_id, svc_mon, cmdstr)
      super(svc_id, svc_mon)
      @command = cmdstr
    end

    def check()
      outputs = []
      cmdstat = ::Open4.popen4(@command) { |pid, stdin, stdout, stderr|
        # stderr is drained concurrently: reading stdout to EOF first would
        # hang forever once the child blocks on a full stderr pipe.
        err_reader = Thread.new { stderr.readlines }
        outputs.concat(stdout.readlines)
        # stdout lines first, stderr lines after, as before.
        outputs.concat(err_reader.value)
      }
      Wakame.log.debug("#{self.class}: Exit Status #{@command}: #{cmdstat}")
      if outputs.size > 0
        @service_monitor.send_event(Wakame::Packets::MonitoringOutput.new(@service_monitor.agent, self.svc_id, outputs.join('')))
      end
      cmdstat.exitstatus == 0 ? Wakame::Service::STATUS_ONLINE : Wakame::Service::STATUS_OFFLINE
    end
  end

  include Wakame::Monitor

  attr_reader :checkers

  def initialize
    @status = Wakame::Service::STATUS_ONLINE
    @checkers = {}
  end

  def setup(path)
  end

  # Dispatches a request Hash by its :command key (:start or :stop).
  def handle_request(request)
    svc_id = request[:svc_id]
    case request[:command]
    when :start
      # NOTE(review): #register expects (svc_id, checker_type, *args); here
      # the command string lands in the checker_type slot. Verify against the
      # caller whether this should be register(svc_id, :command, cmdstr).
      register(svc_id, request[:cmdstr])
    when :stop
      unregister(svc_id)
    end
  end

  # Publishes packet +a+ on the 'agent_event' exchange.
  def send_event(a)
    Wakame.log.debug("Sending back a event: #{a.class}")
    publish_to('agent_event', a.marshal)
  end

  def dump_attrs

  end

  # Returns the checker registered for +svc_id+, or nil.
  def find_checker(svc_id)
    @checkers[svc_id]
  end

  # Creates, starts and stores a checker for +svc_id+.
  #
  # checker_type:: :pidfile (args[0] is the pid file path) or
  #                :command (args[0] is the command line).
  #
  # Raises RuntimeError when the service already has a checker or the type
  # is not supported.
  def register(svc_id, checker_type, *args)
    chk = @checkers[svc_id]
    if chk
      Wakame.log.error("#{self.class}: Service registory duplication. #{svc_id}")
      raise "Service registory duplication. #{svc_id}"
    end
    case checker_type.to_sym
    when :pidfile
      chk = PidFileChecker.new(svc_id, self, args[0])
    when :command
      chk = CommandChecker.new(svc_id, self, args[0])
    else
      raise "Unsupported checker type: #{checker_type}"
    end
    chk.start
    @checkers[svc_id]=chk
    Wakame.log.info("#{self.class}: Registered service checker for #{svc_id}")
  end

  # Stops the checker's timer and forgets the checker. Unknown ids are ignored.
  # The timer is stopped directly, so no MonitoringStopped packet is sent here.
  def unregister(svc_id)
    chk = @checkers[svc_id]
    if chk
      chk.timer.stop
      @checkers.delete(svc_id)
      Wakame.log.info("#{self.class}: Unregistered service checker for #{svc_id}")
    end
  end

end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
|
4
|
+
module Wakame
  # Mixin shared by the agent-side monitors. Including it gives the class
  # +status+ and +agent+ accessors, no-op lifecycle hooks that a monitor may
  # override, and a +publish_to+ helper that delegates to the agent.
  module Monitor
    STATUS_OFFLINE=0
    STATUS_ONLINE=1
    STATUS_FAIL=2

    # Adds the +status+ and +agent+ accessors to the including class.
    def self.included(klass)
      klass.send(:attr_accessor, :status, :agent)
    end

    # Hook: called with a request for this monitor. Does nothing by default.
    def handle_request(request); end

    # Hook: called with the path the monitor was assigned. Does nothing by default.
    def setup(assigned_path); end

    # Hook: does nothing by default.
    def enable; end

    # Hook: does nothing by default.
    def disable; end

    # Forwards +data+ to +exchange+ through the owning agent.
    def publish_to(exchange, data)
      agent.publish_to(exchange, data)
    end

  end
end
|
35
|
+
|
36
|
+
|
37
|
+
module Wakame
  module Monitor
    # Periodic timer that, unlike its parent, is created stopped and can be
    # started and stopped repeatedly. Exceptions raised by the block are
    # logged instead of propagating into the reactor.
    class CheckerTimer < EventMachine::PeriodicTimer
      # time:: interval in seconds between two runs of the block.
      #
      # The parent constructor is deliberately not called because it would
      # schedule the timer right away; the instance variables the parent's
      # schedule/fire rely on are set here instead.
      def initialize(time, &blk)
        @interval = time
        @code = proc {
          begin
            blk.call
          rescue => e
            Wakame.log.error(e)
          end
        }
        # Newer EventMachine releases hand @work to add_timer from schedule;
        # without it nothing would ever be scheduled. Older releases ignore it.
        # NOTE(review): relies on PeriodicTimer internals - confirm against
        # the EventMachine version in use.
        @work = method(:fire)
        stop
      end

      # Schedules the timer unless it is already running.
      # NOTE(review): a stop immediately followed by start may leave the
      # previously scheduled tick pending as well - verify if that matters.
      def start
        if !running?
          @cancelled = false
          schedule
        end
      end

      # Marks the timer as cancelled so the next tick does not run the block.
      def stop
        @cancelled = true
      end

      # True between #start and #stop.
      def running?
        !@cancelled
      end

    end
  end
end
|