wakame 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +20 -0
- data/README.rdoc +63 -0
- data/Rakefile +86 -0
- data/VERSION +1 -0
- data/app_generators/wakame/templates/README +0 -0
- data/app_generators/wakame/templates/Rakefile +18 -0
- data/app_generators/wakame/templates/bin/wakame-agent +9 -0
- data/app_generators/wakame/templates/bin/wakame-master +9 -0
- data/app_generators/wakame/templates/bin/wakameadm +9 -0
- data/app_generators/wakame/templates/cluster/resources/apache_app/apache_app.rb +54 -0
- data/app_generators/wakame/templates/cluster/resources/apache_app/conf/apache2.conf +46 -0
- data/app_generators/wakame/templates/cluster/resources/apache_app/conf/envvars-app +7 -0
- data/app_generators/wakame/templates/cluster/resources/apache_app/conf/sites-app.conf +23 -0
- data/app_generators/wakame/templates/cluster/resources/apache_app/conf/system-app.conf +67 -0
- data/app_generators/wakame/templates/cluster/resources/apache_app/init.d/apache2-app +192 -0
- data/app_generators/wakame/templates/cluster/resources/apache_lb/apache_lb.rb +56 -0
- data/app_generators/wakame/templates/cluster/resources/apache_lb/conf/apache2.conf +46 -0
- data/app_generators/wakame/templates/cluster/resources/apache_lb/conf/envvars-lb +6 -0
- data/app_generators/wakame/templates/cluster/resources/apache_lb/conf/sites-lb.conf +54 -0
- data/app_generators/wakame/templates/cluster/resources/apache_lb/conf/system-lb.conf +75 -0
- data/app_generators/wakame/templates/cluster/resources/apache_lb/init.d/apache2-lb +192 -0
- data/app_generators/wakame/templates/cluster/resources/apache_www/apache_www.rb +50 -0
- data/app_generators/wakame/templates/cluster/resources/apache_www/conf/apache2.conf +47 -0
- data/app_generators/wakame/templates/cluster/resources/apache_www/conf/envvars-www +7 -0
- data/app_generators/wakame/templates/cluster/resources/apache_www/conf/sites-www.conf +23 -0
- data/app_generators/wakame/templates/cluster/resources/apache_www/conf/system-www.conf +63 -0
- data/app_generators/wakame/templates/cluster/resources/apache_www/init.d/apache2-www +192 -0
- data/app_generators/wakame/templates/cluster/resources/ec2_elastic_ip/ec2_elastic_ip.rb +39 -0
- data/app_generators/wakame/templates/cluster/resources/mysql_master/conf/my.cnf +154 -0
- data/app_generators/wakame/templates/cluster/resources/mysql_master/init.d/mysql +185 -0
- data/app_generators/wakame/templates/cluster/resources/mysql_master/mysql_master.rb +174 -0
- data/app_generators/wakame/templates/config/boot.rb +85 -0
- data/app_generators/wakame/templates/config/cluster.rb +64 -0
- data/app_generators/wakame/templates/config/environments/common.rb +0 -0
- data/app_generators/wakame/templates/config/environments/ec2.rb +3 -0
- data/app_generators/wakame/templates/config/environments/stand_alone.rb +0 -0
- data/app_generators/wakame/templates/config/init.d/wakame-agent +72 -0
- data/app_generators/wakame/templates/config/init.d/wakame-master +73 -0
- data/app_generators/wakame/wakame_generator.rb +124 -0
- data/bin/wakame +18 -0
- data/contrib/imagesetup.sh +77 -0
- data/lib/ext/eventmachine.rb +86 -0
- data/lib/ext/shellwords.rb +172 -0
- data/lib/ext/uri.rb +15 -0
- data/lib/wakame/action.rb +156 -0
- data/lib/wakame/actions/destroy_instances.rb +39 -0
- data/lib/wakame/actions/launch_cluster.rb +31 -0
- data/lib/wakame/actions/migrate_service.rb +65 -0
- data/lib/wakame/actions/propagate_instances.rb +95 -0
- data/lib/wakame/actions/reload_service.rb +21 -0
- data/lib/wakame/actions/scaleout_when_high_load.rb +44 -0
- data/lib/wakame/actions/shutdown_cluster.rb +22 -0
- data/lib/wakame/actions/shutdown_vm.rb +19 -0
- data/lib/wakame/actions/start_service.rb +64 -0
- data/lib/wakame/actions/stop_service.rb +49 -0
- data/lib/wakame/actions/util.rb +71 -0
- data/lib/wakame/actor/daemon.rb +37 -0
- data/lib/wakame/actor/service_monitor.rb +21 -0
- data/lib/wakame/actor/system.rb +46 -0
- data/lib/wakame/actor.rb +33 -0
- data/lib/wakame/agent.rb +226 -0
- data/lib/wakame/amqp_client.rb +219 -0
- data/lib/wakame/command/action_status.rb +62 -0
- data/lib/wakame/command/actor.rb +23 -0
- data/lib/wakame/command/clone_service.rb +12 -0
- data/lib/wakame/command/launch_cluster.rb +15 -0
- data/lib/wakame/command/migrate_service.rb +21 -0
- data/lib/wakame/command/propagate_service.rb +24 -0
- data/lib/wakame/command/shutdown_cluster.rb +15 -0
- data/lib/wakame/command/status.rb +81 -0
- data/lib/wakame/command.rb +31 -0
- data/lib/wakame/command_queue.rb +44 -0
- data/lib/wakame/configuration.rb +93 -0
- data/lib/wakame/daemonize.rb +96 -0
- data/lib/wakame/event.rb +232 -0
- data/lib/wakame/event_dispatcher.rb +154 -0
- data/lib/wakame/graph.rb +79 -0
- data/lib/wakame/initializer.rb +162 -0
- data/lib/wakame/instance_counter.rb +78 -0
- data/lib/wakame/logger.rb +12 -0
- data/lib/wakame/manager/commands.rb +134 -0
- data/lib/wakame/master.rb +369 -0
- data/lib/wakame/monitor/agent.rb +50 -0
- data/lib/wakame/monitor/service.rb +183 -0
- data/lib/wakame/monitor.rb +69 -0
- data/lib/wakame/packets.rb +160 -0
- data/lib/wakame/queue_declare.rb +14 -0
- data/lib/wakame/rule.rb +116 -0
- data/lib/wakame/rule_engine.rb +202 -0
- data/lib/wakame/runner/administrator_command.rb +112 -0
- data/lib/wakame/runner/agent.rb +81 -0
- data/lib/wakame/runner/master.rb +93 -0
- data/lib/wakame/scheduler.rb +251 -0
- data/lib/wakame/service.rb +914 -0
- data/lib/wakame/template.rb +189 -0
- data/lib/wakame/trigger.rb +66 -0
- data/lib/wakame/triggers/instance_count_update.rb +45 -0
- data/lib/wakame/triggers/load_history.rb +107 -0
- data/lib/wakame/triggers/maintain_ssh_known_hosts.rb +43 -0
- data/lib/wakame/triggers/process_command.rb +34 -0
- data/lib/wakame/triggers/shutdown_unused_vm.rb +16 -0
- data/lib/wakame/util.rb +569 -0
- data/lib/wakame/vm_manipulator.rb +186 -0
- data/lib/wakame.rb +59 -0
- data/tasks/ec2.rake +127 -0
- data/tests/cluster.json +3 -0
- data/tests/conf/a +1 -0
- data/tests/conf/b +1 -0
- data/tests/conf/c +1 -0
- data/tests/setup_agent.rb +39 -0
- data/tests/setup_master.rb +28 -0
- data/tests/test_actor.rb +54 -0
- data/tests/test_agent.rb +218 -0
- data/tests/test_amqp_client.rb +94 -0
- data/tests/test_graph.rb +36 -0
- data/tests/test_master.rb +167 -0
- data/tests/test_monitor.rb +47 -0
- data/tests/test_rule_engine.rb +127 -0
- data/tests/test_scheduler.rb +123 -0
- data/tests/test_service.rb +60 -0
- data/tests/test_template.rb +67 -0
- data/tests/test_uri_amqp.rb +19 -0
- data/tests/test_util.rb +71 -0
- data/wakame_generators/resource/resource_generator.rb +54 -0
- data/wakame_generators/resource/templates/apache_app/apache_app.rb +60 -0
- data/wakame_generators/resource/templates/apache_app/conf/apache2.conf +46 -0
- data/wakame_generators/resource/templates/apache_app/conf/envvars-app +7 -0
- data/wakame_generators/resource/templates/apache_app/conf/sites-app.conf +23 -0
- data/wakame_generators/resource/templates/apache_app/conf/system-app.conf +67 -0
- data/wakame_generators/resource/templates/apache_app/init.d/apache2-app +192 -0
- data/wakame_generators/resource/templates/apache_lb/apache_lb.rb +67 -0
- data/wakame_generators/resource/templates/apache_lb/conf/apache2.conf +46 -0
- data/wakame_generators/resource/templates/apache_lb/conf/envvars-lb +6 -0
- data/wakame_generators/resource/templates/apache_lb/conf/sites-lb.conf +54 -0
- data/wakame_generators/resource/templates/apache_lb/conf/system-lb.conf +75 -0
- data/wakame_generators/resource/templates/apache_lb/init.d/apache2-lb +192 -0
- data/wakame_generators/resource/templates/apache_www/apache_www.rb +56 -0
- data/wakame_generators/resource/templates/apache_www/conf/apache2.conf +47 -0
- data/wakame_generators/resource/templates/apache_www/conf/envvars-www +7 -0
- data/wakame_generators/resource/templates/apache_www/conf/sites-www.conf +23 -0
- data/wakame_generators/resource/templates/apache_www/conf/system-www.conf +63 -0
- data/wakame_generators/resource/templates/apache_www/init.d/apache2-www +192 -0
- data/wakame_generators/resource/templates/ec2_elastic_ip/ec2_elastic_ip.rb +39 -0
- data/wakame_generators/resource/templates/mysql_master/conf/my.cnf +154 -0
- data/wakame_generators/resource/templates/mysql_master/init.d/mysql +185 -0
- data/wakame_generators/resource/templates/mysql_master/mysql_master.rb +119 -0
- metadata +289 -0
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
|
|
3
|
+
require 'rubygems'
|
|
4
|
+
|
|
5
|
+
require 'wakame'
|
|
6
|
+
require 'wakame/packets'
|
|
7
|
+
require 'wakame/service'
|
|
8
|
+
require 'wakame/queue_declare'
|
|
9
|
+
require 'wakame/vm_manipulator'
|
|
10
|
+
|
|
11
|
+
module Wakame
|
|
12
|
+
|
|
13
|
+
# Keeps track of the agents known to the master node.
#
# Agents live in one of two hashes keyed by agent id:
# * registered_agents   - agents that sent a Register packet.
# * unregistered_agents - agents that only pinged so far, or that sent an
#                         UnRegister packet.
#
# The constructor subscribes to the master's 'registry', 'ping' and
# 'agent_event' queues and starts a periodic timer that marks silent agents
# as timed out and eventually drops them.
class AgentMonitor
  include ThreadImmutable
  attr_reader :registered_agents, :unregistered_agents, :master, :gc_period

  # master:: the object that owns this monitor. It must respond to
  #          add_subscriber, started_at, service_cluster and
  #          master_local_agent_id (all are called below).
  def initialize(master)
    bind_thread
    @master = master
    @registered_agents = {}
    @unregistered_agents = {}
    # Seconds without a ping before an agent is flagged STATUS_TIMEOUT.
    @agent_timeout = 31.0
    # Seconds without a ping before an agent is dropped from the registry.
    @agent_kill_timeout = @agent_timeout * 2
    # Interval (seconds) of the timeout sweep below.
    @gc_period = 20.0

    # Periodic sweep over the registered agents: flag the ones that stopped
    # pinging and collect the ones that have been silent for too long.
    calc_agent_timeout = proc {
      kill_list = []
      registered_agents.each { |agent_id, agent|
        next if agent.status == Service::Agent::STATUS_OFFLINE
        diff_time = Time.now - agent.last_ping_at
        agent.status = Service::Agent::STATUS_TIMEOUT if diff_time > @agent_timeout
        kill_list << agent_id if diff_time > @agent_kill_timeout
      }

      # Removal happens after the iteration so the hash is never mutated
      # while it is being traversed.
      kill_list.each { |agent_id|
        # The agents are stored in registered_agents; the former @agents
        # instance variable was never assigned, so deleting from it raised
        # NoMethodError on nil as soon as an agent had to be reaped.
        agent = registered_agents.delete(agent_id)
        ED.fire_event(Event::AgentUnMonitored.new(agent)) unless agent.nil?
      }
    }

    @agent_timeout_timer = EventMachine::PeriodicTimer.new(@gc_period, calc_agent_timeout)

    # Register / UnRegister packets.
    master.add_subscriber('registry') { |data|
      # SECURITY(review): eval executes whatever arrives on the queue as Ruby
      # code. Anyone able to publish to this queue can run code on the master.
      # Consider a data-only wire format.
      data = eval(data)

      agent_id = data[:agent_id]
      case data[:type]
      when 'Wakame::Packets::Register'
        register_agent(data)
      when 'Wakame::Packets::UnRegister'
        unregister_agent(agent_id)
      end
    }

    # Periodic ping packets.
    master.add_subscriber('ping') { |data|
      # SECURITY(review): see the note on eval in the 'registry' subscriber.
      ping = eval(data)
      # Skip the old ping responses before starting master node.
      next if Time.parse(ping[:responded_at]) < master.started_at

      # Variable update function for the common members
      set_report_values = proc { |agent|
        agent.status = Service::Agent::STATUS_ONLINE
        agent.uptime = 0
        agent.last_ping_at = Time.parse(ping[:responded_at])

        agent.attr = ping[:attrs]

        agent.services.clear
        # The ping payload is indexed like a Hash everywhere else in this
        # block, so the service list is read with [] as well (a Hash does not
        # respond to #services).
        ping[:services].each { |svc_id, _|
          agent.services[svc_id] = master.service_cluster.instances[svc_id]
        }
      }

      agent = agent(ping[:agent_id])
      if agent.nil?
        # First sign of life from an agent that has not registered yet.
        agent = Service::Agent.new(ping[:agent_id])
        set_report_values.call(agent)
        unregistered_agents[ping[:agent_id]] = agent
      else
        set_report_values.call(agent)
      end

      ED.fire_event(Event::AgentPong.new(agent))
    }

    # Events reported by the agents (service status changes, actor results).
    master.add_subscriber('agent_event') { |data|
      # SECURITY(review): see the note on eval in the 'registry' subscriber.
      response = eval(data)
      case response[:type]
      when 'Wakame::Packets::ServiceStatusChanged'
        svc_inst = Service::ServiceInstance.instance_collection[response[:svc_id]]
        if svc_inst
          response_time = Time.parse(response[:responded_at])
          svc_inst.update_status(response[:new_status], response_time, response[:fail_message])

          # Disabled legacy code, kept for reference:
          # tmp_event = Event::ServiceStatusChanged.new(response[:svc_id], svc_inst.property, response[:status], response[:previous_status])
          # tmp_event.time = response_time
          # ED.fire_event(tmp_event)

          # if response[:previous_status] != Service::STATUS_ONLINE && response[:new_status] == Service::STATUS_ONLINE
          #   tmp_event = Event::ServiceOnline.new(tmp_event.instance_id, svc_inst.property)
          #   tmp_event.time = response_time
          #   ED.fire_event(tmp_event)
          # elsif response[:previous_status] != Service::STATUS_OFFLINE && response[:new_status] == Service::STATUS_OFFLINE
          #   tmp_event = Event::ServiceOffline.new(tmp_event.instance_id, svc_inst.property)
          #   tmp_event.time = response_time
          #   ED.fire_event(tmp_event)
          # elsif response[:previous_status] != Service::STATUS_FAIL && response[:new_status] == Service::STATUS_FAIL
          #   tmp_event = Event::ServiceFailed.new(tmp_event.instance_id, svc_inst.property, response[:fail_message])
          #   tmp_event.time = response_time
          #   ED.fire_event(tmp_event)
          # end
        end
      when 'Wakame::Packets::ActorResponse'
        case response[:status]
        when Actor::STATUS_RUNNING
          ED.fire_event(Event::ActorProgress.new(response[:agent_id], response[:token], 0))
        else
          # Every status other than RUNNING is reported as completion.
          ED.fire_event(Event::ActorComplete.new(response[:agent_id], response[:token], response[:status]))
        end
      else
        Wakame.log.warn("#{self.class}: Unhandled agent response: #{response[:type]}")
      end
    }

  end


  # Looks an agent up by id, registered agents first.
  # Returns nil when the id is unknown.
  def agent(agent_id)
    registered_agents[agent_id] || unregistered_agents[agent_id]
  end

  # Handles a Register packet.
  #
  # data:: Hash with at least :agent_id and :root_path.
  #
  # An agent that is not registered yet is either created or moved over from
  # unregistered_agents, and Event::AgentMonitored is fired. The agent's
  # root_path is refreshed on every call.
  def register_agent(data)
    agent_id = data[:agent_id]
    agent = registered_agents[agent_id]
    if agent.nil?
      agent = unregistered_agents.delete(agent_id)
      # nil here means the agent is going to be registered for the first time.
      agent = Service::Agent.new(agent_id) if agent.nil?
      registered_agents[agent_id] = agent

      Wakame.log.debug("The Agent has been registered: #{data.inspect}")
      ED.fire_event(Event::AgentMonitored.new(agent))
    end
    agent.root_path = data[:root_path]
  end

  # Handles an UnRegister packet: moves the agent back to
  # unregistered_agents and fires Event::AgentUnMonitored.
  # Unknown ids are ignored.
  def unregister_agent(agent_id)
    agent = registered_agents.delete(agent_id)
    if agent
      unregistered_agents[agent_id] = agent
      ED.fire_event(Event::AgentUnMonitored.new(agent))
    end
  end


  # Disabled legacy code, kept for reference:
  #     def bind_agent(service_instance, &filter)
  #       agent_id, agent = @agents.find { |agent_id, agent|
  #
  #         next false if agent.has_service_type?(service_instance.property.class)
  #         filter.call(agent)
  #       }
  #       return nil if agent.nil?
  #       service_instance.bind_agent(agent)
  #       agent
  #     end

  #     def unbind_agent(service_instance)
  #       service_instance.unbind_agent
  #     end

  # Returns the master local agent object.
  # Raises RuntimeError while that agent has not registered yet.
  def master_local
    agent = registered_agents[@master.master_local_agent_id]
    # Diagnostic output goes through the logger instead of stdout.
    Wakame.log.debug("#{agent} = registered_agents[#{@master.master_local_agent_id}]")
    raise "Master does not identify the master local agent yet." if agent.nil?
    agent
  end

  # Calls the block with every registered agent whose status is
  # STATUS_ONLINE.
  def each_online(&blk)
    registered_agents.each { |_, agent|
      next if agent.status != Service::Agent::STATUS_ONLINE
      blk.call(agent)
    }
  end

  # Returns {:registered=>[...], :unregistered=>[...]} where each element is
  # the dump_status of one agent.
  def dump_status
    res = {:registered=>[], :unregistered=>[]}

    @registered_agents.each { |_, a|
      res[:registered] << a.dump_status
    }
    @unregistered_agents.each { |_, a|
      res[:unregistered] << a.dump_status
    }
    res
  end
end
|
|
220
|
+
|
|
221
|
+
# The master process: connects to the message broker and, once connected,
# creates the command queue, the agent monitor and the service cluster.
class Master
  include Wakame::AMQPClient
  include Wakame::QueueDeclare

  define_queue 'agent_event', 'agent_event'
  define_queue 'ping', 'ping'
  define_queue 'registry', 'registry'

  attr_reader :command_queue, :agent_monitor, :configuration, :service_cluster, :started_at

  # opts:: options handed to connect unchanged.
  #
  # post_setup runs from the block given to connect.
  def initialize(opts={})
    pre_setup

    connect(opts) do
      post_setup
    end
    Wakame.log.info("Started master process : WAKAME_ROOT=#{Wakame.config.root_path} WAKAME_ENV=#{Wakame.config.environment}")
  end


  #     def send_agent_command(command, agent_id=nil)
  #       raise TypeError unless command.is_a? Packets::RequestBase
  #       EM.next_tick {
  #         if agent_id
  #           publish_to('agent_command', "agent_id.#{agent_id}", Marshal.dump(command))
  #         else
  #           publish_to('agent_command', '*', Marshal.dump(command))
  #         end
  #       }
  #     end

  # Builds (but does not send) an ActorRequest addressed to agent_id.
  # path and args are passed on to the request packet together with a
  # freshly generated id.
  def actor_request(agent_id, path, *args)
    packet = Wakame::Packets::ActorRequest.new(agent_id, Util.gen_id, path, *args)
    ActorRequest.new(self, packet)
  end


  # Attributes of the master local agent, as tracked by the agent monitor.
  def attr
    agent_monitor.master_local.attr
  end


  # Shuts the command queue down.
  def cleanup
    @command_queue.shutdown
  end

  # Id of the agent that runs next to this master; set by pre_setup.
  attr_reader :master_local_agent_id

  private

  # Picks the local agent id: the EC2 instance id when running on EC2,
  # otherwise the stand-alone constant.
  def determine_agent_id
    @master_local_agent_id =
      if Wakame.config.environment == :EC2
        VmManipulator::EC2::MetadataService.query_metadata_uri('instance-id')
      else
        VmManipulator::StandAlone::INSTANCE_ID
      end
  end

  # Work done before connecting: resolve the local agent id, record the start
  # time and bind the event dispatcher to the thread running the EM.barrier
  # block.
  def pre_setup
    determine_agent_id
    @started_at = Time.now

    EM.barrier do
      Wakame.log.debug("Binding thread info to EventDispatcher.")
      EventDispatcher.instance.bind_thread(Thread.current)
    end
  end

  # Work done after connecting. Raises unless the EM reactor is running.
  def post_setup
    raise 'has to be put in EM.run context' unless EM.reactor_running?
    @command_queue = CommandQueue.new(self)
    @agent_monitor = AgentMonitor.new(self)

    @service_cluster = Util.new_(Wakame.config.cluster_class, self)
  end

end
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
# Handle for one actor request sent from the master to an agent.
#
# Typical use: build it through Master#actor_request, call #request to
# publish the packet, then #wait_completion (alias #wait) to block until the
# agent reports a final status.
class ActorRequest
  attr_reader :master

  # master:: object used to publish the packet (must respond to publish_to).
  # packet:: a Wakame::Packets::ActorRequest.
  #
  # Raises TypeError for any other packet class.
  def initialize(master, packet)
    raise TypeError unless packet.is_a?(Wakame::Packets::ActorRequest)

    @master = master
    @packet = packet
    @requested = false
    @event_ticket = nil
    # Hand-over point between the event callback and the waiting thread.
    @wait_lock = ::Queue.new
  end


  # Publishes the packet to the 'agent_command' exchange and subscribes to
  # Event::ActorComplete so the matching completion can be handed to
  # #wait_completion.
  #
  # Returns self. Raises RuntimeError when called twice.
  def request
    raise "The request has already been sent." if @requested

    @event_ticket = ED.subscribe(Event::ActorComplete) { |event|
      if event.token == @packet.token

        # Any of status except RUNNING are accomplishment of the actor request.
        Wakame.log.debug("#{self.class}: The actor request has been completed: token=#{self.token}, status=#{event.status}")
        ED.unsubscribe(@event_ticket)
        @wait_lock.enq(event.status)
      end
    }
    Wakame.log.debug("#{self.class}: Send the actor request: #{@packet.path}@#{@packet.agent_id}, token=#{self.token}")
    master.publish_to('agent_command', "agent_id.#{@packet.agent_id}", @packet.marshal)
    @requested = true
    self
  end


  # Token that identifies this request in the agent's responses.
  def token
    @packet.token
  end

  # Not implemented yet. Raises RuntimeError before #request was called,
  # NotImplementedError afterwards.
  def progress
    check_requested?
    raise NotImplementedError
  end

  # Not implemented yet. Raises RuntimeError before #request was called,
  # NotImplementedError afterwards.
  def cancel
    check_requested?
    raise NotImplementedError

    #master.publish_to('agent_command', "agent_id.#{@packet.agent_id}", Wakame::Packets::ActorCancel.new(@packet.agent_id, ).marshal)
    #ED.unsubscribe(@event_ticket)
  end

  # Blocks until the agent reports a final status for this request.
  #
  # tout:: maximum number of seconds to wait (default: 30 minutes).
  #
  # Raises RuntimeError when the request was not sent yet or when the agent
  # reported Actor::STATUS_FAILED, and Timeout::Error when no response arrived
  # within tout seconds.
  def wait_completion(tout=60*30)
    check_requested?
    # Timeout.timeout replaces the bare Kernel#timeout, which is deprecated
    # and no longer exists in current Ruby versions. Both come from the same
    # 'timeout' library.
    Timeout.timeout(tout) {
      Wakame.log.debug("#{self.class}: Waiting a response from the actor: #{@packet.path}@#{@packet.agent_id}, token=#{@packet.token}")
      ret_status = @wait_lock.deq
      Wakame.log.debug("#{self.class}: A response (status=#{ret_status}) back from the actor: #{@packet.path}@#{@packet.agent_id}, token=#{@packet.token}")
      if ret_status == Actor::STATUS_FAILED
        raise RuntimeError, "Failed status has been returned: Actor Request #{token}"
      end
    }
  end
  alias :wait :wait_completion

  private

  # Guard used by the methods that only make sense after #request.
  def check_requested?
    raise "The request has not been sent yet." unless @requested
  end
end
|
|
369
|
+
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
|
|
2
|
+
# Agent-side monitor that periodically publishes a ping packet describing
# the local node: its attributes, the loaded monitors and actors, and the
# status of every service checker.
class Wakame::Monitor::Agent
  include Wakame::Monitor

  def initialize
    @status = STATUS_ONLINE
  end

  # Publishes one ping packet to the 'ping' exchange.
  #
  # hash:: the Hash returned by #check (:attrs, :actors, :monitors,
  #        :services).
  def send_ping(hash)
    publish_to('ping', Wakame::Packets::Ping.new(agent, hash[:attrs], hash[:actors], hash[:monitors], hash[:services]).marshal)
  end

  # Sends a first ping right away and then one every 10 seconds.
  # The path argument is not used here.
  def setup(path)
    # Send the first ping signal as soon as possible since the ping contains vital information to construct the Agent object on master node.
    send_ping(check())

    # Setup periodical ping publisher.
    @timer = CheckerTimer.new(10) {
      send_ping(check())
    }
    @timer.start
  end


  # Collects the data for one ping.
  #
  # Returns a Hash with
  # :attrs::    local VM attributes,
  # :monitors:: [{:class=>"..."}] for every registered monitor,
  # :actors::   [{:class=>"..."}] for every registered actor,
  # :services:: {svc_id=>{:status=>...}} taken from the '/service' monitor.
  def check
    # Loaded lazily, but before branching: both branches below reference
    # Wakame::VmManipulator, so the require must not be limited to the EC2
    # case.
    require 'wakame/vm_manipulator'
    # NOTE(review): Master reads Wakame.config.environment while this method
    # reads Wakame.environment - confirm both exist and agree.
    attrs = if Wakame.environment == :EC2
              Wakame::VmManipulator::EC2::MetadataService.fetch_local_attrs
            else
              Wakame::VmManipulator::StandAlone.fetch_local_attrs
            end

    res = {:attrs=>attrs, :monitors=>[], :actors=>[], :services=>{}}
    EM.barrier {
      agent.monitor_registry.monitors.each { |key, m|
        res[:monitors] << {:class=>m.class.to_s}
      }
      agent.actor_registry.actors.each { |key, a|
        res[:actors] << {:class=>a.class.to_s}
      }

      svcmon = agent.monitor_registry.find_monitor('/service')
      svcmon.checkers.each { |svc_id, a|
        res[:services][svc_id]={:status=>a.status}
      }
    }

    res
  end
end
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
|
|
2
|
+
require 'open4'
|
|
3
|
+
require 'wakame'
|
|
4
|
+
|
|
5
|
+
# Agent-side monitor that watches the services running on the local node.
# One checker object is kept per service id; every checker polls its service
# on a timer and reports status changes through #send_event.
class Wakame::Monitor::Service

  # Base class of the checkers. Subclasses override #check and return
  # Wakame::Service::STATUS_ONLINE or Wakame::Service::STATUS_OFFLINE.
  class ServiceChecker
    attr_reader :timer, :svc_id
    attr_accessor :last_checked_at, :status

    # svc_id::  id of the service being watched.
    # svc_mon:: the owning monitor; used to send packets back.
    #
    # The polling timer (3 second interval) is created here but only runs
    # after #start.
    def initialize(svc_id, svc_mon)
      @svc_id = svc_id
      @service_monitor = svc_mon
      @status = Wakame::Service::STATUS_OFFLINE
      @timer = Wakame::Monitor::CheckerTimer.new(3) {
        self.signal_checker
      }
    end

    # Starts polling and announces it with a MonitoringStarted packet.
    # Does nothing when the timer is already running.
    def start
      if !@timer.running?
        @timer.start
        @service_monitor.send_event(Wakame::Packets::MonitoringStarted.new(@service_monitor.agent, self.svc_id))
        Wakame.log.debug("#{self.class}: Started the checker")
      end
    end

    # Stops polling and announces it with a MonitoringStopped packet.
    # Does nothing when the timer is not running.
    def stop
      if @timer.running?
        @timer.stop
        @service_monitor.send_event(Wakame::Packets::MonitoringStopped.new(@service_monitor.agent, self.svc_id))
        Wakame.log.debug("#{self.class}: Stopped the checker")
      end
    end

    # Overridden by subclasses. The base implementation returns nil, which
    # #signal_checker logs as an unknown response.
    def check
    end

    protected

    # Runs #check through EventMachine.defer and feeds the result to
    # #update_status in the completion callback.
    # An exception raised by #check is logged and reported as STATUS_FAIL.
    def signal_checker
      EventMachine.defer proc {
        res = begin
                self.last_checked_at = Time.now
                self.check
              rescue => e
                Wakame.log.error("#{self.class}: #{e}")
                Wakame.log.error(e)
                # The exception object itself is the result, so the callback
                # below can tell a failure from a status value.
                e
              end
        Thread.pass
        res
      }, proc { |res|

        case res
        when Exception
          update_status(Wakame::Service::STATUS_FAIL)
        when Wakame::Service::STATUS_ONLINE, Wakame::Service::STATUS_OFFLINE
          update_status(res)
        else
          # The offending value is part of the message now; it used to end
          # after the comma.
          Wakame.log.error("#{self.class}: Unknown response type from the checker: #{self.svc_id}, #{res.inspect}")
        end
      }
    end

    # Stores new_status and sends a ServiceStatusChanged packet, but only
    # when the status actually changed.
    def update_status(new_status)
      prev_status = self.status
      if prev_status != new_status
        self.status = new_status
        @service_monitor.send_event(Wakame::Packets::ServiceStatusChanged.new(@service_monitor.agent, self.svc_id, prev_status, new_status))
      end
    end
  end

  # Checker that considers a service online while the process named in a
  # pid file shows up in ps.
  class PidFileChecker < ServiceChecker
    # pidpath:: path of the pid file.
    def initialize(svc_id, svc_mon, pidpath)
      super(svc_id, svc_mon)
      @pidpath = pidpath
    end

    # Returns STATUS_OFFLINE when the pid file is missing or ps does not
    # find the process, STATUS_ONLINE otherwise.
    def check
      return Wakame::Service::STATUS_OFFLINE unless File.exist?(@pidpath)
      #cmdstat = ::Open4.popen4("ps -p \"`cat '#{@pidpath}'`\" > /dev/null"){}
      #cmdstat.exitstatus == 0 ? Wakame::Service::STATUS_ONLINE : Wakame::Service::STATUS_OFFLINE

      # NOTE(review): @pidpath is interpolated into a shell command line; a
      # path containing a single quote would break out of the quoting.
      cmdres = system("ps -p \"`cat '#{@pidpath}'`\" > /dev/null")
      # system() returns true or false.
      cmdres ? Wakame::Service::STATUS_ONLINE : Wakame::Service::STATUS_OFFLINE
    end
  end

  # Checker that runs an arbitrary command and maps exit status 0 to
  # STATUS_ONLINE and anything else to STATUS_OFFLINE.
  class CommandChecker < ServiceChecker
    attr_reader :command

    # cmdstr:: the command line to run on every check.
    def initialize(svc_id, svc_mon, cmdstr)
      super(svc_id, svc_mon)
      @command = cmdstr
    end

    # Runs the command and forwards anything it printed (stdout lines first,
    # then stderr lines) in a MonitoringOutput packet.
    def check()
      out_lines = []
      err_lines = []
      cmdstat = ::Open4.popen4(@command) { |pid, stdin, stdout, stderr|
        # stderr is drained on its own thread. Reading stdout to EOF first
        # blocks forever once the child has filled the stderr pipe, because
        # the child then never gets to close stdout.
        err_reader = Thread.new {
          stderr.each { |l| err_lines << l }
        }
        stdout.each { |l| out_lines << l }
        err_reader.join
      }
      outputs = out_lines + err_lines
      Wakame.log.debug("#{self.class}: Exit Status #{@command}: #{cmdstat}")
      if outputs.size > 0
        @service_monitor.send_event(Wakame::Packets::MonitoringOutput.new(@service_monitor.agent, self.svc_id, outputs.join('')))
      end
      cmdstat.exitstatus == 0 ? Wakame::Service::STATUS_ONLINE : Wakame::Service::STATUS_OFFLINE
    end
  end

  include Wakame::Monitor

  # Hash of svc_id => checker.
  attr_reader :checkers

  def initialize
    @status = Wakame::Service::STATUS_ONLINE
    @checkers = {}
  end

  # Nothing to prepare for this monitor.
  def setup(path)
  end

  # Dispatches a monitor request.
  #
  # request:: Hash with :svc_id and :command (:start or :stop). Other
  #           commands are ignored.
  def handle_request(request)
    svc_id = request[:svc_id]
    case request[:command]
    when :start
      # NOTE(review): request[:cmdstr] is passed in the checker_type position
      # of #register and no checker argument follows. #register only accepts
      # :pidfile or :command there - confirm what callers put in :cmdstr.
      register(svc_id, request[:cmdstr])
    when :stop
      unregister(svc_id)
    end
  end

  # Publishes a packet to the 'agent_event' exchange.
  #
  # a:: packet object responding to #marshal.
  def send_event(a)
    Wakame.log.debug("Sending back a event: #{a.class}")
    publish_to('agent_event', a.marshal)
  end

  # Placeholder; returns nil.
  def dump_attrs

  end

  # Returns the checker registered for svc_id, or nil.
  def find_checker(svc_id)
    @checkers[svc_id]
  end

  # Creates, starts and stores a checker for svc_id.
  #
  # checker_type:: :pidfile (args[0] is the pid file path) or
  #                :command (args[0] is the command line).
  #
  # Raises RuntimeError when svc_id already has a checker or the type is not
  # supported.
  def register(svc_id, checker_type, *args)
    chk = @checkers[svc_id]
    if chk
      Wakame.log.error("#{self.class}: Service registory duplication. #{svc_id}")
      raise "Service registory duplication. #{svc_id}"
    end
    case checker_type.to_sym
    when :pidfile
      chk = PidFileChecker.new(svc_id, self, args[0])
    when :command
      chk = CommandChecker.new(svc_id, self, args[0])
    else
      raise "Unsupported checker type: #{checker_type}"
    end
    chk.start
    @checkers[svc_id]=chk
    Wakame.log.info("#{self.class}: Registered service checker for #{svc_id}")
  end

  # Stops and forgets the checker of svc_id. Unknown ids are ignored.
  def unregister(svc_id)
    chk = @checkers[svc_id]
    if chk
      # NOTE(review): the timer is stopped directly, so ServiceChecker#stop
      # (which sends the MonitoringStopped packet) is bypassed - confirm this
      # is intended.
      chk.timer.stop
      @checkers.delete(svc_id)
      Wakame.log.info("#{self.class}: Unregistered service checker for #{svc_id}")
    end
  end

end
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
module Wakame
  # Mixin shared by the agent-side monitors. Including it gives the host
  # class +status+ and +agent+ accessors plus no-op default hooks.
  module Monitor
    STATUS_OFFLINE=0
    STATUS_ONLINE=1
    STATUS_FAIL=2

    # Adds the status/agent accessors to every class that includes the
    # module.
    def self.included(klass)
      klass.send(:attr_accessor, :status, :agent)
    end

    # Hook: handle a request sent to this monitor. No-op by default.
    def handle_request(request); end

    # Hook: prepare the monitor. No-op by default.
    def setup(assigned_path); end

    # Hook. No-op by default.
    def enable; end

    # Hook. No-op by default.
    def disable; end

    # Forwards data to the given exchange through the owning agent.
    def publish_to(exchange, data)
      self.agent.publish_to(exchange, data)
    end

  end
end
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
module Wakame
  module Monitor
    # Periodic timer that can be stopped and started again and that logs
    # exceptions raised by its block without propagating them.
    #
    # The constructor does not call super, and it ends by calling #stop, so
    # a new timer is idle until #start is called.
    class CheckerTimer < EventMachine::PeriodicTimer
      # time:: interval stored in @interval.
      # blk::  the code to run; wrapped so that a StandardError is logged.
      def initialize(time, &blk)
        @interval = time
        @code = proc {
          begin
            blk.call
          rescue => err
            Wakame.log.error(err)
          end
        }
        stop
      end

      # Clears the cancelled flag and calls schedule, unless the timer is
      # already running. schedule is not defined in this class - presumably
      # it is inherited from EventMachine::PeriodicTimer.
      def start
        return if running?

        @cancelled = false
        schedule
      end

      # Marks the timer as cancelled.
      def stop
        @cancelled = true
      end

      # True while the timer is not marked as cancelled.
      def running?
        !@cancelled
      end

    end
  end
end
|