wakame 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (147) hide show
  1. data/History.txt +20 -0
  2. data/README.rdoc +63 -0
  3. data/Rakefile +86 -0
  4. data/VERSION +1 -0
  5. data/app_generators/wakame/templates/README +0 -0
  6. data/app_generators/wakame/templates/Rakefile +18 -0
  7. data/app_generators/wakame/templates/bin/wakame-agent +9 -0
  8. data/app_generators/wakame/templates/bin/wakame-master +9 -0
  9. data/app_generators/wakame/templates/bin/wakameadm +9 -0
  10. data/app_generators/wakame/templates/cluster/resources/apache_app/apache_app.rb +54 -0
  11. data/app_generators/wakame/templates/cluster/resources/apache_app/conf/apache2.conf +46 -0
  12. data/app_generators/wakame/templates/cluster/resources/apache_app/conf/envvars-app +7 -0
  13. data/app_generators/wakame/templates/cluster/resources/apache_app/conf/sites-app.conf +23 -0
  14. data/app_generators/wakame/templates/cluster/resources/apache_app/conf/system-app.conf +67 -0
  15. data/app_generators/wakame/templates/cluster/resources/apache_app/init.d/apache2-app +192 -0
  16. data/app_generators/wakame/templates/cluster/resources/apache_lb/apache_lb.rb +56 -0
  17. data/app_generators/wakame/templates/cluster/resources/apache_lb/conf/apache2.conf +46 -0
  18. data/app_generators/wakame/templates/cluster/resources/apache_lb/conf/envvars-lb +6 -0
  19. data/app_generators/wakame/templates/cluster/resources/apache_lb/conf/sites-lb.conf +54 -0
  20. data/app_generators/wakame/templates/cluster/resources/apache_lb/conf/system-lb.conf +75 -0
  21. data/app_generators/wakame/templates/cluster/resources/apache_lb/init.d/apache2-lb +192 -0
  22. data/app_generators/wakame/templates/cluster/resources/apache_www/apache_www.rb +50 -0
  23. data/app_generators/wakame/templates/cluster/resources/apache_www/conf/apache2.conf +47 -0
  24. data/app_generators/wakame/templates/cluster/resources/apache_www/conf/envvars-www +7 -0
  25. data/app_generators/wakame/templates/cluster/resources/apache_www/conf/sites-www.conf +23 -0
  26. data/app_generators/wakame/templates/cluster/resources/apache_www/conf/system-www.conf +63 -0
  27. data/app_generators/wakame/templates/cluster/resources/apache_www/init.d/apache2-www +192 -0
  28. data/app_generators/wakame/templates/cluster/resources/ec2_elastic_ip/ec2_elastic_ip.rb +39 -0
  29. data/app_generators/wakame/templates/cluster/resources/mysql_master/conf/my.cnf +154 -0
  30. data/app_generators/wakame/templates/cluster/resources/mysql_master/init.d/mysql +185 -0
  31. data/app_generators/wakame/templates/cluster/resources/mysql_master/mysql_master.rb +174 -0
  32. data/app_generators/wakame/templates/config/boot.rb +85 -0
  33. data/app_generators/wakame/templates/config/cluster.rb +64 -0
  34. data/app_generators/wakame/templates/config/environments/common.rb +0 -0
  35. data/app_generators/wakame/templates/config/environments/ec2.rb +3 -0
  36. data/app_generators/wakame/templates/config/environments/stand_alone.rb +0 -0
  37. data/app_generators/wakame/templates/config/init.d/wakame-agent +72 -0
  38. data/app_generators/wakame/templates/config/init.d/wakame-master +73 -0
  39. data/app_generators/wakame/wakame_generator.rb +124 -0
  40. data/bin/wakame +18 -0
  41. data/contrib/imagesetup.sh +77 -0
  42. data/lib/ext/eventmachine.rb +86 -0
  43. data/lib/ext/shellwords.rb +172 -0
  44. data/lib/ext/uri.rb +15 -0
  45. data/lib/wakame/action.rb +156 -0
  46. data/lib/wakame/actions/destroy_instances.rb +39 -0
  47. data/lib/wakame/actions/launch_cluster.rb +31 -0
  48. data/lib/wakame/actions/migrate_service.rb +65 -0
  49. data/lib/wakame/actions/propagate_instances.rb +95 -0
  50. data/lib/wakame/actions/reload_service.rb +21 -0
  51. data/lib/wakame/actions/scaleout_when_high_load.rb +44 -0
  52. data/lib/wakame/actions/shutdown_cluster.rb +22 -0
  53. data/lib/wakame/actions/shutdown_vm.rb +19 -0
  54. data/lib/wakame/actions/start_service.rb +64 -0
  55. data/lib/wakame/actions/stop_service.rb +49 -0
  56. data/lib/wakame/actions/util.rb +71 -0
  57. data/lib/wakame/actor/daemon.rb +37 -0
  58. data/lib/wakame/actor/service_monitor.rb +21 -0
  59. data/lib/wakame/actor/system.rb +46 -0
  60. data/lib/wakame/actor.rb +33 -0
  61. data/lib/wakame/agent.rb +226 -0
  62. data/lib/wakame/amqp_client.rb +219 -0
  63. data/lib/wakame/command/action_status.rb +62 -0
  64. data/lib/wakame/command/actor.rb +23 -0
  65. data/lib/wakame/command/clone_service.rb +12 -0
  66. data/lib/wakame/command/launch_cluster.rb +15 -0
  67. data/lib/wakame/command/migrate_service.rb +21 -0
  68. data/lib/wakame/command/propagate_service.rb +24 -0
  69. data/lib/wakame/command/shutdown_cluster.rb +15 -0
  70. data/lib/wakame/command/status.rb +81 -0
  71. data/lib/wakame/command.rb +31 -0
  72. data/lib/wakame/command_queue.rb +44 -0
  73. data/lib/wakame/configuration.rb +93 -0
  74. data/lib/wakame/daemonize.rb +96 -0
  75. data/lib/wakame/event.rb +232 -0
  76. data/lib/wakame/event_dispatcher.rb +154 -0
  77. data/lib/wakame/graph.rb +79 -0
  78. data/lib/wakame/initializer.rb +162 -0
  79. data/lib/wakame/instance_counter.rb +78 -0
  80. data/lib/wakame/logger.rb +12 -0
  81. data/lib/wakame/manager/commands.rb +134 -0
  82. data/lib/wakame/master.rb +369 -0
  83. data/lib/wakame/monitor/agent.rb +50 -0
  84. data/lib/wakame/monitor/service.rb +183 -0
  85. data/lib/wakame/monitor.rb +69 -0
  86. data/lib/wakame/packets.rb +160 -0
  87. data/lib/wakame/queue_declare.rb +14 -0
  88. data/lib/wakame/rule.rb +116 -0
  89. data/lib/wakame/rule_engine.rb +202 -0
  90. data/lib/wakame/runner/administrator_command.rb +112 -0
  91. data/lib/wakame/runner/agent.rb +81 -0
  92. data/lib/wakame/runner/master.rb +93 -0
  93. data/lib/wakame/scheduler.rb +251 -0
  94. data/lib/wakame/service.rb +914 -0
  95. data/lib/wakame/template.rb +189 -0
  96. data/lib/wakame/trigger.rb +66 -0
  97. data/lib/wakame/triggers/instance_count_update.rb +45 -0
  98. data/lib/wakame/triggers/load_history.rb +107 -0
  99. data/lib/wakame/triggers/maintain_ssh_known_hosts.rb +43 -0
  100. data/lib/wakame/triggers/process_command.rb +34 -0
  101. data/lib/wakame/triggers/shutdown_unused_vm.rb +16 -0
  102. data/lib/wakame/util.rb +569 -0
  103. data/lib/wakame/vm_manipulator.rb +186 -0
  104. data/lib/wakame.rb +59 -0
  105. data/tasks/ec2.rake +127 -0
  106. data/tests/cluster.json +3 -0
  107. data/tests/conf/a +1 -0
  108. data/tests/conf/b +1 -0
  109. data/tests/conf/c +1 -0
  110. data/tests/setup_agent.rb +39 -0
  111. data/tests/setup_master.rb +28 -0
  112. data/tests/test_actor.rb +54 -0
  113. data/tests/test_agent.rb +218 -0
  114. data/tests/test_amqp_client.rb +94 -0
  115. data/tests/test_graph.rb +36 -0
  116. data/tests/test_master.rb +167 -0
  117. data/tests/test_monitor.rb +47 -0
  118. data/tests/test_rule_engine.rb +127 -0
  119. data/tests/test_scheduler.rb +123 -0
  120. data/tests/test_service.rb +60 -0
  121. data/tests/test_template.rb +67 -0
  122. data/tests/test_uri_amqp.rb +19 -0
  123. data/tests/test_util.rb +71 -0
  124. data/wakame_generators/resource/resource_generator.rb +54 -0
  125. data/wakame_generators/resource/templates/apache_app/apache_app.rb +60 -0
  126. data/wakame_generators/resource/templates/apache_app/conf/apache2.conf +46 -0
  127. data/wakame_generators/resource/templates/apache_app/conf/envvars-app +7 -0
  128. data/wakame_generators/resource/templates/apache_app/conf/sites-app.conf +23 -0
  129. data/wakame_generators/resource/templates/apache_app/conf/system-app.conf +67 -0
  130. data/wakame_generators/resource/templates/apache_app/init.d/apache2-app +192 -0
  131. data/wakame_generators/resource/templates/apache_lb/apache_lb.rb +67 -0
  132. data/wakame_generators/resource/templates/apache_lb/conf/apache2.conf +46 -0
  133. data/wakame_generators/resource/templates/apache_lb/conf/envvars-lb +6 -0
  134. data/wakame_generators/resource/templates/apache_lb/conf/sites-lb.conf +54 -0
  135. data/wakame_generators/resource/templates/apache_lb/conf/system-lb.conf +75 -0
  136. data/wakame_generators/resource/templates/apache_lb/init.d/apache2-lb +192 -0
  137. data/wakame_generators/resource/templates/apache_www/apache_www.rb +56 -0
  138. data/wakame_generators/resource/templates/apache_www/conf/apache2.conf +47 -0
  139. data/wakame_generators/resource/templates/apache_www/conf/envvars-www +7 -0
  140. data/wakame_generators/resource/templates/apache_www/conf/sites-www.conf +23 -0
  141. data/wakame_generators/resource/templates/apache_www/conf/system-www.conf +63 -0
  142. data/wakame_generators/resource/templates/apache_www/init.d/apache2-www +192 -0
  143. data/wakame_generators/resource/templates/ec2_elastic_ip/ec2_elastic_ip.rb +39 -0
  144. data/wakame_generators/resource/templates/mysql_master/conf/my.cnf +154 -0
  145. data/wakame_generators/resource/templates/mysql_master/init.d/mysql +185 -0
  146. data/wakame_generators/resource/templates/mysql_master/mysql_master.rb +119 -0
  147. metadata +289 -0
@@ -0,0 +1,369 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'rubygems'
4
+
5
+ require 'wakame'
6
+ require 'wakame/packets'
7
+ require 'wakame/service'
8
+ require 'wakame/queue_declare'
9
+ require 'wakame/vm_manipulator'
10
+
11
+ module Wakame
12
+
13
# Keeps track of the agents the master has heard from.
#
# Agents are held in two hashes keyed by agent id:
# * registered_agents   - a Register packet was received for the agent.
# * unregistered_agents - the agent was only seen through pings so far, or an
#   UnRegister packet moved it out of the registered group.
#
# The constructor subscribes to the master's 'registry', 'ping' and
# 'agent_event' queues and starts a periodic timer that marks silent agents as
# timed out and finally removes them from registered_agents.
class AgentMonitor
  include ThreadImmutable
  attr_reader :registered_agents, :unregistered_agents, :master, :gc_period

  # master:: the owning Master. This class calls add_subscriber, started_at,
  #          service_cluster and master_local_agent_id on it.
  def initialize(master)
    bind_thread
    @master = master
    @registered_agents = {}
    @unregistered_agents = {}
    # Seconds without a ping before an agent is flagged STATUS_TIMEOUT.
    @agent_timeout = 31.to_f
    # Seconds without a ping before an agent is dropped from registered_agents.
    @agent_kill_timeout = @agent_timeout * 2
    # Interval (seconds) between two runs of the agent GC.
    @gc_period = 20.to_f

    # GC event trigger for agent timer & status
    @agent_timeout_timer = EventMachine::PeriodicTimer.new(@gc_period, proc { collect_timed_out_agents })

    subscribe_registry
    subscribe_ping
    subscribe_agent_event
  end

  # Looks an agent up by id, preferring the registered group.
  # Returns nil when the id is unknown in both groups.
  def agent(agent_id)
    registered_agents[agent_id] || unregistered_agents[agent_id]
  end

  # Handles a Register packet (+data+ is the decoded packet hash).
  # An agent that is already registered only gets its root_path refreshed;
  # otherwise it is created, or moved over from the unregistered group, and
  # Event::AgentMonitored is fired.
  def register_agent(data)
    agent_id = data[:agent_id]
    entry = registered_agents[agent_id]
    if entry.nil?
      # Reuse the object built from earlier pings when there is one, so the
      # values reported so far are kept.
      entry = unregistered_agents.delete(agent_id) || Service::Agent.new(agent_id)
      registered_agents[agent_id] = entry
      Wakame.log.debug("The Agent has been registered: #{data.inspect}")
      ED.fire_event(Event::AgentMonitored.new(entry))
    end
    entry.root_path = data[:root_path]
  end

  # Moves a registered agent back to the unregistered group and fires
  # Event::AgentUnMonitored. Unknown ids are ignored.
  def unregister_agent(agent_id)
    entry = registered_agents.delete(agent_id)
    return if entry.nil?

    unregistered_agents[agent_id] = entry
    ED.fire_event(Event::AgentUnMonitored.new(entry))
  end

  # Returns the master local agent object.
  # Raises RuntimeError while that agent has not been registered yet.
  def master_local
    local_id = @master.master_local_agent_id
    local = registered_agents[local_id]
    # Was a bare puts; diagnostics belong in the log, not on stdout.
    Wakame.log.debug("#{local} = registered_agents[#{local_id}]")
    raise "Master does not identify the master local agent yet." if local.nil?
    local
  end

  # Calls the given block with every registered agent whose status is
  # Service::Agent::STATUS_ONLINE.
  def each_online(&blk)
    registered_agents.each { |_id, entry|
      next if entry.status != Service::Agent::STATUS_ONLINE
      blk.call(entry)
    }
  end

  # Returns {:registered=>[...], :unregistered=>[...]} where each element is
  # the dump_status of one agent.
  def dump_status
    {
      :registered => @registered_agents.values.map { |a| a.dump_status },
      :unregistered => @unregistered_agents.values.map { |a| a.dump_status }
    }
  end

  private

  # Turns the raw queue payload into the packet hash.
  #
  # NOTE(review): eval executes whatever text arrives on the queue, so anyone
  # able to publish to these queues can run code inside the master. It is kept
  # because the wire format produced by the packets' marshal method is not
  # visible from here; replace it with a data-only parser once confirmed.
  def decode_packet(data)
    eval(data)
  end

  # One GC pass: flags agents that stopped pinging and removes the ones that
  # exceeded the kill timeout. Offline agents are skipped.
  def collect_timed_out_agents
    kill_list = []
    registered_agents.each { |agent_id, entry|
      next if entry.status == Service::Agent::STATUS_OFFLINE
      silent_for = Time.now - entry.last_ping_at
      entry.status = Service::Agent::STATUS_TIMEOUT if silent_for > @agent_timeout
      kill_list << agent_id if silent_for > @agent_kill_timeout
    }

    # Deletion is done after the iteration so the hash is not modified while
    # it is being walked. (The original deleted from @agents, which is never
    # assigned in this class and therefore raised NoMethodError on nil.)
    kill_list.each { |agent_id|
      removed = registered_agents.delete(agent_id)
      ED.fire_event(Event::AgentUnMonitored.new(removed)) unless removed.nil?
    }
  end

  # Register / UnRegister packets.
  def subscribe_registry
    master.add_subscriber('registry') { |data|
      packet = decode_packet(data)
      case packet[:type]
      when 'Wakame::Packets::Register'
        register_agent(packet)
      when 'Wakame::Packets::UnRegister'
        unregister_agent(packet[:agent_id])
      end
    }
  end

  # Ping packets: refresh the agent's reported values and fire AgentPong.
  # An agent seen for the first time is parked in unregistered_agents.
  def subscribe_ping
    master.add_subscriber('ping') { |data|
      ping = decode_packet(data)
      responded_at = Time.parse(ping[:responded_at])
      # Skip the old ping responses before starting master node.
      next if responded_at < master.started_at

      agent_id = ping[:agent_id]
      pinged = agent(agent_id)
      if pinged.nil?
        pinged = Service::Agent.new(agent_id)
        apply_ping(pinged, ping, responded_at)
        unregistered_agents[agent_id] = pinged
      else
        apply_ping(pinged, ping, responded_at)
      end

      ED.fire_event(Event::AgentPong.new(pinged))
    }
  end

  # Copies the values carried by a ping onto +target+.
  def apply_ping(target, ping, responded_at)
    target.status = Service::Agent::STATUS_ONLINE
    target.uptime = 0
    target.last_ping_at = responded_at
    target.attr = ping[:attrs]

    target.services.clear
    # The ping is a hash (see the [] lookups above), so the services must be
    # read with [] as well; ping.services raised NoMethodError.
    (ping[:services] || {}).each { |svc_id, _reported|
      target.services[svc_id] = master.service_cluster.instances[svc_id]
    }
  end

  # ServiceStatusChanged / ActorResponse packets sent back by the agents.
  def subscribe_agent_event
    master.add_subscriber('agent_event') { |data|
      response = decode_packet(data)
      case response[:type]
      when 'Wakame::Packets::ServiceStatusChanged'
        svc_inst = Service::ServiceInstance.instance_collection[response[:svc_id]]
        if svc_inst
          # No event is fired from this handler; the new status, its time and
          # the failure message are handed to the service instance.
          response_time = Time.parse(response[:responded_at])
          svc_inst.update_status(response[:new_status], response_time, response[:fail_message])
        end
      when 'Wakame::Packets::ActorResponse'
        case response[:status]
        when Actor::STATUS_RUNNING
          ED.fire_event(Event::ActorProgress.new(response[:agent_id], response[:token], 0))
        else
          # Every status other than RUNNING is reported as completion.
          ED.fire_event(Event::ActorComplete.new(response[:agent_id], response[:token], response[:status]))
        end
      else
        Wakame.log.warn("#{self.class}: Unhandled agent response: #{response[:type]}")
      end
    }
  end
end
220
+
221
# The master process: connects to the message broker, owns the command queue,
# the agent monitor and the service cluster object.
class Master
  include Wakame::AMQPClient
  include Wakame::QueueDeclare

  define_queue 'agent_event', 'agent_event'
  define_queue 'ping', 'ping'
  define_queue 'registry', 'registry'

  # master_local_agent_id is the agent id of the agent that runs on the same
  # host as the master (see determine_agent_id).
  attr_reader :command_queue, :agent_monitor, :configuration, :service_cluster, :started_at,
              :master_local_agent_id

  # opts:: passed through unchanged to connect (from Wakame::AMQPClient).
  #
  # post_setup runs inside the block handed to connect, so command_queue,
  # agent_monitor and service_cluster are nil until that block has been run.
  def initialize(opts={})
    pre_setup

    connect(opts) {
      post_setup
    }
    Wakame.log.info("Started master process : WAKAME_ROOT=#{Wakame.config.root_path} WAKAME_ENV=#{Wakame.config.environment}")
  end

  # Builds an ActorRequest addressed to +agent_id+ for the actor at +path+.
  # The request is only created here; call #request on the result to send it.
  def actor_request(agent_id, path, *args)
    request = Wakame::Packets::ActorRequest.new(agent_id, Util.gen_id, path, *args)
    ActorRequest.new(self, request)
  end

  # Returns the attr of the master local agent. AgentMonitor#master_local
  # raises while that agent has not been registered yet.
  def attr
    agent_monitor.master_local.attr
  end

  # Shuts the command queue down. Safe to call when post_setup never ran
  # (for example because the connection was not established): the original
  # raised NoMethodError on nil in that case.
  def cleanup
    @command_queue.shutdown if @command_queue
  end

  private

  # Works out the id of the master local agent: the EC2 instance id when
  # running in the :EC2 environment, the stand-alone constant otherwise.
  def determine_agent_id
    @master_local_agent_id =
      if Wakame.config.environment == :EC2
        VmManipulator::EC2::MetadataService.query_metadata_uri('instance-id')
      else
        VmManipulator::StandAlone::INSTANCE_ID
      end
  end

  # Work that does not need the broker connection.
  def pre_setup
    determine_agent_id
    @started_at = Time.now

    EM.barrier {
      Wakame.log.debug("Binding thread info to EventDispatcher.")
      EventDispatcher.instance.bind_thread(Thread.current)
    }
  end

  # Work that needs a running reactor; raises RuntimeError otherwise.
  def post_setup
    raise 'has to be put in EM.run context' unless EM.reactor_running?
    @command_queue = CommandQueue.new(self)
    @agent_monitor = AgentMonitor.new(self)

    @service_cluster = Util.new_(Wakame.config.cluster_class, self)
  end

end
299
+
300
+
301
# Timeout.timeout is used by ActorRequest#wait_completion.
require 'timeout'

# A single request sent to an actor on an agent, plus the means to wait for
# its completion.
#
# Typical use: master.actor_request(agent_id, path).request.wait
class ActorRequest
  attr_reader :master

  # master:: object used to publish the packet (publish_to is called on it).
  # packet:: a Wakame::Packets::ActorRequest; anything else raises TypeError.
  def initialize(master, packet)
    raise TypeError unless packet.is_a?(Wakame::Packets::ActorRequest)

    @master = master
    @packet = packet
    @requested = false
    @event_ticket = nil
    # The completion status is handed from the event callback to the waiting
    # caller through this queue.
    @wait_lock = ::Queue.new
  end

  # Publishes the packet to the agent and returns self so calls can be
  # chained. Raises RuntimeError when called a second time.
  def request
    raise "The request has already been sent." if @requested

    # Subscribe before publishing so a completion event cannot be missed.
    @event_ticket = ED.subscribe(Event::ActorComplete) { |event|
      if event.token == @packet.token

        # Any of status except RUNNING are accomplishment of the actor request.
        Wakame.log.debug("#{self.class}: The actor request has been completed: token=#{self.token}, status=#{event.status}")
        ED.unsubscribe(@event_ticket)
        @wait_lock.enq(event.status)
      end
    }
    Wakame.log.debug("#{self.class}: Send the actor request: #{@packet.path}@#{@packet.agent_id}, token=#{self.token}")
    master.publish_to('agent_command', "agent_id.#{@packet.agent_id}", @packet.marshal)
    @requested = true
    self
  end

  # Token that identifies this request in the responses.
  def token
    @packet.token
  end

  # Not implemented yet; raises NotImplementedError once the request was sent.
  def progress
    check_requested?
    raise NotImplementedError
  end

  # Not implemented yet; raises NotImplementedError once the request was sent.
  def cancel
    check_requested?
    raise NotImplementedError
  end

  # Blocks until the actor reports completion.
  #
  # tout:: maximum number of seconds to wait (default 30 minutes).
  #
  # Raises RuntimeError when the request was not sent yet or the actor
  # returned Actor::STATUS_FAILED, and Timeout::Error when no response
  # arrived within +tout+ seconds.
  def wait_completion(tout=60*30)
    check_requested?
    # Timeout.timeout replaces the bare timeout helper (Object#timeout),
    # which is deprecated and missing from newer Ruby releases.
    Timeout.timeout(tout) {
      Wakame.log.debug("#{self.class}: Waiting a response from the actor: #{@packet.path}@#{@packet.agent_id}, token=#{@packet.token}")
      ret_status = @wait_lock.deq
      Wakame.log.debug("#{self.class}: A response (status=#{ret_status}) back from the actor: #{@packet.path}@#{@packet.agent_id}, token=#{@packet.token}")
      if ret_status == Actor::STATUS_FAILED
        raise RuntimeError, "Failed status has been returned: Actor Request #{token}"
      end
    }
  end
  alias :wait :wait_completion

  private

  # Guard used by the methods that only make sense after #request.
  def check_requested?
    raise "The request has not been sent yet." unless @requested
  end
end
369
+ end
@@ -0,0 +1,50 @@
1
+
2
# Monitor that publishes a Ping packet for this agent: once from setup and
# then from a CheckerTimer created with an interval of 10.
class Wakame::Monitor::Agent
  include Wakame::Monitor

  def initialize
    @status = STATUS_ONLINE
  end

  # Publishes one Ping packet to the 'ping' exchange.
  # hash:: the structure returned by #check
  #        (:attrs, :actors, :monitors and :services keys).
  def send_ping(hash)
    publish_to('ping', Wakame::Packets::Ping.new(agent, hash[:attrs], hash[:actors], hash[:monitors], hash[:services]).marshal)
  end

  # path:: not used by this monitor.
  def setup(path)
    # Send the first ping signal as soon as possible since the ping contains
    # vital information to construct the Agent object on master node.
    send_ping(check)

    # Setup periodical ping publisher.
    @timer = CheckerTimer.new(10) {
      send_ping(check)
    }
    @timer.start
  end

  # Collects the data carried by a ping and returns it as
  #   {:attrs=>..., :monitors=>[{:class=>"..."}], :actors=>[{:class=>"..."}],
  #    :services=>{svc_id=>{:status=>...}}}
  def check
    # Both branches below use constants under Wakame::VmManipulator, so the
    # file is loaded before branching (the original loaded it in the EC2
    # branch only).
    require 'wakame/vm_manipulator'
    attrs =
      if Wakame.environment == :EC2
        Wakame::VmManipulator::EC2::MetadataService.fetch_local_attrs
      else
        Wakame::VmManipulator::StandAlone.fetch_local_attrs
      end

    res = {:attrs=>attrs, :monitors=>[], :actors=>[], :services=>{}}
    EM.barrier {
      agent.monitor_registry.monitors.each { |_key, m|
        res[:monitors] << {:class=>m.class.to_s}
      }
      agent.actor_registry.actors.each { |_key, a|
        res[:actors] << {:class=>a.class.to_s}
      }

      # A missing '/service' monitor leaves :services empty instead of
      # aborting the ping with NoMethodError on nil.
      svcmon = agent.monitor_registry.find_monitor('/service')
      if svcmon
        svcmon.checkers.each { |svc_id, checker|
          res[:services][svc_id]={:status=>checker.status}
        }
      end
    }

    res
  end
end
@@ -0,0 +1,183 @@
1
+
2
+ require 'open4'
3
+ require 'wakame'
4
+
5
# Monitor that runs one checker per service instance and reports status
# changes to the master through the 'agent_event' exchange.
class Wakame::Monitor::Service

  # Base class of the checkers. A CheckerTimer created with an interval of 3
  # triggers #check; subclasses override #check and return
  # Wakame::Service::STATUS_ONLINE or STATUS_OFFLINE.
  class ServiceChecker
    attr_reader :timer, :svc_id
    attr_accessor :last_checked_at, :status

    # svc_id::  id of the service instance being watched.
    # svc_mon:: the owning monitor; send_event and agent are called on it.
    def initialize(svc_id, svc_mon)
      @svc_id = svc_id
      @service_monitor = svc_mon
      @status = Wakame::Service::STATUS_OFFLINE
      @timer = Wakame::Monitor::CheckerTimer.new(3) {
        self.signal_checker
      }
    end

    # Starts the timer and sends MonitoringStarted; no-op when already running.
    def start
      if !@timer.running?
        @timer.start
        @service_monitor.send_event(Wakame::Packets::MonitoringStarted.new(@service_monitor.agent, self.svc_id))
        Wakame.log.debug("#{self.class}: Started the checker")
      end
    end

    # Stops the timer and sends MonitoringStopped; no-op when not running.
    def stop
      if @timer.running?
        @timer.stop
        @service_monitor.send_event(Wakame::Packets::MonitoringStopped.new(@service_monitor.agent, self.svc_id))
        Wakame.log.debug("#{self.class}: Stopped the checker")
      end
    end

    # Overridden by subclasses. The base implementation returns nil, which
    # signal_checker logs as an unknown response.
    def check
    end

    protected

    # Runs #check through EventMachine.defer and feeds the outcome to
    # update_status from the callback. An exception raised by #check is
    # logged and turned into STATUS_FAIL.
    def signal_checker
      run_check = proc {
        outcome = begin
          self.last_checked_at = Time.now
          check
        rescue => e
          Wakame.log.error("#{self.class}: #{e}")
          Wakame.log.error(e)
          e
        end
        Thread.pass
        outcome
      }

      report = proc { |res|
        case res
        when Exception
          update_status(Wakame::Service::STATUS_FAIL)
        when Wakame::Service::STATUS_ONLINE, Wakame::Service::STATUS_OFFLINE
          update_status(res)
        else
          # The original message ended after the comma without naming the
          # offending value.
          Wakame.log.error("#{self.class}: Unknown response type from the checker: #{self.svc_id}, #{res.inspect}")
        end
      }

      EventMachine.defer(run_check, report)
    end

    # Stores the new status and sends ServiceStatusChanged, but only when the
    # status really changed.
    def update_status(new_status)
      prev_status = self.status
      if prev_status != new_status
        self.status = new_status
        @service_monitor.send_event(Wakame::Packets::ServiceStatusChanged.new(@service_monitor.agent, self.svc_id, prev_status, new_status))
      end
    end
  end

  # Reports ONLINE while the process named in a pid file exists.
  class PidFileChecker < ServiceChecker
    # pidpath:: path of the pid file.
    def initialize(svc_id, svc_mon, pidpath)
      super(svc_id, svc_mon)
      @pidpath = pidpath
    end

    # The pid is read in-process and probed with signal 0 instead of building
    # a `ps -p "`cat ...`"` shell command from the path: no shell is spawned
    # and a path containing quotes can no longer alter the command.
    # A missing, unreadable or non-numeric pid file counts as OFFLINE.
    def check
      return Wakame::Service::STATUS_OFFLINE unless File.exist?(@pidpath)

      pid = begin
        File.read(@pidpath).to_i
      rescue SystemCallError
        # The file vanished or cannot be read.
        0
      end
      return Wakame::Service::STATUS_OFFLINE unless pid > 0

      begin
        # Signal 0 delivers nothing; it only checks that the pid exists.
        Process.kill(0, pid)
        Wakame::Service::STATUS_ONLINE
      rescue Errno::EPERM
        # The process exists but belongs to another user.
        Wakame::Service::STATUS_ONLINE
      rescue Errno::ESRCH, RangeError
        Wakame::Service::STATUS_OFFLINE
      end
    end
  end

  # Reports ONLINE while a command exits with status 0. Whatever the command
  # prints is sent to the master as a MonitoringOutput packet.
  class CommandChecker < ServiceChecker
    attr_reader :command

    # cmdstr:: the command line handed to Open4.popen4.
    def initialize(svc_id, svc_mon, cmdstr)
      super(svc_id, svc_mon)
      @command = cmdstr
    end

    def check()
      captured = ''
      cmdstat = ::Open4.popen4(@command) { |pid, stdin, stdout, stderr|
        # stderr is drained in its own thread: reading stdout to the end
        # first blocks forever once the command has filled the stderr pipe.
        err_reader = Thread.new { stderr.read.to_s }
        # Same layout as before: everything from stdout, then stderr.
        captured = stdout.read.to_s + err_reader.value
      }
      Wakame.log.debug("#{self.class}: Exit Status #{@command}: #{cmdstat}")
      unless captured.empty?
        @service_monitor.send_event(Wakame::Packets::MonitoringOutput.new(@service_monitor.agent, self.svc_id, captured))
      end
      cmdstat.exitstatus == 0 ? Wakame::Service::STATUS_ONLINE : Wakame::Service::STATUS_OFFLINE
    end
  end

  include Wakame::Monitor

  # Hash of svc_id => checker.
  attr_reader :checkers

  def initialize
    @status = Wakame::Service::STATUS_ONLINE
    @checkers = {}
  end

  # path:: not used by this monitor.
  def setup(path)
  end

  # request:: hash with :command (:start or :stop), :svc_id and, for :start,
  #           :cmdstr holding the check command.
  def handle_request(request)
    svc_id = request[:svc_id]
    case request[:command]
    when :start
      # The command string used to be passed in the checker_type position,
      # which made register raise "Unsupported checker type".
      register(svc_id, :command, request[:cmdstr])
    when :stop
      unregister(svc_id)
    end
  end

  # Publishes packet +a+ to the 'agent_event' exchange.
  def send_event(a)
    Wakame.log.debug("Sending back a event: #{a.class}")
    publish_to('agent_event', a.marshal)
  end

  # Empty in this version.
  def dump_attrs

  end

  # Returns the checker registered for +svc_id+, or nil.
  def find_checker(svc_id)
    @checkers[svc_id]
  end

  # Creates, starts and stores a checker for +svc_id+.
  # checker_type:: :pidfile (args[0] is the pid file path) or
  #                :command (args[0] is the command line).
  # Raises RuntimeError for a duplicate svc_id or an unknown checker type.
  def register(svc_id, checker_type, *args)
    if @checkers[svc_id]
      Wakame.log.error("#{self.class}: Service registory duplication. #{svc_id}")
      raise "Service registory duplication. #{svc_id}"
    end

    chk = case checker_type.to_sym
          when :pidfile
            PidFileChecker.new(svc_id, self, args[0])
          when :command
            CommandChecker.new(svc_id, self, args[0])
          else
            raise "Unsupported checker type: #{checker_type}"
          end
    chk.start
    @checkers[svc_id]=chk
    Wakame.log.info("#{self.class}: Registered service checker for #{svc_id}")
  end

  # Stops and forgets the checker of +svc_id+; unknown ids are ignored.
  # The timer is stopped directly, so no MonitoringStopped packet is sent
  # (unlike ServiceChecker#stop).
  def unregister(svc_id)
    chk = @checkers[svc_id]
    if chk
      chk.timer.stop
      @checkers.delete(svc_id)
      Wakame.log.info("#{self.class}: Unregistered service checker for #{svc_id}")
    end
  end

end
@@ -0,0 +1,69 @@
1
+
2
+
3
+
4
module Wakame
  # Mixin shared by the agent-side monitors. Including it gives the class
  # status/agent accessors, empty default hooks and a publish_to shortcut.
  module Monitor
    STATUS_OFFLINE = 0
    STATUS_ONLINE  = 1
    STATUS_FAIL    = 2

    # Adds the status and agent accessors to every including class.
    def self.included(klass)
      klass.send(:attr_accessor, :status, :agent)
    end

    # Default hooks; they do nothing and return nil. Monitors override the
    # ones they need.
    def handle_request(request); end

    def setup(assigned_path); end

    def enable; end

    def disable; end

    # Publishes +data+ to +exchange+ through the agent this monitor is
    # attached to.
    def publish_to(exchange, data)
      agent.publish_to(exchange, data)
    end
  end
end
35
+
36
+
37
module Wakame
  module Monitor
    # Periodic timer that is created stopped and can be started and stopped
    # on demand. An exception raised by the block is logged, not propagated.
    #
    # NOTE(review): initialize does not call super; it sets @interval, @code
    # and @cancelled itself and start calls schedule, so this class appears
    # to rely on the internals of EventMachine::PeriodicTimer - confirm
    # against the EventMachine version in use.
    class CheckerTimer < EventMachine::PeriodicTimer
      # time:: value stored as @interval.
      # blk::  code stored (wrapped in a rescue) as @code.
      def initialize(time, &blk)
        @interval = time
        @code = proc {
          begin
            blk.call
          rescue => error
            Wakame.log.error(error)
          end
        }
        # A new timer stays idle until start is called.
        stop
      end

      # Schedules the timer unless it is already running.
      def start
        return if running?

        @cancelled = false
        schedule
      end

      # Marks the timer as cancelled.
      def stop
        @cancelled = true
      end

      # True while the timer is not cancelled.
      def running?
        !@cancelled
      end

    end
  end
end