wakame 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. data/History.txt +20 -0
  2. data/README.rdoc +63 -0
  3. data/Rakefile +86 -0
  4. data/VERSION +1 -0
  5. data/app_generators/wakame/templates/README +0 -0
  6. data/app_generators/wakame/templates/Rakefile +18 -0
  7. data/app_generators/wakame/templates/bin/wakame-agent +9 -0
  8. data/app_generators/wakame/templates/bin/wakame-master +9 -0
  9. data/app_generators/wakame/templates/bin/wakameadm +9 -0
  10. data/app_generators/wakame/templates/cluster/resources/apache_app/apache_app.rb +54 -0
  11. data/app_generators/wakame/templates/cluster/resources/apache_app/conf/apache2.conf +46 -0
  12. data/app_generators/wakame/templates/cluster/resources/apache_app/conf/envvars-app +7 -0
  13. data/app_generators/wakame/templates/cluster/resources/apache_app/conf/sites-app.conf +23 -0
  14. data/app_generators/wakame/templates/cluster/resources/apache_app/conf/system-app.conf +67 -0
  15. data/app_generators/wakame/templates/cluster/resources/apache_app/init.d/apache2-app +192 -0
  16. data/app_generators/wakame/templates/cluster/resources/apache_lb/apache_lb.rb +56 -0
  17. data/app_generators/wakame/templates/cluster/resources/apache_lb/conf/apache2.conf +46 -0
  18. data/app_generators/wakame/templates/cluster/resources/apache_lb/conf/envvars-lb +6 -0
  19. data/app_generators/wakame/templates/cluster/resources/apache_lb/conf/sites-lb.conf +54 -0
  20. data/app_generators/wakame/templates/cluster/resources/apache_lb/conf/system-lb.conf +75 -0
  21. data/app_generators/wakame/templates/cluster/resources/apache_lb/init.d/apache2-lb +192 -0
  22. data/app_generators/wakame/templates/cluster/resources/apache_www/apache_www.rb +50 -0
  23. data/app_generators/wakame/templates/cluster/resources/apache_www/conf/apache2.conf +47 -0
  24. data/app_generators/wakame/templates/cluster/resources/apache_www/conf/envvars-www +7 -0
  25. data/app_generators/wakame/templates/cluster/resources/apache_www/conf/sites-www.conf +23 -0
  26. data/app_generators/wakame/templates/cluster/resources/apache_www/conf/system-www.conf +63 -0
  27. data/app_generators/wakame/templates/cluster/resources/apache_www/init.d/apache2-www +192 -0
  28. data/app_generators/wakame/templates/cluster/resources/ec2_elastic_ip/ec2_elastic_ip.rb +39 -0
  29. data/app_generators/wakame/templates/cluster/resources/mysql_master/conf/my.cnf +154 -0
  30. data/app_generators/wakame/templates/cluster/resources/mysql_master/init.d/mysql +185 -0
  31. data/app_generators/wakame/templates/cluster/resources/mysql_master/mysql_master.rb +174 -0
  32. data/app_generators/wakame/templates/config/boot.rb +85 -0
  33. data/app_generators/wakame/templates/config/cluster.rb +64 -0
  34. data/app_generators/wakame/templates/config/environments/common.rb +0 -0
  35. data/app_generators/wakame/templates/config/environments/ec2.rb +3 -0
  36. data/app_generators/wakame/templates/config/environments/stand_alone.rb +0 -0
  37. data/app_generators/wakame/templates/config/init.d/wakame-agent +72 -0
  38. data/app_generators/wakame/templates/config/init.d/wakame-master +73 -0
  39. data/app_generators/wakame/wakame_generator.rb +124 -0
  40. data/bin/wakame +18 -0
  41. data/contrib/imagesetup.sh +77 -0
  42. data/lib/ext/eventmachine.rb +86 -0
  43. data/lib/ext/shellwords.rb +172 -0
  44. data/lib/ext/uri.rb +15 -0
  45. data/lib/wakame/action.rb +156 -0
  46. data/lib/wakame/actions/destroy_instances.rb +39 -0
  47. data/lib/wakame/actions/launch_cluster.rb +31 -0
  48. data/lib/wakame/actions/migrate_service.rb +65 -0
  49. data/lib/wakame/actions/propagate_instances.rb +95 -0
  50. data/lib/wakame/actions/reload_service.rb +21 -0
  51. data/lib/wakame/actions/scaleout_when_high_load.rb +44 -0
  52. data/lib/wakame/actions/shutdown_cluster.rb +22 -0
  53. data/lib/wakame/actions/shutdown_vm.rb +19 -0
  54. data/lib/wakame/actions/start_service.rb +64 -0
  55. data/lib/wakame/actions/stop_service.rb +49 -0
  56. data/lib/wakame/actions/util.rb +71 -0
  57. data/lib/wakame/actor/daemon.rb +37 -0
  58. data/lib/wakame/actor/service_monitor.rb +21 -0
  59. data/lib/wakame/actor/system.rb +46 -0
  60. data/lib/wakame/actor.rb +33 -0
  61. data/lib/wakame/agent.rb +226 -0
  62. data/lib/wakame/amqp_client.rb +219 -0
  63. data/lib/wakame/command/action_status.rb +62 -0
  64. data/lib/wakame/command/actor.rb +23 -0
  65. data/lib/wakame/command/clone_service.rb +12 -0
  66. data/lib/wakame/command/launch_cluster.rb +15 -0
  67. data/lib/wakame/command/migrate_service.rb +21 -0
  68. data/lib/wakame/command/propagate_service.rb +24 -0
  69. data/lib/wakame/command/shutdown_cluster.rb +15 -0
  70. data/lib/wakame/command/status.rb +81 -0
  71. data/lib/wakame/command.rb +31 -0
  72. data/lib/wakame/command_queue.rb +44 -0
  73. data/lib/wakame/configuration.rb +93 -0
  74. data/lib/wakame/daemonize.rb +96 -0
  75. data/lib/wakame/event.rb +232 -0
  76. data/lib/wakame/event_dispatcher.rb +154 -0
  77. data/lib/wakame/graph.rb +79 -0
  78. data/lib/wakame/initializer.rb +162 -0
  79. data/lib/wakame/instance_counter.rb +78 -0
  80. data/lib/wakame/logger.rb +12 -0
  81. data/lib/wakame/manager/commands.rb +134 -0
  82. data/lib/wakame/master.rb +369 -0
  83. data/lib/wakame/monitor/agent.rb +50 -0
  84. data/lib/wakame/monitor/service.rb +183 -0
  85. data/lib/wakame/monitor.rb +69 -0
  86. data/lib/wakame/packets.rb +160 -0
  87. data/lib/wakame/queue_declare.rb +14 -0
  88. data/lib/wakame/rule.rb +116 -0
  89. data/lib/wakame/rule_engine.rb +202 -0
  90. data/lib/wakame/runner/administrator_command.rb +112 -0
  91. data/lib/wakame/runner/agent.rb +81 -0
  92. data/lib/wakame/runner/master.rb +93 -0
  93. data/lib/wakame/scheduler.rb +251 -0
  94. data/lib/wakame/service.rb +914 -0
  95. data/lib/wakame/template.rb +189 -0
  96. data/lib/wakame/trigger.rb +66 -0
  97. data/lib/wakame/triggers/instance_count_update.rb +45 -0
  98. data/lib/wakame/triggers/load_history.rb +107 -0
  99. data/lib/wakame/triggers/maintain_ssh_known_hosts.rb +43 -0
  100. data/lib/wakame/triggers/process_command.rb +34 -0
  101. data/lib/wakame/triggers/shutdown_unused_vm.rb +16 -0
  102. data/lib/wakame/util.rb +569 -0
  103. data/lib/wakame/vm_manipulator.rb +186 -0
  104. data/lib/wakame.rb +59 -0
  105. data/tasks/ec2.rake +127 -0
  106. data/tests/cluster.json +3 -0
  107. data/tests/conf/a +1 -0
  108. data/tests/conf/b +1 -0
  109. data/tests/conf/c +1 -0
  110. data/tests/setup_agent.rb +39 -0
  111. data/tests/setup_master.rb +28 -0
  112. data/tests/test_actor.rb +54 -0
  113. data/tests/test_agent.rb +218 -0
  114. data/tests/test_amqp_client.rb +94 -0
  115. data/tests/test_graph.rb +36 -0
  116. data/tests/test_master.rb +167 -0
  117. data/tests/test_monitor.rb +47 -0
  118. data/tests/test_rule_engine.rb +127 -0
  119. data/tests/test_scheduler.rb +123 -0
  120. data/tests/test_service.rb +60 -0
  121. data/tests/test_template.rb +67 -0
  122. data/tests/test_uri_amqp.rb +19 -0
  123. data/tests/test_util.rb +71 -0
  124. data/wakame_generators/resource/resource_generator.rb +54 -0
  125. data/wakame_generators/resource/templates/apache_app/apache_app.rb +60 -0
  126. data/wakame_generators/resource/templates/apache_app/conf/apache2.conf +46 -0
  127. data/wakame_generators/resource/templates/apache_app/conf/envvars-app +7 -0
  128. data/wakame_generators/resource/templates/apache_app/conf/sites-app.conf +23 -0
  129. data/wakame_generators/resource/templates/apache_app/conf/system-app.conf +67 -0
  130. data/wakame_generators/resource/templates/apache_app/init.d/apache2-app +192 -0
  131. data/wakame_generators/resource/templates/apache_lb/apache_lb.rb +67 -0
  132. data/wakame_generators/resource/templates/apache_lb/conf/apache2.conf +46 -0
  133. data/wakame_generators/resource/templates/apache_lb/conf/envvars-lb +6 -0
  134. data/wakame_generators/resource/templates/apache_lb/conf/sites-lb.conf +54 -0
  135. data/wakame_generators/resource/templates/apache_lb/conf/system-lb.conf +75 -0
  136. data/wakame_generators/resource/templates/apache_lb/init.d/apache2-lb +192 -0
  137. data/wakame_generators/resource/templates/apache_www/apache_www.rb +56 -0
  138. data/wakame_generators/resource/templates/apache_www/conf/apache2.conf +47 -0
  139. data/wakame_generators/resource/templates/apache_www/conf/envvars-www +7 -0
  140. data/wakame_generators/resource/templates/apache_www/conf/sites-www.conf +23 -0
  141. data/wakame_generators/resource/templates/apache_www/conf/system-www.conf +63 -0
  142. data/wakame_generators/resource/templates/apache_www/init.d/apache2-www +192 -0
  143. data/wakame_generators/resource/templates/ec2_elastic_ip/ec2_elastic_ip.rb +39 -0
  144. data/wakame_generators/resource/templates/mysql_master/conf/my.cnf +154 -0
  145. data/wakame_generators/resource/templates/mysql_master/init.d/mysql +185 -0
  146. data/wakame_generators/resource/templates/mysql_master/mysql_master.rb +119 -0
  147. metadata +289 -0
@@ -0,0 +1,369 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'rubygems'
4
+
5
+ require 'wakame'
6
+ require 'wakame/packets'
7
+ require 'wakame/service'
8
+ require 'wakame/queue_declare'
9
+ require 'wakame/vm_manipulator'
10
+
11
+ module Wakame
12
+
13
# Keeps track of the agents known to the master node.
#
# Agents are held in two hashes keyed by agent id:
# * registered_agents   - agents for which a Register packet was handled.
# * unregistered_agents - agents seen only through a ping, or moved back by
#   an UnRegister packet.
#
# The constructor installs a periodic timer that marks silent agents as
# timed out (and eventually drops them), and subscribes to the 'registry',
# 'ping' and 'agent_event' queues of the given master.
class AgentMonitor
  include ThreadImmutable
  attr_reader :registered_agents, :unregistered_agents, :master, :gc_period

  # master:: the object that owns this monitor. It is used here through
  #          add_subscriber, started_at and service_cluster.
  def initialize(master)
    bind_thread
    @master = master
    @registered_agents = {}
    @unregistered_agents = {}
    @agent_timeout = 31.to_f
    @agent_kill_timeout = @agent_timeout * 2
    @gc_period = 20.to_f

    # GC event trigger for agent timer & status
    calc_agent_timeout = proc {
      #Wakame.log.debug("Started agent GC : agents.size=#{@registered_agents.size}")
      kill_list = []
      registered_agents.each { |agent_id, agent|
        next if agent.status == Service::Agent::STATUS_OFFLINE
        diff_time = Time.now - agent.last_ping_at
        #Wakame.log.debug "AgentMonitor GC : #{agent_id}: #{diff_time}"
        agent.status = Service::Agent::STATUS_TIMEOUT if diff_time > @agent_timeout
        # Collected separately so the hash is not modified while iterating.
        kill_list << agent_id if diff_time > @agent_kill_timeout
      }

      kill_list.each { |agent_id|
        # The agents being scanned live in registered_agents. The previous
        # code deleted from @agents, which is never assigned in this class
        # and therefore raised NoMethodError on nil.
        agent = registered_agents.delete(agent_id)
        ED.fire_event(Event::AgentUnMonitored.new(agent)) unless agent.nil?
      }
      #Wakame.log.debug("Finished agent GC")
    }

    @agent_timeout_timer = EventMachine::PeriodicTimer.new(@gc_period, calc_agent_timeout)

    # NOTE(review): the three subscribers below eval() the message body.
    # That executes whatever Ruby code arrives on the queue; it is only
    # acceptable if every publisher on the broker is trusted. Consider a
    # data-only format instead.
    master.add_subscriber('registry') { |data|
      data = eval(data)

      agent_id = data[:agent_id]
      case data[:type]
      when 'Wakame::Packets::Register'
        register_agent(data)
      when 'Wakame::Packets::UnRegister'
        unregister_agent(agent_id)
      end
    }

    master.add_subscriber('ping') { |data|
      ping = eval(data)
      responded_at = Time.parse(ping[:responded_at])
      # Skip the old ping responses before starting master node.
      next if responded_at < master.started_at

      # Variable update function for the common members
      set_report_values = proc { |agent|
        agent.status = Service::Agent::STATUS_ONLINE
        agent.uptime = 0
        agent.last_ping_at = responded_at

        agent.attr = ping[:attrs]

        agent.services.clear
        # The ping is indexed like a Hash everywhere else in this handler,
        # so the service list is read with [] as well (ping.services is not
        # a Hash method). A ping without services is treated as empty.
        (ping[:services] || {}).each { |svc_id, _status|
          agent.services[svc_id] = master.service_cluster.instances[svc_id]
        }
      }

      pinged = self.agent(ping[:agent_id])
      if pinged.nil?
        # First contact through ping only: keep it in the unregistered
        # group until a Register packet is handled.
        pinged = Service::Agent.new(ping[:agent_id])
        set_report_values.call(pinged)
        unregistered_agents[ping[:agent_id]] = pinged
      else
        set_report_values.call(pinged)
      end

      ED.fire_event(Event::AgentPong.new(pinged))
    }

    master.add_subscriber('agent_event') { |data|
      response = eval(data)
      #p response
      case response[:type]
      when 'Wakame::Packets::ServiceStatusChanged'
        svc_inst = Service::ServiceInstance.instance_collection[response[:svc_id]]
        if svc_inst
          response_time = Time.parse(response[:responded_at])
          svc_inst.update_status(response[:new_status], response_time, response[:fail_message])

          # tmp_event = Event::ServiceStatusChanged.new(response[:svc_id], svc_inst.property, response[:status], response[:previous_status])
          # tmp_event.time = response_time
          # ED.fire_event(tmp_event)

          # if response[:previous_status] != Service::STATUS_ONLINE && response[:new_status] == Service::STATUS_ONLINE
          #   tmp_event = Event::ServiceOnline.new(tmp_event.instance_id, svc_inst.property)
          #   tmp_event.time = response_time
          #   ED.fire_event(tmp_event)
          # elsif response[:previous_status] != Service::STATUS_OFFLINE && response[:new_status] == Service::STATUS_OFFLINE
          #   tmp_event = Event::ServiceOffline.new(tmp_event.instance_id, svc_inst.property)
          #   tmp_event.time = response_time
          #   ED.fire_event(tmp_event)
          # elsif response[:previous_status] != Service::STATUS_FAIL && response[:new_status] == Service::STATUS_FAIL
          #   tmp_event = Event::ServiceFailed.new(tmp_event.instance_id, svc_inst.property, response[:fail_message])
          #   tmp_event.time = response_time
          #   ED.fire_event(tmp_event)
          # end
        end
      when 'Wakame::Packets::ActorResponse'
        case response[:status]
        when Actor::STATUS_RUNNING
          ED.fire_event(Event::ActorProgress.new(response[:agent_id], response[:token], 0))
        else
          # Every status other than RUNNING is reported as completion.
          ED.fire_event(Event::ActorComplete.new(response[:agent_id], response[:token], response[:status]))
        end
      else
        Wakame.log.warn("#{self.class}: Unhandled agent response: #{response[:type]}")
      end
    }
  end

  # Looks an agent up by id, registered group first.
  # Returns nil when the id is in neither group.
  def agent(agent_id)
    registered_agents[agent_id] || unregistered_agents[agent_id]
  end

  # Handles a Register packet (data[:agent_id], data[:root_path]).
  # An agent that is not yet registered is created, or moved over from the
  # unregistered group, and AgentMonitored is fired. root_path is refreshed
  # on every call, including for already registered agents.
  def register_agent(data)
    agent_id = data[:agent_id]
    agent = registered_agents[agent_id]
    if agent.nil?
      agent = unregistered_agents[agent_id]
      if agent.nil?
        # The agent is going to be registered at first time.
        agent = Service::Agent.new(agent_id)
        registered_agents[agent_id] = agent
      else
        # Move the reference from unregistered group to the registered group.
        registered_agents[agent_id] = unregistered_agents[agent_id]
        unregistered_agents.delete(agent_id)
      end
      Wakame.log.debug("The Agent has been registered: #{data.inspect}")
      #Wakame.log.debug(unregistered_agents)
      ED.fire_event(Event::AgentMonitored.new(agent))
    end
    agent.root_path = data[:root_path]
  end

  # Moves a registered agent back to the unregistered group and fires
  # AgentUnMonitored. Does nothing for ids that are not registered.
  def unregister_agent(agent_id)
    agent = registered_agents[agent_id]
    if agent
      unregistered_agents[agent_id] = registered_agents[agent_id]
      registered_agents.delete(agent_id)
      ED.fire_event(Event::AgentUnMonitored.new(agent))
    end
  end


  # def bind_agent(service_instance, &filter)
  #   agent_id, agent = @agents.find { |agent_id, agent|
  #
  #     next false if agent.has_service_type?(service_instance.property.class)
  #     filter.call(agent)
  #   }
  #   return nil if agent.nil?
  #   service_instance.bind_agent(agent)
  #   agent
  # end

  # def unbind_agent(service_instance)
  #   service_instance.unbind_agent
  # end

  # Returns the master local agent object.
  # Raises RuntimeError while that agent has not been registered yet.
  def master_local
    agent = registered_agents[@master.master_local_agent_id]
    # Diagnostic trace; sent to the logger rather than printed on stdout.
    Wakame.log.debug("#{agent} = registered_agents[#{@master.master_local_agent_id}]")
    raise "Master does not identify the master local agent yet." if agent.nil?
    agent
  end

  # Calls the block with every registered agent whose status is ONLINE.
  def each_online(&blk)
    registered_agents.each { |k, v|
      next if v.status != Service::Agent::STATUS_ONLINE
      blk.call(v)
    }
  end

  # Returns {:registered=>[...], :unregistered=>[...]} where each element is
  # the dump_status of one agent of that group.
  def dump_status
    res = {:registered=>[], :unregistered=>[]}

    @registered_agents.each { |key, a|
      res[:registered] << a.dump_status
    }
    @unregistered_agents.each { |key, a|
      res[:unregistered] << a.dump_status
    }
    res
  end
end
220
+
221
# The master process object.
#
# Mixes in the AMQP client and queue declaration helpers, declares the
# 'agent_event', 'ping' and 'registry' queues, and on construction builds
# the command queue, the agent monitor and the service cluster once the
# connection block runs.
class Master
  include Wakame::AMQPClient
  include Wakame::QueueDeclare

  define_queue 'agent_event', 'agent_event'
  define_queue 'ping', 'ping'
  define_queue 'registry', 'registry'

  attr_reader :command_queue, :agent_monitor, :configuration, :service_cluster, :started_at
  # Agent id of the node the master itself runs on (set in determine_agent_id).
  attr_reader :master_local_agent_id

  # opts:: passed straight to connect.
  def initialize(opts={})
    pre_setup
    connect(opts) { post_setup }

    root = Wakame.config.root_path
    env = Wakame.config.environment
    Wakame.log.info("Started master process : WAKAME_ROOT=#{root} WAKAME_ENV=#{env}")
  end


  # def send_agent_command(command, agent_id=nil)
  #   raise TypeError unless command.is_a? Packets::RequestBase
  #   EM.next_tick {
  #     if agent_id
  #       publish_to('agent_command', "agent_id.#{agent_id}", Marshal.dump(command))
  #     else
  #       publish_to('agent_command', '*', Marshal.dump(command))
  #     end
  #   }
  # end

  # Builds (but does not send) an actor request addressed to agent_id.
  # A fresh token is generated for every call; the caller sends it with
  # ActorRequest#request.
  def actor_request(agent_id, path, *args)
    packet = Wakame::Packets::ActorRequest.new(agent_id, Util.gen_id, path, *args)
    ActorRequest.new(self, packet)
  end

  # Attributes of the master local agent, as held by the agent monitor.
  def attr
    local_agent = agent_monitor.master_local
    local_agent.attr
  end

  # Shuts the command queue down.
  def cleanup
    @command_queue.shutdown
  end

  private

  # Chooses the id of the master local agent: the EC2 instance id when the
  # configured environment is :EC2, the stand-alone constant otherwise.
  def determine_agent_id
    @master_local_agent_id =
      if Wakame.config.environment == :EC2
        VmManipulator::EC2::MetadataService.query_metadata_uri('instance-id')
      else
        VmManipulator::StandAlone::INSTANCE_ID
      end
  end

  # Runs before connecting: fixes the agent id, records the start time and
  # binds the event dispatcher to the thread that executes the barrier block.
  def pre_setup
    determine_agent_id
    @started_at = Time.now

    EM.barrier {
      Wakame.log.debug("Binding thread info to EventDispatcher.")
      EventDispatcher.instance.bind_thread(Thread.current)
    }
  end

  # Runs inside the connect block; requires a running reactor.
  def post_setup
    raise 'has to be put in EM.run context' unless EM.reactor_running?

    @command_queue = CommandQueue.new(self)
    @agent_monitor = AgentMonitor.new(self)

    @service_cluster = Util.new_(Wakame.config.cluster_class, self)
  end

end
299
+
300
+
301
# Needed for Timeout.timeout in ActorRequest#wait_completion.
require 'timeout'

# One actor request sent from the master to an agent, plus the means to
# wait for its completion.
#
# Typical use: build it (Master#actor_request), call #request to publish
# the packet, then #wait_completion (alias #wait) to block until an
# ActorComplete event carrying the same token arrives.
class ActorRequest
  attr_reader :master

  # master:: object used to publish the packet (publish_to).
  # packet:: a Wakame::Packets::ActorRequest; anything else raises TypeError.
  def initialize(master, packet)
    raise TypeError unless packet.is_a?(Wakame::Packets::ActorRequest)

    @master = master
    @packet = packet
    @requested = false
    @event_ticket = nil
    # Hands the completion status from the event handler to the waiter.
    @wait_lock = ::Queue.new
  end


  # Subscribes to ActorComplete events and publishes the packet to the
  # 'agent_command' exchange. Returns self.
  # Raises RuntimeError when called a second time.
  def request
    raise "The request has already been sent." if @requested

    @event_ticket = ED.subscribe(Event::ActorComplete) { |event|
      if event.token == @packet.token

        # Any of status except RUNNING are accomplishment of the actor request.
        Wakame.log.debug("#{self.class}: The actor request has been completed: token=#{self.token}, status=#{event.status}")
        ED.unsubscribe(@event_ticket)
        @wait_lock.enq(event.status)
      end
    }
    Wakame.log.debug("#{self.class}: Send the actor request: #{@packet.path}@#{@packet.agent_id}, token=#{self.token}")
    master.publish_to('agent_command', "agent_id.#{@packet.agent_id}", @packet.marshal)
    @requested = true
    self
  end


  # Token that identifies this request in the response events.
  def token
    @packet.token
  end

  # Not implemented yet.
  def progress
    check_requested?
    raise NotImplementedError
  end

  # Not implemented yet.
  def cancel
    check_requested?
    raise NotImplementedError

    #master.publish_to('agent_command', "agent_id.#{@packet.agent_id}", Wakame::Packets::ActorCancel.new(@packet.agent_id, ).marshal)
    #ED.unsubscribe(@event_ticket)
  end

  # Blocks until the completion status of this request is delivered.
  #
  # tout:: maximum number of seconds to wait (default 30 minutes).
  #
  # Raises RuntimeError when the request was not sent yet or when the
  # delivered status is Actor::STATUS_FAILED, and Timeout::Error when no
  # status arrives within tout seconds.
  def wait_completion(tout=60*30)
    check_requested?
    # Timeout.timeout replaces the bare timeout() call: that was the
    # deprecated Object#timeout, which is gone from current Ruby releases.
    Timeout.timeout(tout) {
      Wakame.log.debug("#{self.class}: Waiting a response from the actor: #{@packet.path}@#{@packet.agent_id}, token=#{@packet.token}")
      ret_status = @wait_lock.deq
      Wakame.log.debug("#{self.class}: A response (status=#{ret_status}) back from the actor: #{@packet.path}@#{@packet.agent_id}, token=#{@packet.token}")
      if ret_status == Actor::STATUS_FAILED
        raise RuntimeError, "Failed status has been returned: Actor Request #{token}"
      end
    }
  end
  alias :wait :wait_completion

  private

  # Guard used by the methods that only make sense after #request.
  def check_requested?
    raise "The request has not been sent yet." unless @requested
  end
end
369
+ end
@@ -0,0 +1,50 @@
1
+
2
# Agent-side monitor that reports the state of the agent itself.
# It publishes a Ping packet to the 'ping' exchange once at setup and then
# from a 10 second CheckerTimer.
class Wakame::Monitor::Agent
  include Wakame::Monitor

  def initialize
    @status = STATUS_ONLINE
  end

  # Publishes one Ping packet built from the report hash returned by #check
  # (keys :attrs, :actors, :monitors, :services).
  def send_ping(hash)
    packet = Wakame::Packets::Ping.new(agent, hash[:attrs], hash[:actors], hash[:monitors], hash[:services])
    publish_to('ping', packet.marshal)
  end

  # path is accepted for the Monitor interface and not used here.
  def setup(path)
    # Send the first ping signal as soon as possible since the ping contains
    # vital information to construct the Agent object on master node.
    send_ping(check())

    # Setup periodical ping publisher.
    @timer = CheckerTimer.new(10) { send_ping(check()) }
    @timer.start
  end


  # Collects the report sent with every ping and returns it as a Hash:
  #   :attrs    - local attributes fetched from the VM manipulator
  #   :monitors - [{:class=>"..."}] for each registered monitor
  #   :actors   - [{:class=>"..."}] for each registered actor
  #   :services - {svc_id => {:status=>...}} for each checker of the
  #               monitor found at '/service'
  def check
    attrs =
      if Wakame.environment == :EC2
        require 'wakame/vm_manipulator'
        Wakame::VmManipulator::EC2::MetadataService.fetch_local_attrs
      else
        Wakame::VmManipulator::StandAlone.fetch_local_attrs
      end

    report = {:attrs=>attrs, :monitors=>[], :actors=>[], :services=>{}}
    EM.barrier {
      agent.monitor_registry.monitors.each { |_key, mon|
        report[:monitors] << {:class=>mon.class.to_s}
      }
      agent.actor_registry.actors.each { |_key, actor|
        report[:actors] << {:class=>actor.class.to_s}
      }

      service_monitor = agent.monitor_registry.find_monitor('/service')
      service_monitor.checkers.each { |svc_id, checker|
        report[:services][svc_id] = {:status=>checker.status}
      }
    }

    report
  end
end
@@ -0,0 +1,183 @@
1
+
2
+ require 'open4'
3
+ require 'wakame'
4
+
5
# Agent-side monitor that watches the services running on the agent.
# Each watched service gets one checker (keyed by service id) that is polled
# by a timer and reports status changes to the 'agent_event' exchange.
class Wakame::Monitor::Service
  include Wakame::Monitor

  # Base class of the checkers. Subclasses implement #check and return
  # Wakame::Service::STATUS_ONLINE or STATUS_OFFLINE.
  class ServiceChecker
    #include Wakame::Packets::Agent
    attr_reader :timer, :svc_id
    attr_accessor :last_checked_at, :status

    # svc_id::  id of the service being watched.
    # svc_mon:: the owning monitor; used for send_event and its agent.
    def initialize(svc_id, svc_mon)
      @svc_id = svc_id
      @service_monitor = svc_mon
      @status = Wakame::Service::STATUS_OFFLINE
      # The timer is created stopped; #start schedules it.
      @timer = Wakame::Monitor::CheckerTimer.new(3) {
        self.signal_checker
      }
    end

    # Starts polling and announces MonitoringStarted. No-op when running.
    def start
      if !@timer.running?
        @timer.start
        @service_monitor.send_event(Wakame::Packets::MonitoringStarted.new(@service_monitor.agent, self.svc_id))
        Wakame.log.debug("#{self.class}: Started the checker")
      end
    end

    # Stops polling and announces MonitoringStopped. No-op when stopped.
    def stop
      if @timer.running?
        @timer.stop
        @service_monitor.send_event(Wakame::Packets::MonitoringStopped.new(@service_monitor.agent, self.svc_id))
        Wakame.log.debug("#{self.class}: Stopped the checker")
      end
    end

    # Overridden by subclasses. The base implementation returns nil, which
    # signal_checker logs as an unknown response.
    def check
    end

    protected

    # Runs #check through EventMachine.defer and feeds the outcome to
    # update_status in the completion callback. An exception raised by
    # #check is logged and reported as STATUS_FAIL.
    def signal_checker
      run_check = proc {
        outcome = begin
                    self.last_checked_at = Time.now
                    self.check
                  rescue => e
                    Wakame.log.error("#{self.class}: #{e}")
                    Wakame.log.error(e)
                    e
                  end
        Thread.pass
        outcome
      }

      report = proc { |res|
        case res
        when Exception
          update_status(Wakame::Service::STATUS_FAIL)
        when Wakame::Service::STATUS_ONLINE, Wakame::Service::STATUS_OFFLINE
          update_status(res)
        else
          # The message used to end with a dangling ", "; it now shows the
          # value that could not be interpreted.
          Wakame.log.error("#{self.class}: Unknown response type from the checker: #{self.svc_id}, #{res.inspect}")
        end
      }

      EventMachine.defer(run_check, report)
    end

    # Stores new_status and sends ServiceStatusChanged, only on a change.
    def update_status(new_status)
      prev_status = self.status
      if prev_status != new_status
        self.status = new_status
        @service_monitor.send_event(Wakame::Packets::ServiceStatusChanged.new(@service_monitor.agent, self.svc_id, prev_status, new_status))
      end
    end
  end

  # Checker that treats a service as online while the process named in a
  # pid file is reported by ps.
  class PidFileChecker < ServiceChecker
    def initialize(svc_id, svc_mon, pidpath)
      super(svc_id, svc_mon)
      @pidpath = pidpath
    end

    def check
      return Wakame::Service::STATUS_OFFLINE unless File.exist?(@pidpath)
      #cmdstat = ::Open4.popen4("ps -p \"`cat '#{@pidpath}'`\" > /dev/null"){}
      #cmdstat.exitstatus == 0 ? Wakame::Service::STATUS_ONLINE : Wakame::Service::STATUS_OFFLINE

      # NOTE(review): @pidpath is interpolated into a shell command; this
      # assumes the path comes from trusted configuration.
      cmdres = system("ps -p \"`cat '#{@pidpath}'`\" > /dev/null")
      # system() returns true or false.
      cmdres ? Wakame::Service::STATUS_ONLINE : Wakame::Service::STATUS_OFFLINE
    end
  end

  # Checker that runs a command and treats exit status 0 as online.
  # Anything the command prints is forwarded as a MonitoringOutput event.
  class CommandChecker < ServiceChecker
    attr_reader :command

    def initialize(svc_id, svc_mon, cmdstr)
      super(svc_id, svc_mon)
      @command = cmdstr
    end

    def check()
      outputs = []
      # NOTE(review): stdout is drained to EOF before stderr is read; a
      # command that writes a lot to stderr first could block here.
      cmdstat = ::Open4.popen4(@command) { |pid, stdin, stdout, stderr|
        stdout.each { |l|
          outputs << l
        }
        stderr.each { |l|
          outputs << l
        }
      }
      Wakame.log.debug("#{self.class}: Exit Status #{@command}: #{cmdstat}")
      if outputs.size > 0
        @service_monitor.send_event(Wakame::Packets::MonitoringOutput.new(@service_monitor.agent, self.svc_id, outputs.join('')))
      end
      cmdstat.exitstatus == 0 ? Wakame::Service::STATUS_ONLINE : Wakame::Service::STATUS_OFFLINE
    end
  end

  attr_reader :checkers

  def initialize
    @status = Wakame::Service::STATUS_ONLINE
    @checkers = {}
  end

  def setup(path)
  end

  # Handles a request indexed with :svc_id, :command and :cmdstr.
  # :start registers a command checker for request[:cmdstr]; :stop removes
  # the checker of that service. Other commands are ignored.
  def handle_request(request)
    svc_id = request[:svc_id]
    case request[:command]
    when :start
      # register expects (svc_id, checker_type, *args). The command string
      # used to be passed in the checker_type position, which always ended
      # in "Unsupported checker type"; the type is now given explicitly.
      register(svc_id, :command, request[:cmdstr])
    when :stop
      unregister(svc_id)
    end
  end

  # Publishes the marshalled packet a to the 'agent_event' exchange.
  def send_event(a)
    Wakame.log.debug("Sending back a event: #{a.class}")
    publish_to('agent_event', a.marshal)
  end

  def dump_attrs

  end

  # Returns the checker registered for svc_id, or nil.
  def find_checker(svc_id)
    @checkers[svc_id]
  end

  # Creates, starts and stores a checker for svc_id.
  #
  # checker_type:: :pidfile (args[0] is the pid file path) or
  #                :command (args[0] is the command line).
  #
  # Raises RuntimeError when svc_id already has a checker or the type is
  # not supported.
  def register(svc_id, checker_type, *args)
    chk = @checkers[svc_id]
    if chk
      Wakame.log.error("#{self.class}: Service registory duplication. #{svc_id}")
      raise "Service registory duplication. #{svc_id}"
    end
    case checker_type.to_sym
    when :pidfile
      chk = PidFileChecker.new(svc_id, self, args[0])
    when :command
      chk = CommandChecker.new(svc_id, self, args[0])
    else
      raise "Unsupported checker type: #{checker_type}"
    end
    chk.start
    @checkers[svc_id] = chk
    Wakame.log.info("#{self.class}: Registered service checker for #{svc_id}")
  end

  # Stops the timer of the checker for svc_id and forgets the checker.
  # Does nothing for unknown ids.
  def unregister(svc_id)
    chk = @checkers[svc_id]
    if chk
      # NOTE(review): this stops the timer directly, so unlike
      # ServiceChecker#stop no MonitoringStopped event is sent - confirm
      # that this is intended.
      chk.timer.stop
      @checkers.delete(svc_id)
      Wakame.log.info("#{self.class}: Unregistered service checker for #{svc_id}")
    end
  end

end
@@ -0,0 +1,69 @@
1
+
2
+
3
+
4
module Wakame
  # Mixin shared by the agent-side monitors.
  # Including it gives the class status/agent accessors, empty lifecycle
  # hooks that a monitor may override, and publish_to, which forwards to the
  # agent the monitor is attached to.
  module Monitor
    STATUS_OFFLINE = 0
    STATUS_ONLINE = 1
    STATUS_FAIL = 2

    # Adds the status and agent accessors to the including class.
    def self.included(klass)
      klass.send(:attr_accessor, :status, :agent)
    end

    # Hook: a request addressed to this monitor. No-op by default.
    def handle_request(request); end

    # Hook: called with the path the monitor is assigned to. No-op by default.
    def setup(assigned_path); end

    # Hook: no-op by default.
    def enable; end

    # Hook: no-op by default.
    def disable; end

    # Publishes data to the exchange through the attached agent and returns
    # whatever the agent returns.
    def publish_to(exchange, data)
      agent.publish_to(exchange, data)
    end
  end
end
35
+
36
+
37
module Wakame
  module Monitor
    # Periodic timer that is created stopped and can be started and stopped
    # explicitly.
    #
    # The constructor does not call super: it sets @interval and @code
    # itself and then calls stop, so no run is scheduled until #start.
    # NOTE(review): this relies on the superclass providing `schedule` and
    # honouring @interval, @code and @cancelled - confirm against the
    # EventMachine version in use.
    class CheckerTimer < EventMachine::PeriodicTimer
      # time:: interval handed to the timer via @interval.
      # blk::  code to run on every tick; a StandardError raised by it is
      #        logged instead of propagating.
      def initialize(time, &blk)
        @interval = time
        @code = proc do
          begin
            blk.call
          rescue => e
            Wakame.log.error(e)
          end
        end
        stop
      end

      # Schedules the timer unless it is already running.
      def start
        return if running?

        @cancelled = false
        schedule
      end

      # Marks the timer as cancelled.
      def stop
        @cancelled = true
      end

      # True while the timer has not been cancelled.
      def running?
        @cancelled ? false : true
      end

    end
  end
end