smith 0.5.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. data/bin/agency +55 -0
  2. data/bin/smithctl +102 -0
  3. data/lib/smith.rb +237 -0
  4. data/lib/smith/acl_compiler.rb +74 -0
  5. data/lib/smith/agent.rb +207 -0
  6. data/lib/smith/agent_cache.rb +40 -0
  7. data/lib/smith/agent_config.rb +22 -0
  8. data/lib/smith/agent_monitoring.rb +52 -0
  9. data/lib/smith/agent_process.rb +181 -0
  10. data/lib/smith/application/agency.rb +126 -0
  11. data/lib/smith/bootstrap.rb +153 -0
  12. data/lib/smith/cache.rb +61 -0
  13. data/lib/smith/command.rb +128 -0
  14. data/lib/smith/commands/agency/agents.rb +28 -0
  15. data/lib/smith/commands/agency/common.rb +18 -0
  16. data/lib/smith/commands/agency/kill.rb +13 -0
  17. data/lib/smith/commands/agency/list.rb +65 -0
  18. data/lib/smith/commands/agency/logger.rb +56 -0
  19. data/lib/smith/commands/agency/metadata.rb +14 -0
  20. data/lib/smith/commands/agency/restart.rb +39 -0
  21. data/lib/smith/commands/agency/start.rb +62 -0
  22. data/lib/smith/commands/agency/state.rb +14 -0
  23. data/lib/smith/commands/agency/stop.rb +70 -0
  24. data/lib/smith/commands/agency/version.rb +23 -0
  25. data/lib/smith/commands/smithctl/cat.rb +70 -0
  26. data/lib/smith/commands/smithctl/pop.rb +76 -0
  27. data/lib/smith/commands/smithctl/rm.rb +36 -0
  28. data/lib/smith/commands/smithctl/smithctl_version.rb +23 -0
  29. data/lib/smith/commands/smithctl/top.rb +42 -0
  30. data/lib/smith/commands/template.rb +9 -0
  31. data/lib/smith/config.rb +32 -0
  32. data/lib/smith/logger.rb +91 -0
  33. data/lib/smith/messaging/acl/agency_command.proto +5 -0
  34. data/lib/smith/messaging/acl/agent_command.proto +5 -0
  35. data/lib/smith/messaging/acl/agent_config_request.proto +4 -0
  36. data/lib/smith/messaging/acl/agent_config_update.proto +5 -0
  37. data/lib/smith/messaging/acl/agent_keepalive.proto +6 -0
  38. data/lib/smith/messaging/acl/agent_lifecycle.proto +12 -0
  39. data/lib/smith/messaging/acl/agent_stats.proto +14 -0
  40. data/lib/smith/messaging/acl/default.rb +51 -0
  41. data/lib/smith/messaging/acl/search.proto +9 -0
  42. data/lib/smith/messaging/amqp_options.rb +55 -0
  43. data/lib/smith/messaging/endpoint.rb +116 -0
  44. data/lib/smith/messaging/exceptions.rb +7 -0
  45. data/lib/smith/messaging/payload.rb +102 -0
  46. data/lib/smith/messaging/queue_factory.rb +67 -0
  47. data/lib/smith/messaging/receiver.rb +237 -0
  48. data/lib/smith/messaging/responder.rb +15 -0
  49. data/lib/smith/messaging/sender.rb +61 -0
  50. metadata +239 -0
@@ -0,0 +1,207 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ module Smith
4
+ class Agent
5
+
6
# Project logging mixin; the methods below rely on `logger` being available.
include Logger

# Read-only access to the queue factory, the agent's name and its process id
# (all three are assigned in #initialize).
attr_reader :factory, :name, :pid

# Option store for agents, seeded from the global Smith configuration.
# NOTE(review): as a class variable this is shared by Agent and every
# subclass loaded in the same process -- confirm that is intended.
@@agent_options = Smith.config.agent
11
+
12
# Builds the agent: records its identity, creates the queue factory, wires
# up the control and stats queues, registers start/stop log hooks, announces
# the start on the lifecycle queue and (optionally) starts the keep-alive.
#
# options - accepted for interface compatibility; not read by this method.
def initialize(options={})
  @name = self.class.to_s
  @pid = Process.pid

  @factory = QueueFactory.new

  # One ordered list of handlers per signal, created on first access.
  @signal_handlers = Hash.new { |handlers, signal| handlers[signal] = [] }

  setup_control_queue
  setup_stats_queue

  # Reference point for the up-time reported on the stats queue.
  @start_time = Time.now

  on_started { logger.info { "#{name}:[#{pid}] started." } }
  on_stopped { logger.info { "#{name}:[#{pid}] stopped." } }

  EM.threadpool_size = 1

  acknowledge_start
  start_keep_alive
end
38
+
39
# Override this method to implement your own agent. You can use task but this
# may go away in the future. This method must not block.
#
# The default implementation subscribes to the agent's default queue and
# hands every payload to the block registered with Agent.task.
#
# Raises ArgumentError if no task block has been registered. The class
# variable is probed with class_variable_defined? because reading an
# uninitialised class variable raises NameError, which previously hid the
# intended error message.
#
# NOTE(review): `subscribe` is not defined in this class -- confirm it is
# provided elsewhere.
def run
  agent_class = self.class
  registered = agent_class.class_variable_defined?(:@@task) && !agent_class.class_variable_get(:@@task).nil?
  raise ArgumentError, "You need to call Agent.task(&block)" unless registered

  logger.debug { "Setting up default queue: #{default_queue_name}" }

  subscribe(default_queue_name, :auto_delete => false) do |r|
    # Looked up per message so a task registered later is still honoured.
    agent_class.class_variable_get(:@@task).call(r.payload)
  end
end
50
+
51
# Registers the block that #started will invoke. A later call replaces the
# previously stored block.
def on_started(&hook)
  @on_started = hook
end
54
+
55
# Hands the given block to Smith.shutdown_hook so it runs as part of Smith's
# shutdown handling.
def on_stopped(&hook)
  Smith.shutdown_hook(&hook)
end
58
+
59
# Adds a handler block for +signal+ and (re)installs the traps.
#
# signal   - the signal to handle, in any form Kernel#trap accepts.
# position - :end (default) appends the handler, :beginning puts it first.
#
# Raises ArgumentError for any other position.
#
# Every registered signal is re-trapped on each call; each trap runs the
# full handler list for its signal through #run_signal_handlers.
def install_signal_handler(signal, position=:end, &blk)
  unless [:beginning, :end].include?(position)
    raise ArgumentError, "Unknown position: #{position}"
  end

  logger.verbose { "Installing signal handler for #{signal}" }

  index = (position == :beginning) ? 0 : -1
  @signal_handlers[signal].insert(index, blk)

  @signal_handlers.each do |registered, handlers|
    trap(registered, proc { |received| run_signal_handlers(received, handlers) })
  end
end
68
+
69
# Invokes the block registered through #on_started and returns its result.
def started
  hook = @on_started
  hook.call
end
72
+
73
# Obtains a receiver for +queue_name+ from the queue factory, subscribes to
# it and yields every item the subscription delivers to the caller's block.
#
# queue_name - name of the queue to consume from.
# opts       - options passed straight through to the factory.
def receiver(queue_name, opts={})
  queues.receiver(queue_name, opts) do |queue|
    queue.subscribe { |delivery| yield delivery }
  end
end
80
+
81
# Obtains a sender for +queue_name+ from the queue factory and yields it to
# the caller's block.
#
# queue_name - name of the queue to publish to.
# opts       - options passed straight through to the factory.
def sender(queue_name, opts={})
  queues.sender(queue_name, opts) do |queue|
    yield queue
  end
end
84
+
85
class << self
  # Registers the block that the default #run implementation calls with
  # each payload. +opts+ is accepted but not used here.
  def task(opts={}, &blk)
    @@task = blk
  end

  # Overrides agent options. Options supported:
  #   :monitor,   the agency will monitor the agent and restart it if it dies.
  #   :singleton, only ever have one agent. If this is set to false
  #               multiple agents are allowed.
  #
  # Raises ArgumentError for an option that merge_options rejects.
  def options(opts)
    opts.each do |option, value|
      merge_options(option, value)
    end
  end

  private

  # Stores +value+ under +option+, refusing options the store does not
  # already hold.
  # NOTE(review): the check is a truthiness test, so an option whose
  # current value is false or nil would also be reported as unknown --
  # confirm how the config object represents such values.
  def merge_options(option, value)
    raise ArgumentError, "Unknown option: #{option}" unless @@agent_options[option]

    @@agent_options[option] = value
  end
end
107
+
108
+ protected
109
+
110
# Logs the received signal and then calls each handler in list order,
# passing the signal to every one of them.
def run_signal_handlers(sig, handlers)
  logger.debug { "Running signal handlers for agent: #{name}: #{sig}" }
  handlers.each do |callable|
    callable.call(sig)
  end
end
114
+
115
# Subscribes to the agent's control queue and dispatches incoming commands:
#
#   'stop'      - acknowledges the stop, then calls Smith.stop.
#   'log_level' - sets the log level to the first option; a level rejected
#                 with ArgumentError is logged as an error.
#   otherwise   - logs a warning naming the unknown command.
#
# The warning used to interpolate +level+, which is only assigned in the
# 'log_level' branch and was therefore always nil there; it now reports the
# command that was actually received.
def setup_control_queue
  logger.debug { "Setting up control queue: #{control_queue_name}" }
  receiver(control_queue_name, :auto_delete => true, :durable => false) do |r|
    command = r.payload.command
    logger.debug { "Command received on agent control queue: #{command} #{r.payload.options}" }

    case command
    when 'stop'
      acknowledge_stop { Smith.stop }
    when 'log_level'
      begin
        level = r.payload.options.first
        logger.info { "Setting log level to #{level} for: #{name}" }
        log_level(level)
      rescue ArgumentError
        logger.error { "Incorrect log level: #{level}" }
      end
    else
      logger.warn { "Unknown command: #{command} -> #{command.inspect}" }
    end
  end
end
136
+
137
# Publishes an :agent_stats payload on 'agent.stats' every two seconds,
# through the callback handed to the queue's consumers? check.
#
# The payload carries the agent name, pid, resident memory, up-time and one
# QueueStats entry per queue known to the factory.
#
# The sender is requested with :dont_cache => true -- presumably so the
# stats queue itself does not show up in the reported queues (TODO confirm).
def setup_stats_queue
  queue_opts = {:dont_cache => true, :durable => false, :auto_delete => false}

  sender('agent.stats', queue_opts) do |stats_queue|
    EventMachine.add_periodic_timer(2) do
      publish_stats = proc do |consumers|
        payload = ACL::Payload.new(:agent_stats).content do |p|
          p.agent_name = self.name
          p.pid = self.pid
          # Second field of /proc/<pid>/statm. This assumes the page size is
          # 4K; the result is in MB. Linux only.
          resident_pages = File.read("/proc/#{pid}/statm").split[1].to_i
          p.rss = (resident_pages * 4) / 1024
          p.up_time = (Time.now - @start_time).to_i
          factory.each_queue do |q|
            p.queues << ACL::AgentStats::QueueStats.new(:name => q.denomalized_queue_name, :type => q.class.to_s, :length => q.counter)
          end
        end

        stats_queue.publish(payload)
      end

      # The errback argument is set to nil so as to suppress the default message.
      stats_queue.consumers?(publish_stats, nil)
    end
  end
end
161
+
162
# Publishes an 'acknowledge_start' :agent_lifecycle message on the
# 'agent.lifecycle' queue. The message is the agent options merged with the
# state, pid, class name, metadata and start time (epoch seconds, as a
# string); the explicit fields win over same-named options.
def acknowledge_start
  sender('agent.lifecycle', :auto_delete => true, :durable => false, :dont_cache => true) do |ack_start_queue|
    message = {
      :state => 'acknowledge_start',
      :pid => $$.to_s,
      :name => self.class.to_s,
      :metadata => agent_options[:metadata],
      :started_at => Time.now.utc.to_i.to_s
    }
    payload = ACL::Payload.new(:agent_lifecycle).content(agent_options.merge(message))
    ack_start_queue.publish(payload)
  end
end
168
+
169
# Publishes an 'acknowledge_stop' :agent_lifecycle message (state, pid and
# class name) on the 'agent.lifecycle' queue. The given block is forwarded
# to the queue's publish call.
def acknowledge_stop(&block)
  sender('agent.lifecycle', :auto_delete => true, :durable => false, :dont_cache => true) do |ack_stop_queue|
    message = {
      :state => 'acknowledge_stop',
      :pid => $$.to_s,
      :name => self.class.to_s
    }
    payload = ACL::Payload.new(:agent_lifecycle).content(message)
    ack_stop_queue.publish(payload, &block)
  end
end
175
+
176
# When the :monitor option is set, publishes an :agent_keepalive message
# (class name, pid, epoch seconds) on 'agent.keepalive' once a second,
# through the block handed to the queue's consumers? check. Otherwise only
# logs that no keep-alive is being started.
def start_keep_alive
  unless agent_options[:monitor]
    return logger.info { "Not initiating keep alive, agent is not being monitored: #{@name}" }
  end

  EventMachine::add_periodic_timer(1) do
    sender('agent.keepalive', :auto_delete => true, :durable => false, :dont_cache => true) do |keep_alive_queue|
      message = {:name => self.class.to_s, :pid => $$.to_s, :time => Time.now.utc.to_i.to_s}
      keep_alive_queue.consumers? do |_queue|
        keep_alive_queue.publish(ACL::Payload.new(:agent_keepalive).content(message))
      end
    end
  end
end
190
+
191
# Internal alias for the queue factory created in #initialize.
def queues
  return @factory
end
194
+
195
# Returns `_child` of the class-level option store.
# NOTE(review): `_child` comes from the configuration object; presumably it
# exposes the underlying hash-like data -- confirm against the config library.
def agent_options
  store = @@agent_options
  store._child
end
198
+
199
# Name of this agent's control queue: the default queue name followed by
# ".control".
def control_queue_name
  [default_queue_name, 'control'].join('.')
end
202
+
203
# Name of this agent's default queue: "agent." followed by the agent name
# with a trailing "Agent" removed and converted with String#snake_case.
def default_queue_name
  base = name.sub(/Agent$/, '')
  "agent.#{base.snake_case}"
end
206
+ end
207
+ end
@@ -0,0 +1,40 @@
1
+ # -*- encoding: utf-8 -*-
2
+ module Smith
3
# Cache of AgentProcess records keyed by agent name. Entries missing from
# the cache are looked up in the database, or built fresh with the path of
# the directory that contains the agent's source file.
class AgentCache < Cache

  # NOTE(review): the constructor stores the search directories in @paths,
  # so this accessor does not expose them -- confirm whether :paths was meant.
  attr_accessor :path

  # opts - :paths, the directories searched for "<agent_name>.rb" files.
  def initialize(opts={})
    super()
    @paths = opts[:paths]

    # Registered with Cache#operator: reuse the stored record for the agent
    # when there is one, otherwise create a new record.
    finder = lambda do |agent_name, options={}|
      AgentProcess.first(:name => agent_name) ||
        AgentProcess.new(:name => agent_name, :path => agent_path(agent_name))
    end
    operator finder

    populate
  end

  # True only when the named agent is cached and its record reports alive.
  def alive?(name)
    return false unless exist?(name)

    entry(name).alive?
  end

  # All cached agents whose state equals +state+ (compared as a string).
  def state(state)
    wanted = state.to_s
    select { |agent| agent.state == wanted }
  end

  alias :names :entries
  alias :[] :entry

  private

  # When we start, load any existing data from the db.
  def populate
    AgentProcess.all.each do |agent|
      update(agent.name, agent)
    end
  end

  # First configured directory containing "<snake_cased agent name>.rb",
  # or nil when none does.
  # NOTE(review): raises NoMethodError if no :paths option was given.
  def agent_path(agent_name)
    file_name = "#{agent_name.snake_case}.rb"
    @paths.detect { |dir| Pathname.new(dir).join(file_name).exist? }
  end
end
40
+ end
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'leveldb'
4
+ require 'pathname'
5
+
6
+ module Smith
7
# Thin key/value wrapper around a LevelDB database holding per-agent
# configuration.
class AgentConfig

  # path - directory that holds the database.
  # name - name of the database inside that directory.
  def initialize(path, name)
    @path = Pathname.new(path)
    db_location = @path.join(name).to_s
    @db ||= LevelDB::DB.new(db_location)
  end

  # Returns whatever is stored under +agent+.
  def for(agent)
    @db[agent]
  end

  # Stores +value+ under +agent+.
  def update(agent, value)
    @db[agent] = value
  end
end
22
+ end
@@ -0,0 +1,52 @@
1
+ # -*- encoding: utf-8 -*-
2
+ module Smith
3
+ class AgentMonitoring
4
+
5
+ include Logger
6
+
7
# agent_processes - the collection iterated (via #each) on every monitoring
#                   tick.
def initialize(agent_processes)
  @agent_processes = agent_processes
end
10
+
11
# Starts a one-second periodic timer that inspects every monitored agent
# process and reacts to its state:
#
#   running  - no keep-alive for more than 10 seconds marks the agent dead
#              (no_process_running); keep-alives not newer than the start
#              time are only warned about.
#   starting - more than 10 seconds since the start fires not_responding.
#   stopping - logged only.
#   dead     - asks the agency, over 'agency.control', to start it again.
#   unknown  - logged only.
#
# Agents whose +monitor+ flag is not set are skipped.
#
# The 'stopping' branch used to be spelled 'stoping', which never matched
# the state machine's state name, so it could never run.
def start_monitoring
  EventMachine::add_periodic_timer(1) do
    @agent_processes.each do |agent_process|
      next unless agent_process.monitor

      logger.verbose { "Agent state for #{agent_process.name}: #{agent_process.state}" }
      case agent_process.state
      when 'running'
        if agent_process.last_keep_alive
          if agent_process.last_keep_alive > agent_process.started_at
            if (Time.now.utc.to_i - agent_process.last_keep_alive) > 10
              logger.fatal { "Agent not responding: #{agent_process.name}" }
              agent_process.no_process_running
            end
          else
            logger.warn { "Discarding keep_alives with timestamp before agent started: #{Time.at(agent_process.started_at)} > #{Time.at(agent_process.last_keep_alive)}" }
          end
        end
      when 'starting'
        if (Time.now.utc.to_i - agent_process.started_at) > 10
          logger.error { "No response from agent for > 10 seconds. Agent probably didn't start" }
          agent_process.not_responding
        else
          logger.debug { "no keep alive from #{agent_process.name}" }
        end
      when 'stopping'
        logger.info { "Agent is shutting down: #{agent_process.name}" }
      when 'dead'
        logger.info { "Restarting dead agent: #{agent_process.name}" }
        Messaging::Sender.new('agency.control', :auto_delete => true, :durable => false, :strict => true).ready do |sender|
          sender.publish_and_receive(ACL::Payload.new(:agency_command).content(:command => 'start', :args => [agent_process.name])) do |r|
            logger.debug { "Agent restart message acknowledged: #{agent_process.name}" }
          end
        end
      when 'unknown'
        logger.info { "Agent is in an unknown state: #{agent_process.name}" }
      end
    end
  end
end
51
+ end
52
+ end
@@ -0,0 +1,181 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'pp'
3
+ require 'state_machine'
4
+ require 'dm-observer'
5
+
6
+ module Smith
7
+
8
+ class AgentProcess
9
+
10
include Logger
include Extlib
include DataMapper::Resource

# Persisted attributes of an agent process.
property :id,              Serial
property :path,            String,  :required => true
property :name,            String,  :required => true
property :state,           String,  :required => true
property :pid,             String
property :started_at,      Integer
property :last_keep_alive, Integer
property :metadata,        String
property :monitor,         Boolean
property :singleton,       Boolean

# Lifecycle of an agent process. Every record starts in :null.
#
# NOTE(review): :acknowledge_start and :acknowledge_stop appear below as
# source states of :not_responding and :kill, but they are event names; no
# transition ever leads to a state with those names. :kill also has no
# transition from :stopped. Confirm both are intended.
state_machine :initial => :null do

  # Log every transition before it happens.
  before_transition do |transition|
    logger.debug { "Transition [#{name}]: :#{transition.from} -> :#{transition.to}" }
  end

  # Log events that are not allowed from the current state.
  after_failure do |transition|
    logger.debug { "Illegal state change [#{name}]: :#{transition.from} -> :#{transition.event}" }
  end

  event(:instantiate)       { transition [:null] => :stopped }
  event(:start)             { transition [:null, :stopped, :dead] => :starting }
  event(:acknowledge_start) { transition [:starting] => :running }
  event(:stop)              { transition [:running, :unknown] => :stopping }
  event(:acknowledge_stop)  { transition [:stopping] => :stopped }
  event(:no_process_running) { transition [:unknown, :starting, :running, :stopping] => :dead }

  event :not_responding do
    transition [:starting, :acknowledge_start, :acknowledge_stop, :running, :stopping] => :unknown
  end

  event :kill do
    transition [:null, :unknown, :starting, :acknowledge_start, :stopping, :acknowledge_stop, :running, :dead] => :null
  end
end
67
+
68
# Check to see if the agent is alive.
#
# Sends signal 0 to the recorded pid, which only performs the existence and
# permission check and delivers nothing.
#
# Returns false when no pid is recorded or the check fails (for example
# Errno::ESRCH, no such process), and true when the process exists. A
# process that exists but may not be signalled (Errno::EPERM) counts as
# alive.
#
# Only StandardError is rescued, so Interrupt, SystemExit and similar are
# no longer swallowed.
def alive?
  return false unless pid

  Process.kill(0, pid.to_i)
  true
rescue Errno::EPERM
  true
rescue StandardError
  false
end
81
+
82
# Return the agent control queue: "agent.<name>.control", where <name> is
# the agent name with a trailing "Agent" removed and converted with
# String#snake_case.
def control_queue_name
  base = name.sub(/Agent$/, '').snake_case
  "agent.#{base}.control"
end
86
+ end
87
+
88
# Side effects attached to AgentProcess state-machine events: forking the
# agent, asking it to stop, killing it and checking on unresponsive agents.
class AgentProcessObserver

  include Logger
  include DataMapper::Observer

  observe AgentProcess

  # Start an agent. This forks and execs the bootstrapper class
  # which then becomes responsible for managing the agent process.
  #
  # started_at is recorded as integer epoch seconds, the form every other
  # reader of that attribute compares against.
  def self.start(agent_process)
    agent_process.started_at = Time.now.utc.to_i
    agent_process.pid = fork do

      # Detach from the controlling terminal
      raise 'Cannot detach from controlling terminal' unless Process.setsid

      # Close all file descriptors apart from stdin, stdout, stderr.
      # Failures to close are deliberately ignored.
      ObjectSpace.each_object(IO) do |io|
        next if [STDIN, STDOUT, STDERR].include?(io)

        begin
          io.close
        rescue StandardError
          nil
        end
      end

      # Sort out the remaining file descriptors. Don't do anything with
      # stdout (and by extension stderr) as we want the agency to manage it.
      STDIN.reopen("/dev/null")
      STDERR.reopen(STDOUT)

      bootstraper = File.expand_path(File.join(File.dirname(__FILE__), 'bootstrap.rb'))

      exec('ruby', bootstraper, agent_process.path, agent_process.name, Smith.acl_cache_path.to_s)
    end

    # We don't want any zombies.
    Process.detach(agent_process.pid.to_i)
  end

  # Nothing to do when an agent acknowledges its start.
  def self.acknowledge_start(agent_process)
  end

  # Sends a 'stop' command on the agent's control queue. If the queue has no
  # consumers the agent is marked dead via no_process_running.
  def self.stop(agent_process)
    Messaging::Sender.new(agent_process.control_queue_name, :durable => false, :auto_delete => true).ready do |sender|
      callback = proc { |queue| queue.publish(ACL::Payload.new(:agent_command).content(:command => 'stop')) }
      errback = proc do
        logger.warn { "Agent is not listening. Setting state to dead." }
        agent_process.no_process_running
      end

      sender.consumers?(callback, errback)
    end
  end

  # Nothing to do when an agent acknowledges its stop.
  def self.acknowledge_stop(agent_process)
  end

  # Sends TERM to the agent's pid. A missing pid or a failed kill is logged
  # rather than raised.
  def self.kill(agent_process)
    if agent_process.pid
      logger.info { "Sending kill signal: #{agent_process.name}(#{agent_process.pid})" }
      begin
        Process.kill('TERM', agent_process.pid.to_i)
      rescue
        logger.error { "Process does not exist. PID is stale: #{agent_process.pid}: #{agent_process.name}" }
      end
    else
      logger.error { "Not sending kill signal, agent pid is not set: #{agent_process.name}" }
    end
  end

  # If an agent is in an unknown state this checks, via /proc, whether the
  # process still exists and logs the outcome. It does not kill anything.
  # TODO this is not really a reaper but I haven't quite worked out what
  # I'm going to do with it so I'll leave it as is.
  def self.reap_agent(agent_process)
    logger.info { "Reaping agent: #{agent_process.name}" }
    if Pathname.new('/proc').join(agent_process.pid.to_s).exist?
      logger.warn { "Agent is still alive: #{agent_process.name}" }
    else
      logger.warn { "Agent is already dead: #{agent_process.name}" }
    end
  end
end
172
+
173
# Attach the observer's class methods as after-transition hooks. Each pair
# is [event, AgentProcessObserver method run after that event].
AgentProcess.state_machine do
  [
    [:start,             :start],
    [:acknowledge_start, :acknowledge_start],
    [:stop,              :stop],
    [:kill,              :kill],
    [:acknowledge_stop,  :acknowledge_stop],
    [:not_responding,    :reap_agent]
  ].each do |event, handler|
    after_transition :on => event, :do => AgentProcessObserver.method(handler)
  end
end
181
+ end