smith 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. data/bin/agency +55 -0
  2. data/bin/smithctl +102 -0
  3. data/lib/smith.rb +237 -0
  4. data/lib/smith/acl_compiler.rb +74 -0
  5. data/lib/smith/agent.rb +207 -0
  6. data/lib/smith/agent_cache.rb +40 -0
  7. data/lib/smith/agent_config.rb +22 -0
  8. data/lib/smith/agent_monitoring.rb +52 -0
  9. data/lib/smith/agent_process.rb +181 -0
  10. data/lib/smith/application/agency.rb +126 -0
  11. data/lib/smith/bootstrap.rb +153 -0
  12. data/lib/smith/cache.rb +61 -0
  13. data/lib/smith/command.rb +128 -0
  14. data/lib/smith/commands/agency/agents.rb +28 -0
  15. data/lib/smith/commands/agency/common.rb +18 -0
  16. data/lib/smith/commands/agency/kill.rb +13 -0
  17. data/lib/smith/commands/agency/list.rb +65 -0
  18. data/lib/smith/commands/agency/logger.rb +56 -0
  19. data/lib/smith/commands/agency/metadata.rb +14 -0
  20. data/lib/smith/commands/agency/restart.rb +39 -0
  21. data/lib/smith/commands/agency/start.rb +62 -0
  22. data/lib/smith/commands/agency/state.rb +14 -0
  23. data/lib/smith/commands/agency/stop.rb +70 -0
  24. data/lib/smith/commands/agency/version.rb +23 -0
  25. data/lib/smith/commands/smithctl/cat.rb +70 -0
  26. data/lib/smith/commands/smithctl/pop.rb +76 -0
  27. data/lib/smith/commands/smithctl/rm.rb +36 -0
  28. data/lib/smith/commands/smithctl/smithctl_version.rb +23 -0
  29. data/lib/smith/commands/smithctl/top.rb +42 -0
  30. data/lib/smith/commands/template.rb +9 -0
  31. data/lib/smith/config.rb +32 -0
  32. data/lib/smith/logger.rb +91 -0
  33. data/lib/smith/messaging/acl/agency_command.proto +5 -0
  34. data/lib/smith/messaging/acl/agent_command.proto +5 -0
  35. data/lib/smith/messaging/acl/agent_config_request.proto +4 -0
  36. data/lib/smith/messaging/acl/agent_config_update.proto +5 -0
  37. data/lib/smith/messaging/acl/agent_keepalive.proto +6 -0
  38. data/lib/smith/messaging/acl/agent_lifecycle.proto +12 -0
  39. data/lib/smith/messaging/acl/agent_stats.proto +14 -0
  40. data/lib/smith/messaging/acl/default.rb +51 -0
  41. data/lib/smith/messaging/acl/search.proto +9 -0
  42. data/lib/smith/messaging/amqp_options.rb +55 -0
  43. data/lib/smith/messaging/endpoint.rb +116 -0
  44. data/lib/smith/messaging/exceptions.rb +7 -0
  45. data/lib/smith/messaging/payload.rb +102 -0
  46. data/lib/smith/messaging/queue_factory.rb +67 -0
  47. data/lib/smith/messaging/receiver.rb +237 -0
  48. data/lib/smith/messaging/responder.rb +15 -0
  49. data/lib/smith/messaging/sender.rb +61 -0
  50. metadata +239 -0
@@ -0,0 +1,207 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ module Smith
4
+ class Agent
5
+
6
+ include Logger
7
+
8
+ @@agent_options = Smith.config.agent
9
+
10
+ attr_reader :factory, :name, :pid
11
+
12
+ def initialize(options={})
13
+ @name = self.class.to_s
14
+ @pid = $$
15
+
16
+ @factory = QueueFactory.new
17
+
18
+ @signal_handlers = Hash.new { |h,k| h[k] = Array.new }
19
+
20
+ setup_control_queue
21
+ setup_stats_queue
22
+
23
+ @start_time = Time.now
24
+
25
+ on_started do
26
+ logger.info { "#{name}:[#{pid}] started." }
27
+ end
28
+
29
+ on_stopped do
30
+ logger.info { "#{name}:[#{pid}] stopped." }
31
+ end
32
+
33
+ EM.threadpool_size = 1
34
+
35
+ acknowledge_start
36
+ start_keep_alive
37
+ end
38
+
39
+ # Overide this method to implement your own agent. You can use task but this may
40
+ # go away in the future. This method must not block.
41
+ def run
42
+ raise ArgumentError, "You need to call Agent.task(&block)" if @@task.nil?
43
+
44
+ logger.debug { "Setting up default queue: #{default_queue_name}" }
45
+
46
+ subscribe(default_queue_name, :auto_delete => false) do |r|
47
+ @@task.call(r.payload)
48
+ end
49
+ end
50
+
51
+ def on_started(&blk)
52
+ @on_started = blk
53
+ end
54
+
55
+ def on_stopped(&blk)
56
+ Smith.shutdown_hook(&blk)
57
+ end
58
+
59
+ def install_signal_handler(signal, position=:end, &blk)
60
+ raise ArgumentError, "Unknown position: #{position}" if ![:beginning, :end].include?(position)
61
+
62
+ logger.verbose { "Installing signal handler for #{signal}" }
63
+ @signal_handlers[signal].insert((position == :beginning) ? 0 : -1, blk)
64
+ @signal_handlers.each do |sig, handlers|
65
+ trap(sig, proc { |sig| run_signal_handlers(sig, handlers) })
66
+ end
67
+ end
68
+
69
+ def started
70
+ @on_started.call
71
+ end
72
+
73
+ def receiver(queue_name, opts={})
74
+ queues.receiver(queue_name, opts) do |receiver|
75
+ receiver.subscribe do |r|
76
+ yield r
77
+ end
78
+ end
79
+ end
80
+
81
+ def sender(queue_name, opts={})
82
+ queues.sender(queue_name, opts) { |sender| yield sender }
83
+ end
84
+
85
+ class << self
86
+ def task(opts={}, &blk)
87
+ @@task = blk
88
+ end
89
+
90
+ # Options supported:
91
+ # :monitor, the agency will monitor the agent & if dies restart.
92
+ # :singleton, only every have one agent. If this is set to false
93
+ # multiple agents are allow.
94
+ def options(opts)
95
+ opts.each { |k,v| merge_options(k, v) }
96
+ end
97
+
98
+ def merge_options(option, value)
99
+ if @@agent_options[option]
100
+ @@agent_options[option] = value
101
+ else
102
+ raise ArgumentError, "Unknown option: #{option}"
103
+ end
104
+ end
105
+ private :merge_options
106
+ end
107
+
108
+ protected
109
+
110
+ def run_signal_handlers(sig, handlers)
111
+ logger.debug { "Running signal handlers for agent: #{name}: #{sig}" }
112
+ handlers.each { |handler| handler.call(sig) }
113
+ end
114
+
115
+ def setup_control_queue
116
+ logger.debug { "Setting up control queue: #{control_queue_name}" }
117
+ receiver(control_queue_name, :auto_delete => true, :durable => false) do |r|
118
+ logger.debug { "Command received on agent control queue: #{r.payload.command} #{r.payload.options}" }
119
+
120
+ case r.payload.command
121
+ when 'stop'
122
+ acknowledge_stop { Smith.stop }
123
+ when 'log_level'
124
+ begin
125
+ level = r.payload.options.first
126
+ logger.info { "Setting log level to #{level} for: #{name}" }
127
+ log_level(level)
128
+ rescue ArgumentError => e
129
+ logger.error { "Incorrect log level: #{level}" }
130
+ end
131
+ else
132
+ logger.warn { "Unknown command: #{level} -> #{level.inspect}" }
133
+ end
134
+ end
135
+ end
136
+
137
+ def setup_stats_queue
138
+ # instantiate this queue without using the factory so it doesn't show
139
+ # up in the stats.
140
+ sender('agent.stats', :dont_cache => true, :durable => false, :auto_delete => false) do |stats_queue|
141
+ EventMachine.add_periodic_timer(2) do
142
+ callback = proc do |consumers|
143
+ payload = ACL::Payload.new(:agent_stats).content do |p|
144
+ p.agent_name = self.name
145
+ p.pid = self.pid
146
+ p.rss = (File.read("/proc/#{pid}/statm").split[1].to_i * 4) / 1024 # This assums the page size is 4K & is MB
147
+ p.up_time = (Time.now - @start_time).to_i
148
+ factory.each_queue do |q|
149
+ p.queues << ACL::AgentStats::QueueStats.new(:name => q.denomalized_queue_name, :type => q.class.to_s, :length => q.counter)
150
+ end
151
+ end
152
+
153
+ stats_queue.publish(payload)
154
+ end
155
+
156
+ # The errback argument is set to nil so as to suppres the default message.
157
+ stats_queue.consumers?(callback, nil)
158
+ end
159
+ end
160
+ end
161
+
162
+ def acknowledge_start
163
+ sender('agent.lifecycle', :auto_delete => true, :durable => false, :dont_cache => true) do |ack_start_queue|
164
+ message = {:state => 'acknowledge_start', :pid => $$.to_s, :name => self.class.to_s, :metadata => agent_options[:metadata], :started_at => Time.now.utc.to_i.to_s}
165
+ ack_start_queue.publish(ACL::Payload.new(:agent_lifecycle).content(agent_options.merge(message)))
166
+ end
167
+ end
168
+
169
+ def acknowledge_stop(&block)
170
+ sender('agent.lifecycle', :auto_delete => true, :durable => false, :dont_cache => true) do |ack_stop_queue|
171
+ message = {:state => 'acknowledge_stop', :pid => $$.to_s, :name => self.class.to_s}
172
+ ack_stop_queue.publish(ACL::Payload.new(:agent_lifecycle).content(message), &block)
173
+ end
174
+ end
175
+
176
+ def start_keep_alive
177
+ if agent_options[:monitor]
178
+ EventMachine::add_periodic_timer(1) do
179
+ sender('agent.keepalive', :auto_delete => true, :durable => false, :dont_cache => true) do |keep_alive_queue|
180
+ message = {:name => self.class.to_s, :pid => $$.to_s, :time => Time.now.utc.to_i.to_s}
181
+ keep_alive_queue.consumers? do |sender|
182
+ keep_alive_queue.publish(ACL::Payload.new(:agent_keepalive).content(message))
183
+ end
184
+ end
185
+ end
186
+ else
187
+ logger.info { "Not initiating keep alive, agent is not being monitored: #{@name}" }
188
+ end
189
+ end
190
+
191
+ def queues
192
+ @factory
193
+ end
194
+
195
+ def agent_options
196
+ @@agent_options._child
197
+ end
198
+
199
+ def control_queue_name
200
+ "#{default_queue_name}.control"
201
+ end
202
+
203
+ def default_queue_name
204
+ "agent.#{name.sub(/Agent$/, '').snake_case}"
205
+ end
206
+ end
207
+ end
@@ -0,0 +1,40 @@
1
+ # -*- encoding: utf-8 -*-
2
+ module Smith
3
+ class AgentCache < Cache
4
+
5
+ attr_accessor :path
6
+
7
+ def initialize(opts={})
8
+ super()
9
+ @paths = opts[:paths]
10
+
11
+ operator ->(agent_name, options={}) { AgentProcess.first(:name => agent_name) || AgentProcess.new(:name => agent_name, :path => agent_path(agent_name)) }
12
+
13
+ populate
14
+ end
15
+
16
+ def alive?(name)
17
+ (exist?(name)) ? entry(name).alive? : false
18
+ end
19
+
20
+ def state(state)
21
+ select {|a| a.state == state.to_s }
22
+ end
23
+
24
+ alias names :entries
25
+ alias :[] :entry
26
+
27
+ private
28
+
29
+ # When we start load any new data from the db.
30
+ def populate
31
+ AgentProcess.all.each { |a| update(a.name, a) }
32
+ end
33
+
34
+ def agent_path(agent_name)
35
+ @paths.detect do |path|
36
+ Pathname.new(path).join("#{agent_name.snake_case}.rb").exist?
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'leveldb'
4
+ require 'pathname'
5
+
6
+ module Smith
7
+ class AgentConfig
8
+
9
+ def initialize(path, name)
10
+ @path = Pathname.new(path)
11
+ @db ||= LevelDB::DB.new(@path.join(name).to_s)
12
+ end
13
+
14
+ def for(agent)
15
+ @db[agent]
16
+ end
17
+
18
+ def update(agent, value)
19
+ @db[agent] = value
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,52 @@
1
+ # -*- encoding: utf-8 -*-
2
+ module Smith
3
+ class AgentMonitoring
4
+
5
+ include Logger
6
+
7
+ def initialize(agent_processes)
8
+ @agent_processes = agent_processes
9
+ end
10
+
11
+ def start_monitoring
12
+ EventMachine::add_periodic_timer(1) do
13
+ @agent_processes.each do |agent_process|
14
+ if agent_process.monitor
15
+ logger.verbose { "Agent state for #{agent_process.name}: #{agent_process.state}" }
16
+ case agent_process.state
17
+ when 'running'
18
+ if agent_process.last_keep_alive
19
+ if agent_process.last_keep_alive > agent_process.started_at
20
+ if (Time.now.utc.to_i - agent_process.last_keep_alive) > 10
21
+ logger.fatal { "Agent not responding: #{agent_process.name}" }
22
+ agent_process.no_process_running
23
+ end
24
+ else
25
+ logger.warn { "Discarding keep_alives with timestamp before agent started: #{Time.at(agent_process.started_at)} > #{Time.at(agent_process.last_keep_alive)}" }
26
+ end
27
+ end
28
+ when 'starting'
29
+ if (Time.now.utc.to_i - agent_process.started_at) > 10
30
+ logger.error { "No response from agent for > 10 seconds. Agent probably didn't start" }
31
+ agent_process.not_responding
32
+ else
33
+ logger.debug { "no keep alive from #{agent_process.name}" }
34
+ end
35
+ when 'stoping'
36
+ logger.info { "Agent is shutting down: #{agent_process.name}" }
37
+ when 'dead'
38
+ logger.info { "Restarting dead agent: #{agent_process.name}" }
39
+ Messaging::Sender.new('agency.control', :auto_delete => true, :durable => false, :strict => true).ready do |sender|
40
+ sender.publish_and_receive(ACL::Payload.new(:agency_command).content(:command => 'start', :args => [agent_process.name])) do |r|
41
+ logger.debug { "Agent restart message acknowledged: #{agent_process.name}" }
42
+ end
43
+ end
44
+ when 'unknown'
45
+ logger.info { "Agent is in an unknown state: #{agent_process.name}" }
46
+ end
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,181 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'pp'
3
+ require 'state_machine'
4
+ require 'dm-observer'
5
+
6
+ module Smith
7
+
8
+ class AgentProcess
9
+
10
+ include Logger
11
+ include Extlib
12
+ include DataMapper::Resource
13
+
14
+ property :id, Serial
15
+ property :path, String, :required => true
16
+ property :name, String, :required => true
17
+ property :state, String, :required => true
18
+ property :pid, String
19
+ property :started_at, Integer
20
+ property :last_keep_alive, Integer
21
+ property :metadata, String
22
+ property :monitor, Boolean
23
+ property :singleton, Boolean
24
+
25
+ state_machine :initial => :null do
26
+
27
+ before_transition do |transition|
28
+ logger.debug { "Transition [#{name}]: :#{transition.from} -> :#{transition.to}" }
29
+ end
30
+
31
+ after_failure do |transition|
32
+ logger.debug { "Illegal state change [#{name}]: :#{transition.from} -> :#{transition.event}" }
33
+ end
34
+
35
+ event :instantiate do
36
+ transition [:null] => :stopped
37
+ end
38
+
39
+ event :start do
40
+ transition [:null, :stopped, :dead] => :starting
41
+ end
42
+
43
+ event :acknowledge_start do
44
+ transition [:starting] => :running
45
+ end
46
+
47
+ event :stop do
48
+ transition [:running, :unknown] => :stopping
49
+ end
50
+
51
+ event :acknowledge_stop do
52
+ transition [:stopping] => :stopped
53
+ end
54
+
55
+ event :no_process_running do
56
+ transition [:unknown, :starting, :running, :stopping] => :dead
57
+ end
58
+
59
+ event :not_responding do
60
+ transition [:starting, :acknowledge_start, :acknowledge_stop, :running, :stopping] => :unknown
61
+ end
62
+
63
+ event :kill do
64
+ transition [:null, :unknown, :starting, :acknowledge_start, :stopping, :acknowledge_stop, :running, :dead] => :null
65
+ end
66
+ end
67
+
68
+ # Check to see if the agent is alive.
69
+ def alive?
70
+ if self.pid
71
+ begin
72
+ Process.kill(0, self.pid.to_i)
73
+ true
74
+ rescue Exception
75
+ false
76
+ end
77
+ else
78
+ false
79
+ end
80
+ end
81
+
82
+ # Return the agent control queue.
83
+ def control_queue_name
84
+ "agent.#{name.sub(/Agent$/, '').snake_case}.control"
85
+ end
86
+ end
87
+
88
+ class AgentProcessObserver
89
+
90
+ include Logger
91
+ include DataMapper::Observer
92
+
93
+ observe AgentProcess
94
+
95
+ # Start an agent. This forks and execs the bootstrapper class
96
+ # which then becomes responsible for managing the agent process.
97
+ def self.start(agent_process)
98
+ agent_process.started_at = Time.now.utc
99
+ agent_process.pid = fork do
100
+
101
+ # Detach from the controlling terminal
102
+ unless sess_id = Process.setsid
103
+ raise 'Cannot detach from controlling terminal'
104
+ end
105
+
106
+ # Close all file descriptors apart from stdin, stdout, stderr
107
+ ObjectSpace.each_object(IO) do |io|
108
+ unless [STDIN, STDOUT, STDERR].include?(io)
109
+ io.close rescue nil
110
+ end
111
+ end
112
+
113
+ # Sort out the remaining file descriptors. Don't do anything with
114
+ # stdout (and by extension stderr) as want the agency to manage it.
115
+ STDIN.reopen("/dev/null")
116
+ STDERR.reopen(STDOUT)
117
+
118
+ bootstraper = File.expand_path(File.join(File.dirname(__FILE__), 'bootstrap.rb'))
119
+
120
+ exec('ruby', bootstraper, agent_process.path, agent_process.name, Smith.acl_cache_path.to_s)
121
+ end
122
+
123
+ # We don't want any zombies.
124
+ Process.detach(agent_process.pid.to_i)
125
+ end
126
+
127
+ def self.acknowledge_start(agent_process)
128
+ end
129
+
130
+ def self.stop(agent_process)
131
+ Messaging::Sender.new(agent_process.control_queue_name, :durable => false, :auto_delete => true).ready do |sender|
132
+ callback = proc {|sender| sender.publish(ACL::Payload.new(:agent_command).content(:command => 'stop')) }
133
+ errback = proc do
134
+ logger.warn { "Agent is not listening. Setting state to dead." }
135
+ agent_process.no_process_running
136
+ end
137
+
138
+ sender.consumers?(callback, errback)
139
+ end
140
+ end
141
+
142
+ def self.acknowledge_stop(agent_process)
143
+ end
144
+
145
+ def self.kill(agent_process)
146
+ if agent_process.pid
147
+ logger.info { "Sending kill signal: #{agent_process.name}(#{agent_process.pid})" }
148
+ begin
149
+ Process.kill('TERM', agent_process.pid.to_i)
150
+ rescue
151
+ logger.error { "Process does not exist. PID is stale: #{agent_process.pid}: #{agent_process.name}" }
152
+ end
153
+ else
154
+ logger.error { "Not sending kill signal, agent pid is not set: #{agent_process.name}" }
155
+ end
156
+ end
157
+
158
+ # If an agent is in an unknown state then this will check to see
159
+ # if the process is still alive and if it is kill it, otherwise
160
+ # log a message. TODO this is not really a reaper but I haven't
161
+ # quite worked out what I'm going to do with it so I'll leave it
162
+ # as is
163
+ def self.reap_agent(agent_process)
164
+ logger.info { "Reaping agent: #{agent_process.name}" }
165
+ if Pathname.new('/proc').join(agent_process.pid.to_s).exist?
166
+ logger.warn { "Agent is still alive: #{agent_process.name}" }
167
+ else
168
+ logger.warn { "Agent is already dead: #{agent_process.name}" }
169
+ end
170
+ end
171
+ end
172
+
173
+ AgentProcess.state_machine do
174
+ after_transition :on => :start, :do => AgentProcessObserver.method(:start)
175
+ after_transition :on => :acknowledge_start, :do => AgentProcessObserver.method(:acknowledge_start)
176
+ after_transition :on => :stop, :do => AgentProcessObserver.method(:stop)
177
+ after_transition :on => :kill, :do => AgentProcessObserver.method(:kill)
178
+ after_transition :on => :acknowledge_stop, :do => AgentProcessObserver.method(:acknowledge_stop)
179
+ after_transition :on => :not_responding, :do => AgentProcessObserver.method(:reap_agent)
180
+ end
181
+ end