scout_agent 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/AUTHORS +4 -0
  2. data/CHANGELOG +3 -0
  3. data/COPYING +340 -0
  4. data/INSTALL +17 -0
  5. data/LICENSE +6 -0
  6. data/README +3 -0
  7. data/Rakefile +123 -0
  8. data/TODO +3 -0
  9. data/bin/scout_agent +11 -0
  10. data/lib/scout_agent.rb +73 -0
  11. data/lib/scout_agent/agent.rb +42 -0
  12. data/lib/scout_agent/agent/communication_agent.rb +85 -0
  13. data/lib/scout_agent/agent/master_agent.rb +301 -0
  14. data/lib/scout_agent/api.rb +241 -0
  15. data/lib/scout_agent/assignment.rb +105 -0
  16. data/lib/scout_agent/assignment/configuration.rb +30 -0
  17. data/lib/scout_agent/assignment/identify.rb +110 -0
  18. data/lib/scout_agent/assignment/queue.rb +95 -0
  19. data/lib/scout_agent/assignment/reset.rb +91 -0
  20. data/lib/scout_agent/assignment/snapshot.rb +92 -0
  21. data/lib/scout_agent/assignment/start.rb +149 -0
  22. data/lib/scout_agent/assignment/status.rb +44 -0
  23. data/lib/scout_agent/assignment/stop.rb +60 -0
  24. data/lib/scout_agent/assignment/upload_log.rb +61 -0
  25. data/lib/scout_agent/core_extensions.rb +260 -0
  26. data/lib/scout_agent/database.rb +386 -0
  27. data/lib/scout_agent/database/mission_log.rb +282 -0
  28. data/lib/scout_agent/database/queue.rb +126 -0
  29. data/lib/scout_agent/database/snapshots.rb +187 -0
  30. data/lib/scout_agent/database/statuses.rb +65 -0
  31. data/lib/scout_agent/dispatcher.rb +157 -0
  32. data/lib/scout_agent/id_card.rb +143 -0
  33. data/lib/scout_agent/lifeline.rb +243 -0
  34. data/lib/scout_agent/mission.rb +212 -0
  35. data/lib/scout_agent/order.rb +58 -0
  36. data/lib/scout_agent/order/check_in_order.rb +32 -0
  37. data/lib/scout_agent/order/snapshot_order.rb +33 -0
  38. data/lib/scout_agent/plan.rb +306 -0
  39. data/lib/scout_agent/server.rb +123 -0
  40. data/lib/scout_agent/tracked.rb +59 -0
  41. data/lib/scout_agent/wire_tap.rb +513 -0
  42. data/setup.rb +1360 -0
  43. data/test/tc_core_extensions.rb +89 -0
  44. data/test/tc_id_card.rb +115 -0
  45. data/test/tc_plan.rb +285 -0
  46. data/test/test_helper.rb +22 -0
  47. data/test/ts_all.rb +7 -0
  48. metadata +171 -0
data/TODO ADDED
@@ -0,0 +1,3 @@
1
+ = To Do List
2
+
3
+ Coming soon...
data/bin/scout_agent ADDED
@@ -0,0 +1,11 @@
1
#!/usr/bin/env ruby

# Set from inside the script what the -w and -Ku interpreter switches would.
$VERBOSE = true  # -w
$KCODE   = "u"   # -Ku

# put the lib directory that sits next to this script's directory on the
# load path, then load the agent
script_dir = File.expand_path(File.dirname(__FILE__))
$LOAD_PATH << File.join(script_dir, "..", "lib")
require "scout_agent"

# hand this command over to the agent's dispatcher
ScoutAgent::Dispatcher.dispatch
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/env ruby -wKU
2
+
3
+ # require standard libraries
4
+ require "etc"
5
+ require "fileutils"
6
+ require "optparse"
7
+ require "ostruct"
8
+ require "pathname"
9
+ require "stringio"
10
+ require "thread"
11
+ require "timeout"
12
+ require "uri"
13
+ require "zlib"
14
+
15
+ # load agent
16
+ require "scout_agent/core_extensions"
17
+ require "scout_agent/wire_tap"
18
+ require "scout_agent/database"
19
+ require "scout_agent/tracked"
20
+ require "scout_agent/server"
21
+ require "scout_agent/plan"
22
+ require "scout_agent/id_card"
23
+ require "scout_agent/assignment"
24
+ require "scout_agent/lifeline"
25
+ require "scout_agent/dispatcher"
26
+
27
+ # require gems
28
+ require_lib_or_gem "json"
29
+ require_lib_or_gem "amalgalite"
30
+ require_lib_or_gem "rest_client"
31
+ require_lib_or_gem "xmpp4r"
32
+ require_lib_or_gem "xmpp4r/roster"
33
+
34
# The namespace that holds all of the agent software.
module ScoutAgent
  class << self
    # Returns the name of the agent executable (this module's name, run
    # through snake_case()).
    def agent_name
      name.snake_case
    end

    #
    # Returns agent_name() as a human-readable name:  underscores become
    # spaces and each word is capitalized.
    #
    def proper_agent_name
      spaced = agent_name.tr("_", " ")
      spaced.gsub(/\w+/) { |word| word.capitalize }
    end

    #
    # Builds the log used by +process_name+.  The log lives in the configured
    # log directory, is rotated daily, and uses the configured logging level
    # (the WireTap default is kept when that level isn't recognized).  Unless
    # the agent runs as a daemon, or a +true+ value is passed for
    # +skip_stdout+, everything logged is also teed to <tt>$stdout</tt>.
    #
    def prepare_wire_tap(process_name, skip_stdout = false)
      log = WireTap.new(Plan.log_dir + "#{agent_name}.log", :daily)
      begin
        severity  = ScoutAgent::WireTap::Severity.const_get(Plan.logging_level)
        log.level = severity
      rescue NameError  # unrecognized level
        # do nothing:  the default level stays in effect
      end
      log.progname = process_name
      log.tap      = $stdout if !skip_stdout && !Plan.run_as_daemon?
      log
    end
  end

  # The version of this agent.
  VERSION = "3.0.0".freeze
  # A Pathname for the directory holding the agent's code, used when loading
  # code dynamically.
  LIB_DIR = Pathname.new(File.dirname(__FILE__)) + agent_name
end

# A shorter, external alias for the namespace.
Scout = ScoutAgent
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby -wKU
2
+
3
module ScoutAgent
  #
  # The base class for agent processes.  Creating one prepares a log named
  # after the concrete class and records a "Loading" status that is cleared
  # again when the process exits.  Subclasses supply the actual work in run().
  #
  class Agent
    include Tracked

    # The log prepared for this agent by initialize().
    attr_reader :log

    # Prepares the log, then records the initial status and its cleanup.
    def initialize
      @log = ScoutAgent.prepare_wire_tap(file_name)
      log.info("Loading.")

      status("Loading", file_name)
      at_my_exit { clear_status(file_name) }
    end

    # Asks an IDCard named after this agent to authorize() and returns the
    # result.
    def authorize
      id_card = IDCard.new(file_name)
      id_card.authorize
    end

    # The work of the agent.  Subclasses must replace this method.
    def run
      raise( NotImplementedError,
             "Subclasses must override ScoutAgent::Agent#run()." )
    end

    # A hook for reacting to changes.  The default ignores them.
    def notice_changes
      # do nothing:  specific agents can override for their purposes
    end

    # Ends the agent by exiting the process.
    def finish
      exit
    end

    private

    # The snake_case form of this class's short name, minus a trailing
    # "Agent".  It names the log's process and the status and ID card entries.
    def file_name
      base_name = self.class.short_name.sub(/Agent\z/, "")
      base_name.snake_case
    end
  end
end
@@ -0,0 +1,85 @@
1
+ #!/usr/bin/env ruby -wKU
2
+
3
+ # load agent extensions
4
+ require "scout_agent/api"
5
+ require "scout_agent/order"
6
+
7
module ScoutAgent
  class Agent
    #
    # An agent that, in test mode, logs into XMPP, accepts subscription
    # requests, and executes any Order that can handle an incoming message.
    # Outside of test mode the agent only idles.
    #
    class CommunicationAgent < Agent
      # Prepares the log and status, then loads the supported orders.
      def initialize
        super  # setup our log and status

        Order.log = log  # pass on our log
        Order.load_all   # load supported orders

        @agent_jid = @jabber = @roster = @shutdown_thread = nil
      end

      #
      # In test mode:  log in, announce presence, install the callbacks, and
      # block in listen() until finish() is called.  Otherwise, idle.
      #
      # NOTE(review):  the XMPP session is only used in test mode -- confirm
      # that this is intentional.
      #
      def run
        return idle_forever unless Plan.test_mode?

        login
        update_status("Online since #{Time.now.utc.to_db_s}")
        fetch_roster
        install_subscriptions_callback
        install_messages_callback
        listen
      end

      # Wakes the thread parked in listen() so that it can close the
      # connection.  When no thread is listening, the process just exits.
      def finish
        return @shutdown_thread.run if @shutdown_thread
        exit
      end

      private

      # Sleeps in one minute naps without ever returning.
      def idle_forever
        loop { sleep 60 }
      end

      # Connects to the server and authenticates with the agent key, which
      # serves as both the JID's node and the password.
      def login
        Thread.abort_on_exception = true  # make XMPP4R fail fast
        @agent_jid = Jabber::JID.new("#{Plan.agent_key}@jabber.org/agent")
        @jabber    = Jabber::Client.new(@agent_jid)
        no_warnings { @jabber.connect }
        @jabber.auth(Plan.agent_key)
      end

      # Sends a presence stanza carrying +message+ as the status text and
      # +status+ as the show value.
      def update_status(message, status = nil)
        stanza        = Jabber::Presence.new
        stanza.status = message
        stanza.show   = status
        @jabber.send(stanza)
      end

      # Builds the roster helper for the current connection.
      def fetch_roster
        @roster = Jabber::Roster::Helper.new(@jabber)
      end

      # Accepts every subscription request that arrives.
      def install_subscriptions_callback
        @roster.add_subscription_request_callback do |_item, presence|
          @roster.accept_subscription(presence.from)
        end
      end

      # Executes the order, if any, that claims an incoming message.
      def install_messages_callback
        @jabber.add_message_callback do |message|
          order = Order.can_handle?(message)
          order.execute if order
        end
      end

      # Parks the current thread until finish() runs it again, then closes
      # the connection.
      def listen
        @shutdown_thread = Thread.current
        Thread.stop
        @jabber.close
      end
    end
  end
end
@@ -0,0 +1,301 @@
1
+ #!/usr/bin/env ruby -wKU
2
+
3
+ # load agent extensions
4
+ require "scout_agent/mission"
5
+
6
module ScoutAgent
  class Agent
    #
    # The agent that does the monitoring work.  In a loop it fetches the plan
    # from the server, runs each current mission in a forked child process,
    # checks the gathered data in with the server, performs maintenance on
    # the databases and the log directory, and then sleeps until the next
    # mission is due.
    #
    class MasterAgent < Agent
      # The stages of one pass through the main loop, in execution order.
      STAGES      = %w[ fetch_plan
                        execute_missions
                        checkin
                        perform_maintenance
                        wait_for_orders ].freeze
      # Dated log files older than this many seconds (one week) are removed.
      OLD_LOG_AGE = 60 * 60 * 24 * 7

      #
      # Prepares the log and status, the server connection, and the mission
      # log, queue, and snapshots databases.  The process exits if any of the
      # databases fails to load.
      #
      def initialize
        super  # setup our log and status

        @running   = true
        @main_loop = nil
        @server    = Server.new(log)
        @db        = Database.load(:mission_log, log)
        @queue     = Database.load(:queue,       log)
        @snapshots = Database.load(:snapshots,   log)

        if [@db, @queue, @snapshots].any? { |db| db.nil? }
          log.fatal("Could not load all required databases.")
          exit
        end
      end

      #
      # Runs the STAGES over and over in a dedicated thread and waits for that
      # thread.  After each stage the loop checks whether finish() has asked
      # for a shutdown.
      #
      def run
        log.info("Running.")
        @main_loop = Thread.new do
          Thread.current.abort_on_exception = true
          loop do
            STAGES.each do |stage|
              send(stage)
              check_running_status
            end
          end
        end
        @main_loop.join
      end

      # Wakes the main loop thread, cutting short the sleep in
      # wait_for_orders().
      def notice_changes
        @main_loop.run if @main_loop
      rescue ThreadError  # Thread was already killed
        # do nothing:  we're shutting down and can't notice new things
      end

      #
      # Requests a shutdown.  The main loop exits after the stage it is
      # currently running;  it is woken in case it is sleeping.
      #
      def finish
        if @running
          log.info("Shutting down.")
        else
          log.warn("Received multiple shutdown signals.")
        end
        @running = false
        notice_changes
      end

      private

      #############
      ### Agent ###
      #############

      #
      # Retrieves the plan from the server and stores its plugins and
      # commands.  Outside of test mode the fetch is made conditional on the
      # last modified time of the stored plan.  Missing, empty, and malformed
      # plans are logged and skipped.
      #
      def fetch_plan
        log.info("Fetching plan from server.")
        status("Fetching plan from server")
        headers = {}
        if !Plan.test_mode? && (old_plan = @db.current_plan)
          log.debug( "Adding If-Modified-Since for plan fetch:  " +
                     "#{old_plan[:last_modified]}." )
          headers[:if_modified_since] = old_plan[:last_modified]
        end
        json_plan = @server.get_plan(headers)
        if json_plan.nil?  # failed to retrieve plan
          log.warn("Could not retrieve plan from server.")
          return
        elsif json_plan.to_s.empty?  # skip empty plans
          log.info("Received empty plan.")
          return
        else
          log.info("Received plan (#{json_plan.to_s.size} bytes).")
        end
        begin
          ruby_plan = JSON.parse(json_plan.to_s)
        rescue JSON::ParserError  # bad JSON
          log.error("Plan from server was malformed JSON.")
          return  # skip plan update
        end
        @db.update_plan( json_plan.headers[:last_modified],
                         Array(ruby_plan["plugins"]) )
        @snapshots.update_commands(Array(ruby_plan["commands"]))
      end

      #
      # Runs every current mission, each in its own forked child process that
      # is given <tt>mission[:timeout]</tt> seconds to finish.  A failing exit
      # status or a timeout is logged and written to the database as an error
      # report.  The run stops early if a mission can't be marked complete,
      # because the same mission would otherwise be selected forever.
      #
      def execute_missions
        status("Running missions")
        ran_a_mission = false
        while mission = @db.current_mission
          log.info("Running #{mission[:name]} mission.")
          ran_a_mission = true
          pid = fork do
            reset_environment
            compile_mission(mission)
            run_mission(mission)
            complete_mission(mission)
          end

          begin
            Timeout.timeout(mission[:timeout]) do
              Process.wait(pid)
            end
            unless $?.success?
              log.warn( "#{mission[:name]} exited with an error:  " +
                        "#{$?.exitstatus}." )
              @db.write_report(
                mission[:id],
                :error,
                :subject => "#{mission[:name]} exited with an error",
                :body    => "Exit status:  #{$?.exitstatus}"
              )
            end
          rescue Timeout::Error  # mission exceeded allowed execution
            # named so that it doesn't shadow the status() helper
            exit_status = Process.term_or_kill(pid)
            log.error( "#{mission[:name]} took too long to run:  " +
                       "#{exit_status.exitstatus}." )
            @db.write_report(
              mission[:id],
              :error,
              :subject => "#{mission[:name]} took too long to run",
              :body    => "Exit status:  #{exit_status.exitstatus}"
            )
          end
          # prevent an infinite loop if we can't complete the mission
          unless @db.complete_mission(mission)
            log.error("Mission run cancelled due to imcomplete missions.")
            break
          end
        end
        log.warn("No missions to run.") unless ran_a_mission
      end

      #
      # Sends the current reports, the queued reports, and the snapshot runs
      # to the server.  Reports are grouped by their type into +reports+,
      # +hints+, +alerts+, and +errors+.  Nothing is sent when there's no
      # data.
      #
      def checkin
        reports   = @db.current_reports
        queued    = @queue.queued_reports
        snapshots = @snapshots.current_runs
        if reports.empty? and queued.empty? and snapshots.empty?
          log.warn("No data to report to the server.")
          return
        end

        log.info("Checking in with server.")
        status("Checking in with server")
        all_reports = reports + queued
        payload     = { :reports => Array.new,
                        :hints   => Array.new,
                        :alerts  => Array.new,
                        :errors  => Array.new }
        all_reports.each do |report|
          type = report.delete_at(:type)
          payload["#{type}s".to_sym] << report.to_hash
        end
        payload[:snapshots] = snapshots.map { |run| run.to_hash }

        # the total covers everything being sent, queued reports included
        log.info( "Sending #{all_reports.size} reports " +
                  "(#{payload[:reports].size} reports, " +
                  "#{payload[:hints].size} hints, " +
                  "#{payload[:alerts].size} alerts, " +
                  "and #{payload[:errors].size} errors)" +
                  "#{report_date_range(reports, queued)} " +
                  "and #{snapshots.size} snapshot runs" +
                  "#{snapshot_date_range(snapshots)}." )
        if @server.post_checkin(payload)
          log.info("Server received data.")
        else
          log.warn("Could not get data to server.")
        end
      end

      #
      # Returns " from FIRST to LAST" for the log message in checkin(), or an
      # empty String when there are no reports.  FIRST is the earlier of the
      # two lists' first creation times and LAST is the later of the two
      # lists' last creation times.
      #
      def report_date_range(reports, queued)
        return String.new unless reports.first or queued.first
        first_date = [reports.first, queued.first].compact.map { |report|
          report[:created_at]
        }.min
        last_date  = [reports.last, queued.last].compact.map { |report|
          report[:created_at]
        }.max
        " from #{first_date} to #{last_date}"
      end

      # Returns " from FIRST to LAST" for the snapshot runs in the log message
      # of checkin(), or an empty String when there are no runs.
      def snapshot_date_range(snapshots)
        return String.new unless snapshots.first
        " from #{snapshots.first[:created_at]} to " +
        "#{snapshots.last[:created_at]}"
      end

      #
      # Asks each loaded database to maintain() itself and removes log files
      # whose names end in a <tt>.YYYYMMDD</tt> date older than OLD_LOG_AGE.
      #
      def perform_maintenance
        log.info("Running maintenance tasks.")
        status("Running maintenance tasks")

        # VACUUM databases
        [@db, @queue, @snapshots, status_database].compact.each do |db|
          result = db.maintain
          if result.nil?
            log.error("Maintenance for #{db.path} database failed.")
          elsif result
            log.debug("VACUUM successful for #{db.path}.")
          end
        end

        # clean out old logs
        Plan.log_dir.each_entry do |log_file|
          next unless log_file.to_s =~ /\.(\d{4})(\d{2})(\d{2})\z/
          begin
            log_day = Time.local(*$~.captures.map { |n| n.to_i })
          rescue ArgumentError  # eight digits that don't form a date
            next                # not one of our dated logs:  leave it alone
          end
          next unless Time.now - log_day > OLD_LOG_AGE
          begin
            (Plan.log_dir + log_file).unlink
          rescue StandardError => error  # file cannot be unlinked
            log.error( "Failed to unlink old log file '#{log_file}':  " +
                       "#{error.message} (#{error.class})." )
            next
          end
          log.debug("Successfully unlinked old log file '#{log_file}'.")
        end
      end

      # Sleeps until the next mission is due.  notice_changes() can end the
      # sleep early.
      def wait_for_orders
        pause = @db.seconds_to_next_mission
        log.info("Waiting #{pause} seconds for next mission run.")
        status("Idle")
        sleep pause
      end

      # Exits once finish() has requested a shutdown.
      def check_running_status
        exit unless @running
      end

      ###############
      ### Mission ###
      ###############

      #
      # Turns a freshly forked child into a mission process:  it gets its own
      # shutdown handler, identity, log, and status record.  The child exits
      # with status 1 if it can't be authorized.
      #
      def reset_environment
        # swap out our parent's signal handlers
        install_shutdown_handler { exit }

        # clear the parent's identity and assume mine
        IDCard.me = nil
        IDCard.new(:mission).authorize or exit(1)

        # get a handle on the log
        @log = ScoutAgent.prepare_wire_tap(:mission)

        # record the new process status
        force_status_database_reload
        at_my_exit do
          clear_status
        end
      end

      #
      # Evaluates the mission's code at the top level.  Any failure is logged
      # and written to the database as an error report, then the child exits:
      # with 0 if the report was written and 2 if it wasn't.
      #
      # NOTE(review):  this is eval() of code stored with the plan that came
      # from the server -- the connection to the server must be trusted.
      #
      def compile_mission(mission)
        log.info("Compiling #{mission[:name]} mission.")
        status("Compiling")
        begin
          eval(mission[:code], TOPLEVEL_BINDING, mission[:name])
        rescue SystemExit  # don't catch exit() calls
          raise
        rescue Exception => error  # any compile error, SyntaxError included
          log.error( "#{mission[:name]} could not be compiled:  " +
                     "#{error.message} (#{error.class})." )
          reported = @db.write_report(
            mission[:id],
            :error,
            :subject => "#{mission[:name]} could not be compiled",
            :body    => "#{error.message}\n#{error.backtrace.join("\n")}"
          )
          exit(reported ? 0 : 2)  # warn parent if we can't report
        end
      end

      #
      # Builds the class returned by Mission.prepared() with the mission's
      # details and runs it.  When no class was prepared, an error report is
      # written and the child exits:  with 0 if the report was written and 3
      # if it wasn't.
      #
      def run_mission(mission)
        log.info("Preparing #{mission[:name]} mission.")
        if prepared = Mission.prepared
          log.info("Starting #{mission[:name]} mission.")
          status("Running")
          prepared.new( *mission.values_at( :id,
                                            :name,
                                            :last_run_at,
                                            :memory,
                                            :options ) ).run
        else  # no mission loaded
          log.error("#{mission[:name]} could not be prepared.")
          reported = @db.write_report(
            mission[:id],
            :error,
            :subject => "#{mission[:name]} could not be prepared",
            :body    => "The code didn't define a Scout::Plugin subclass"
          )
          exit(reported ? 0 : 3)  # warn parent if we can't report
        end
      end

      # Logs that the mission finished.  This runs in the child;  the parent
      # marks the mission complete in the database.
      def complete_mission(mission)
        log.info("#{mission[:name]} mission complete.")
      end
    end
  end
end