RubyGems - scout_agent - Versions diffs - 3.0.6 → 3.0.7 - Mend

scout_agent 3.0.6 → 3.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

data/CHANGELOG +18 -0
data/Rakefile +1 -1
data/lib/scout_agent/agent/communication_agent.rb +5 -1
data/lib/scout_agent/agent/master_agent.rb +6 -6
data/lib/scout_agent/agent.rb +26 -5
data/lib/scout_agent/api.rb +52 -12
data/lib/scout_agent/assignment/configuration.rb +11 -0
data/lib/scout_agent/assignment/identify.rb +26 -0
data/lib/scout_agent/assignment/queue.rb +35 -0
data/lib/scout_agent/assignment/reset.rb +30 -0
data/lib/scout_agent/assignment/snapshot.rb +50 -4
data/lib/scout_agent/assignment/start.rb +65 -2
data/lib/scout_agent/assignment/status.rb +22 -3
data/lib/scout_agent/assignment/stop.rb +42 -5
data/lib/scout_agent/assignment/test.rb +366 -0
data/lib/scout_agent/assignment/update.rb +20 -0
data/lib/scout_agent/assignment/upload_log.rb +31 -1
data/lib/scout_agent/assignment.rb +92 -13
data/lib/scout_agent/core_extensions.rb +24 -5
data/lib/scout_agent/dispatcher.rb +45 -1
data/lib/scout_agent/lifeline.rb +7 -2
data/lib/scout_agent/mission.rb +31 -11
data/lib/scout_agent/order/check_in_order.rb +27 -1
data/lib/scout_agent/order/snapshot_order.rb +16 -0
data/lib/scout_agent/order.rb +57 -11
data/lib/scout_agent/plan.rb +1 -1
data/lib/scout_agent.rb +1 -1
metadata +13 -5

data/lib/scout_agent/assignment/snapshot.rb CHANGED Viewed

@@ -3,19 +3,43 @@
 module ScoutAgent
   class Assignment
+    #
+    # Invoke with:
+    #
+    #   scout_agent snap [force]
+    #
+    # This command requests that a snapshot be taken of the environment the
+    # agent is running on.  Snapshots are a collection of commands sent down
+    # from the Scout server that can be used to measure the current health of
+    # the environment.  Their output, exit status, and run time are passed back
+    # up to the server in response to this request.
+    #
+    # Note that this is just a request.  It may not be honored if enough time
+    # hasn't passed since the last snapshot.  This is to protect your server
+    # from overmuch busy work, but you can choose to override this limitation
+    # with the optional "force" parameter.
+    #
     class Snapshot < Assignment
+      # Runs the snapshot command.
       def execute
+        # prepare the log
         log = ScoutAgent.prepare_wire_tap(:snapshot, :skip_stdout)
+        # load the snapshot database
         unless db = Database.load(:snapshots, log)
           abort_with_missing_db
         end
+        #
+        # lock on this file to ensure only one process can run a snapshot at a
+        # time
+        #
         open(__FILE__) do |this_file|
           unless this_file.flock(File::LOCK_EX | File::LOCK_NB)
             exit  # snapshot in progress
           end
+          # record our status and set removal at_exit()
           log.info("Building snapshot.")
           status_database(log)
           status("Building snapshot", :snapshot)
@@ -23,13 +47,16 @@ module ScoutAgent
             clear_status(:snapshot)
           end
+          # reset commands, if requested
           if Array(other_args).shift == "force"
             log.info("Clearing command run times to force a full snapshot.")
             db.reset_all_commands
           end
+          # read current commands
           commands = db.current_commands
+          # bail out if there are no commands to run
           if commands.empty?
             if db.have_commands?
               abort_with_too_recent
@@ -38,12 +65,14 @@ module ScoutAgent
               abort_with_no_commands
             end
           end
+          # build snapshot
           snapshot_started = Time.now
-          commands.each do |command|
+          commands.each do |command|  # run each command
             log.info("Running `#{command[:code]}`.")
             command_started = Time.now
             reader, writer  = IO.pipe
+            # run the command in a child process
             run             = fork do
               reader.close
               STDOUT.reopen(writer)
@@ -57,6 +86,7 @@ module ScoutAgent
             exit_status     = nil
             output          = nil
             writer.close
+            # make sure the child process stops in a reasonable time
             begin
               Timeout.timeout(command[:timeout]) do
                 output      = reader.read
@@ -68,6 +98,7 @@ module ScoutAgent
               output        = "Error:  This command took too long to run"
             end
             run_time        = Time.now - command_started
+            # record results
             db.complete_run( command,
                              output,
                              exit_status,
@@ -76,26 +107,41 @@ module ScoutAgent
             log.debug( "`#{command[:code]}` exited (#{exit_status}) in " +
                        "#{run_time} seconds." )
           end
+          # maintain the snapshots database
           db.maintain
           log.info("Snapshot complete.")
-          this_file.flock(File::LOCK_UN)
+          this_file.flock(File::LOCK_UN)  # release our snapshot lock
         end
       end
+      #######
       private
+      #######
+      #
+      # Abort with an error message to the user that says we cannot load the
+      # snapshot database.
+      #
       def abort_with_missing_db
         warn "Snapshots database could not be loaded."
         exit 1
       end
+      #
+      # Abort with an error message to the user that says a snapshot was
+      # recently taken and enough time has not yet passed to grab another.
+      #
       def abort_with_too_recent
         warn "A snapshot was recently taken."
         exit 2
       end
+      #
+      # Abort with an error message to the user that says we don't have any
+      # commands from the server yet to run as a snapshot.
+      #
       def abort_with_no_commands
         warn "No snapshot commands have been received from the server."
         exit 3

data/lib/scout_agent/assignment/start.rb CHANGED Viewed

@@ -3,25 +3,45 @@
 module ScoutAgent
   class Assignment
+    #
+    # Invoke with:
+    #
+    #   sudo scout_agent start
+    #
+    # This command starts the Scout agent, if it is not already running.
+    #
+    # The agent requires super user privileges to prepare its environment, but
+    # it will relinquish these privileges before it begins normal work.
+    #
+    # The agent daemonizes itself as part of the startup process (unless
+    # configured not to) and thus will detach from your terminal.  It will still
+    # be running happily in the background.  You can check up on it with the
+    # status command, if you like.
+    #
     class Start < Assignment
       choose_user  true
       choose_group true
+      # Runs the start command.
       def execute
+        # build the directory for PID file storage, if needed
         unless Plan.pid_dir.exist?
           unless Plan.build_pid_dir(group.gid)
             abort_with_missing_pid_dir
           end
         end
+        # switch to the selected user and group
         unless switch_group_and_user
           abort_with_wrong_group_or_user
         end
+        # test our ability to reach the server
         unless test_server_connection(:quiet)
           abort_with_cannot_connect
         end
+        # make a final check to see if everything looks properly prepared
         unless Plan.valid?            and
                Plan.pid_dir.exist?    and
                Plan.pid_dir.readable? and
@@ -29,6 +49,7 @@ module ScoutAgent
           abort_with_missing_resources
         end
+        # make sure we are the only process running and daemonize, if configured
         running_mode = Plan.run_as_daemon? ? " as a daemon" : ""
         puts "Starting #{ScoutAgent.proper_agent_name}#{running_mode}..."
         card = IDCard.new(:lifeline)
@@ -41,12 +62,15 @@ module ScoutAgent
           end
         end
+        # prepare the log
         log = ScoutAgent.prepare_wire_tap(:lifeline)
         log.info("Loading monitors.")
+        # load configured agents
         agents = %w[master]
         agents << "communication" if Plan.enable_xmpp?
+        # start each agent through a Lifeline monitor
         lifelines = agents.map { |agent| Lifeline.new(agent, log) }
         %w[TERM INT].each do |signal|
           trap(signal) do
@@ -56,13 +80,24 @@ module ScoutAgent
             end
           end
         end
-        lifelines.each { |line| line.launch_and_monitor }
+        lifelines.each do |line|
+          line.launch_and_monitor
+        end
-        lifelines.each { |line| line.join }
+        # wait for all monitors to finish
+        lifelines.each do |line|
+          line.join
+        end
       end
+      #######
       private
+      #######
+      #
+      # A typical Unix daemonization process.  Returns +true+ if successful,
+      # +false+ otherwise.
+      #
       def daemonize
         exit!(0) if fork
         Process.setsid
@@ -77,6 +112,10 @@ module ScoutAgent
         false
       end
+      #
+      # Switches the actual and effective user and group of this process as
+      # configured.
+      #
       def switch_group_and_user
         if Process.euid != user.uid or Process.egid != group.egid
           Process.initgroups(user.name, group.gid)  # prepare groups
@@ -88,6 +127,10 @@ module ScoutAgent
         false
       end
+      #
+      # Abort with an error message to the user that says we cannot prepare PID
+      # file storage because we don't have the privileges.
+      #
       def abort_with_missing_pid_dir
         abort <<-END_PID_DIR.trim
         Unable to prepare PID file storage.  Please start the daemon
@@ -99,6 +142,10 @@ module ScoutAgent
         END_PID_DIR
       end
+      #
+      # Abort with an error message to the user that warns of our inability to
+      # reach the server due to connectivity issues or a bad configuration.
+      #
       def abort_with_cannot_connect
         abort <<-END_CANNOT_CONNECT.trim
         Unable to load a plan from the server at:
@@ -114,6 +161,10 @@ module ScoutAgent
         END_CANNOT_CONNECT
       end
+      #
+      # Abort with an error message to the user that says we were unable to
+      # switch to the configured user or group.
+      #
       def abort_with_wrong_group_or_user
         abort <<-END_GROUP_USER.trim
         Unable to switch to the selected user and group.  Please
@@ -125,6 +176,10 @@ module ScoutAgent
         END_GROUP_USER
       end
+      #
+      # Abort with an error message to the user that says we were unable to load
+      # all expected resources due to a damaged configuration.
+      #
       def abort_with_missing_resources
         abort <<-END_RESOURCES.trim
         Some resources needed to complete the startup process are
@@ -141,10 +196,18 @@ module ScoutAgent
         END_RESOURCES
       end
+      #
+      # Abort with an error message to the user that informs of an agent already
+      # running on this system.
+      #
       def abort_with_other_process_running(pid)
         abort "The daemon is already running with the process ID of #{pid}."
       end
+      #
+      # Abort with an error message to the user that says we are unable to
+      # daemonize this process.
+      #
       def abort_with_failure_to_daemonize
         abort "Unable to daemonize this process."
       end

data/lib/scout_agent/assignment/status.rb CHANGED Viewed

@@ -3,7 +3,18 @@
 module ScoutAgent
   class Assignment
+    #
+    # Invoke with:
+    #
+    #   scout_agent status
+    #
+    # This command dumps the status database to <tt>$stdout</tt>.  This data
+    # will show what processes are running, what they are currently working on,
+    # and when that status was last updated.  It can be useful in tracking down
+    # issues with the agent to see where it went wrong.
+    #
     class Status < Assignment
+      # Runs the status command.
       def execute
         unless db = status_database
           abort_with_missing_db
@@ -25,7 +36,8 @@ module ScoutAgent
             %w[Last\ Updated last_updated_at]
           ].map { |title, data| [title] + statuses.map { |row| row[data] } }
           sizes   = columns.map { |column|
-                      column.map { |field| field.to_s.size }.max }
+                      column.map { |field| field.to_s.size }.max
+                    }
           format  = sizes.map { |size| "%-#{size}s" }.join(" ")
           puts format % columns.map { |column| column.first }
           puts format % sizes.map { |size| "-" * size }
@@ -36,9 +48,16 @@ module ScoutAgent
         end
       end
+      #######
+      private
+      #######
+      #
+      # Abort with an error message to the user that says we cannot load the
+      # status database.
+      #
       def abort_with_missing_db
-        warn "Statuses database could not be loaded."
-        exit 1
+        abort "Statuses database could not be loaded."
       end
     end
   end

data/lib/scout_agent/assignment/stop.rb CHANGED Viewed

@@ -3,7 +3,17 @@
 module ScoutAgent
   class Assignment
+    #
+    # Invoke with:
+    #
+    #   sudo scout_agent stop
+    #
+    # This command halts the currently running agent.  The agent is asked nicely
+    # to stop.  If it doesn't respond within a few seconds though, it is forced
+    # to terminate.
+    #
     class Stop < Assignment
+      # Runs the stop command.
       def execute
         @agent = IDCard.new(:lifeline)
         if @agent.pid_file.exist?
@@ -22,24 +32,47 @@ module ScoutAgent
         end
       end
+      #######
       private
+      #######
+      #
+      # Sends +signal_name+ (which is expected to be some kind of stop request)
+      # to the <tt>@agent</tt>.  This method will then wait +wait_count+ periods
+      # of +wait_delay+ seconds checking between waits to see if the agent has
+      # complied.  It returns when the agent has exited or the total wait period
+      # has expired.
+      #
       def signal_and_wait(signal_name, wait_count = 10, wait_delay = 0.5)
-        begin
-          @agent.signal(signal_name)
-        rescue Errno::EPERM  # we don't have permission
-          abort_with_no_permission
-        end
+        # signal the main process
+        @agent.signal(signal_name)
+        # wait for it to stop
         wait_count.times do
           sleep wait_delay
           break unless @agent.pid_file.exist?
         end
+        # signal other stray processes
+        Plan.pid_dir.each_entry do |process|
+          if process.to_s =~ /(\w+)\.pid\z/
+            IDCard.new($1).signal(signal_name)
+          end
+        end
+      rescue Errno::EPERM  # we don't have permission
+        abort_with_no_permission
       end
+      #
+      # Abort with an error message to the user that says that the agent isn't
+      # currently running.
+      #
       def abort_with_not_running_notice
         puts "#{ScoutAgent.proper_agent_name} is not currently running."
       end
+      #
+      # Abort with an error message to the user that says we don't have enough
+      # permission to stop the agent due to how it was started.
+      #
       def abort_with_no_permission
         abort <<-END_PERMISSION.trim
         Unable to signal the daemon.  Please rerun this command with
@@ -50,6 +83,10 @@ module ScoutAgent
         END_PERMISSION
       end
+      #
+      # Abort with an error message to the user that says we were unable to stop
+      # the agent.
+      #
       def abort_with_failed_to_stop
         abort <<-END_FAILED.trim
         Unable to stop the daemon.  You may need to use the PID files