riemann-monitors 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +2 -0
- data/LICENSE +21 -0
- data/README.adoc +75 -0
- data/Rakefile +125 -0
- data/bin/riemann-apache-status +98 -0
- data/bin/riemann-bench +71 -0
- data/bin/riemann-cloudant +58 -0
- data/bin/riemann-consul +106 -0
- data/bin/riemann-dir-files-count +55 -0
- data/bin/riemann-dir-space +55 -0
- data/bin/riemann-diskstats +95 -0
- data/bin/riemann-fd +66 -0
- data/bin/riemann-freeswitch +119 -0
- data/bin/riemann-haproxy +58 -0
- data/bin/riemann-health +289 -0
- data/bin/riemann-httpstatus +73 -0
- data/bin/riemann-kvminstance +22 -0
- data/bin/riemann-memcached +38 -0
- data/bin/riemann-net +81 -0
- data/bin/riemann-nginx-status +84 -0
- data/bin/riemann-ntp +35 -0
- data/bin/riemann-proc +131 -0
- data/bin/riemann-varnish +54 -0
- data/bin/riemann-zookeeper +41 -0
- data/data/statfields +49 -0
- data/lib/riemann-monitors/main.rb +111 -0
- data/lib/riemann-monitors/version.rb +6 -0
- data/lib/riemann-monitors.rb +8 -0
- data/project.yaml +12 -0
- data/riemann-monitors.gemspec +73 -0
- metadata +210 -0
| @@ -0,0 +1,22 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require_relative "../lib/riemann-monitors"
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            # Submits the number of KVM instances listed by libvirt to Riemann.
            class Riemann::Monitors::KVM
              include Riemann::Monitors

              # Counts the `virsh list` output lines that contain "i-" and reports
              # that count as a single "KVM Running VMs" event in state "info".
              def tick
                # The shell pipeline does the filtering and counting; `wc -l`
                # prints the number of matching lines, which is parsed as an Integer.
                instance_count = `virsh list |grep i-|wc -l`.to_i

                report(service: "KVM Running VMs", metric: instance_count, state: "info")
              end
            end

            Riemann::Monitors::KVM.run
         | 
| @@ -0,0 +1,38 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # Gathers memcached STATS and submits them to Riemann.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            require_relative "../lib/riemann-monitors"
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            # Gathers memcached STATS output over TCP and submits each stat to Riemann.
            class Riemann::Monitors::Memcached
              include Riemann::Monitors
              require 'socket'

              opt :memcached_host, "Memcached hostname", :default => 'localhost'
              opt :memcached_port, "Memcached port", :default => 11211

              # Connects to memcached, issues the `stats` command and reports one
              # event per "STAT <name> <value>" line until the "END" terminator.
              #
              # The socket is always closed, even when reporting raises. A
              # connection that ends before "END" simply stops the loop, and lines
              # that do not look like a STAT line are skipped instead of raising.
              def tick
                sock = TCPSocket.new(opts[:memcached_host], opts[:memcached_port])
                begin
                  sock.print("stats\r\n")
                  sock.flush

                  # NOTE(review): the first response line has always been read and
                  # discarded here; presumably that is the pid stat - confirm before
                  # changing.
                  sock.gets

                  # `gets` returns nil at EOF, which ends the loop cleanly.
                  while (line = sock.gets)
                    break if line.strip == 'END'

                    m = line.match(/STAT (\w+) (\S+)/)
                    next unless m

                    report(
                      :host => opts[:memcached_host].dup,
                      :service => "memcached/#{m[1]}",
                      :metric => m[2].to_f,
                      :state => 'ok',
                      :tags => ['memcached']
                    )
                  end
                ensure
                  sock.close
                end
              end
            end

            Riemann::Monitors::Memcached.run
         | 
    
        data/bin/riemann-net
    ADDED
    
    | @@ -0,0 +1,81 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # Gathers network interface statistics and submits them to Riemann.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            require "pry"
         | 
| 6 | 
            +
            require_relative "../lib/riemann-monitors"
         | 
| 7 | 
            +
            require 'set'
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            # Reads per-interface counters from /proc/net/dev and reports their
            # per-second rate of change to Riemann.
            class Riemann::Monitors::Net
              include Riemann::Monitors

              opt :interfaces, "Interfaces to monitor", :type => :strings, :default => []
              opt :ignore_interfaces, "Interfaces to ignore", :type => :strings, :default =>['lo']

              # Sets up the counter names (in /proc/net/dev column order) and the
              # interface include/ignore sets from the command-line options.
              def initialize
                @old_state = nil
                @fields = ["rx bytes", "rx packets", "rx errs", "rx drop", "rx fifo", "rx frame", "rx compressed", "rx multicast",
                           "tx bytes", "tx packets", "tx errs", "tx drops", "tx fifo", "tx colls", "tx carrier", "tx compressed"]
                @use_interfaces = Set.new(opts[:interfaces])
                @ignore_interfaces = Set.new(opts[:ignore_interfaces])
              end

              # Takes a snapshot of the current counters.
              #
              # Returns a flat Hash mapping "<interface> <field>" service names to
              # Integer counter values, which is the shape `tick` subtracts and
              # pattern-matches on. Ignored interfaces are dropped; an empty
              # `--interfaces` list means "every interface that is not ignored".
              def state
                metrics = {}

                # The first two lines of /proc/net/dev are column headers.
                File.readlines("/proc/net/dev").drop(2).each do |line|
                  # Split on the first colon only: names are left-padded with
                  # spaces and the first counter may follow the colon directly.
                  name, counters = line.split(":", 2)
                  next if counters.nil?

                  interface = name.strip
                  next if @ignore_interfaces.include?(interface)
                  next unless @use_interfaces.empty? || @use_interfaces.include?(interface)

                  @fields.zip(counters.split.map(&:to_i)).each do |field, value|
                    next if value.nil? # fewer columns than known fields
                    metrics["#{interface} #{field}"] = value
                  end
                end

                metrics
              end

              # Compares the current snapshot with the previous one and reports:
              # * an 'expired' event for every service that disappeared, and
              # * the counter delta divided by the configured interval for every
              #   service present in both snapshots.
              #
              # Drop and error counters are reported as 'warning' whenever the
              # counter itself is above zero, otherwise 'ok'. The first tick only
              # records the snapshot.
              def tick
                state = self.state

                if @old_state
                  @old_state.each_key do |service|
                    next if state.key?(service)
                    report(:service => service.dup, :state => 'expired')
                  end

                  state.each do |service, metric|
                    next unless @old_state.key?(service)

                    delta = metric - @old_state[service]
                    # `drops?` covers both "rx drop" and "tx drops".
                    troubled = (service =~ /(?:drops?|errs)$/) && metric > 0
                    svc_state = troubled ? 'warning' : 'ok'

                    report(
                      :service => service.dup,
                      :metric => (delta.to_f / opts[:interval]),
                      :state => svc_state
                    )
                  end
                end

                @old_state = state
              end
            end

            Riemann::Monitors::Net.run
         | 
| @@ -0,0 +1,84 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # Gathers nginx status stub statistics and submits them to Riemann.
         | 
| 4 | 
            +
            # See http://wiki.nginx.org/HttpStubStatusModule for configuring Nginx appropriately
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            require_relative "../lib/riemann-monitors"
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            # Fetches the nginx stub-status page and submits its counters to Riemann.
            class Riemann::Monitors::NginxStatus
              include Riemann::Monitors
              require 'net/http'
              require 'uri'

              opt :uri, "Nginx Stub Status URI", :default => 'http://localhost:8080/nginx_status'
              opt :checks, "Which metrics to report.", :type => :strings, :default => %w{active accepted handled requests reading writing waiting}
              opt :active_warning, "Active connections warning threshold", :default => 0
              opt :active_critical, "Active connections critical threshold", :default => 0
              opt :reading_warning, "Reading connections warning threshold", :default => 0
              opt :reading_critical, "Reading connections critical threshold", :default => 0
              opt :writing_warning, "Writing connections warning threshold", :default => 0
              opt :writing_critical, "Writing connections critical threshold", :default => 0
              opt :waiting_warning, "Waiting connections warning threshold", :default => 0
              opt :waiting_critical, "Waiting connections critical threshold", :default => 0

              # Parses the status URI and prepares the pattern used to pull the
              # seven counters out of the status page.
              def initialize
                @uri = URI.parse(opts[:uri])

                # sample response:
                #
                # Active connections: 1 
                # server accepts handled requests
                #  39 39 39 
                # Reading: 0 Writing: 1 Waiting: 0 
                @keys = %w{active accepted handled requests reading writing waiting}
                @re = /Active connections: (\d+) \n.+\n (\d+) (\d+) (\d+) \nReading: (\d+) Writing: (\d+) Waiting: (\d+)/m
              end

              # Maps a counter to a Riemann state using the "<key>_critical" and
              # "<key>_warning" options. A threshold of 0 disables that level.
              #
              # key   - counter name, e.g. "active"
              # value - Integer counter value
              #
              # Returns 'critical', 'warning' or 'ok'.
              def state(key, value)
                critical_opt = "#{key}_critical".to_sym
                if opts.key?(critical_opt)
                  critical_threshold = opts[critical_opt]
                  return 'critical' if critical_threshold > 0 && value >= critical_threshold
                end

                warning_opt = "#{key}_warning".to_sym
                if opts.key?(warning_opt)
                  warning_threshold = opts[warning_opt]
                  return 'warning' if warning_threshold > 0 && value >= warning_threshold
                end

                'ok'
              end

              # Fetches the status page and reports an "nginx health" event plus
              # one "nginx <key>" event per counter selected by `--checks`.
              #
              # A connection failure or a response that does not match the expected
              # stub-status format is reported as a critical "nginx health" event
              # and no counters are sent.
              def tick
                response = nil
                begin
                  response = Net::HTTP.get(@uri)
                rescue => e
                  report(
                    :service => "nginx health",
                    :state => "critical",
                    :description => "Connection error: #{e.class} - #{e.message}"
                  )
                end

                return if response.nil?

                parsed = @re.match(response)
                if parsed.nil?
                  # e.g. an error page, or the URI points at something other than
                  # the stub status handler
                  report(
                    :service => "nginx health",
                    :state => "critical",
                    :description => "Unexpected response from nginx status URI"
                  )
                  return
                end

                report(
                  :service => "nginx health",
                  :state => "ok",
                  :description => "Nginx status connection ok"
                )

                values = parsed.captures.map { |v| v.to_i }
                wanted = Array(opts[:checks])

                @keys.zip(values).each do |key, value|
                  next unless wanted.include?(key)

                  report({
                    :service => "nginx #{key}",
                    :metric  => value,
                    :state   => state(key, value),
                    :tags    => ['nginx']
                  })
                end
              end
            end

            Riemann::Monitors::NginxStatus.run
         | 
    
        data/bin/riemann-ntp
    ADDED
    
    | @@ -0,0 +1,35 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # Reports NTP stats to Riemann.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            require_relative "../lib/riemann-monitors"
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            # Reports per-peer NTP delay, offset and jitter from `ntpq` to Riemann.
            class Riemann::Monitors::Ntp
              include Riemann::Monitors
              require 'socket' # Socket.gethostname

              # Remembers the local hostname, used as the event host.
              def initialize
                @hostname = Socket.gethostname
              end

              # Runs `ntpq -p -n` and reports the delay, offset and jitter columns
              # for every peer line. Header, separator and truncated lines are
              # skipped.
              def tick
                `ntpq -p -n`.each_line do |stat|
                  m = stat.split
                  # A peer line has ten columns; jitter is the last one (index 9).
                  next if m.length < 10
                  next if m.any? { |col| col.start_with?("===", "remote") }

                  # Drop only the leading tally code (e.g. "*", "+", "-") so the
                  # rest of the peer address is left untouched.
                  peer = m[0].sub(/\A[*+\-#xo.]/, "")

                  report_stat("delay", m[7], peer)
                  report_stat("offset", m[8], peer)
                  report_stat("jitter", m[9], peer)
                end
              end

              private

              # Submits one "ntp/<type>" event for a peer.
              # Deliberately not named `send`, which would shadow Object#send.
              #
              # type   - "delay", "offset" or "jitter"
              # metric - the raw column text; converted with to_f
              # peer   - peer address, used as the event description
              def report_stat(type, metric, peer)
                report(host: @hostname,
                       service: "ntp/#{type}",
                       metric: metric.to_f,
                       description: peer.to_s,
                       tags: ["ntp"])
              end
            end

            Riemann::Monitors::Ntp.run
         | 
    
        data/bin/riemann-proc
    ADDED
    
    | @@ -0,0 +1,131 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # Reports running process count to riemann.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            require 'pry'
         | 
| 6 | 
            +
            require 'set'
         | 
| 7 | 
            +
            require_relative "../lib/riemann-monitors"
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            # $clk_tck = Etc.sysconf(2).to_f
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            # One sampled process: its pid, command name and two lists of
            # [name, value] pairs (`oldfields` and `newfields`).
            ProcessInfo = Struct.new(:pid, :comm, :oldfields, :newfields) do
              # Translates a one-letter process state code into a Riemann state
              # string. Codes that are not listed map to "unknown".
              def riemann_statemap(state)
                {
                  "R" => "ok",
                  "S" => "ok",
                  "I" => "warning",
                  "T" => "critical",
                  "U" => "critical",
                  "Z" => "critical"
                }.fetch(state, "unknown")
              end

              # All [name, value] pairs from both lists, sorted; computed once and
              # cached on the instance.
              def fields
                return @sorted_fields if @sorted_fields

                @sorted_fields = (oldfields + newfields).sort
              end

              # (A time_of(field) helper dividing by $clk_tck is intentionally left
              # disabled, as is the matching "...time" branch below.)

              # Yields one event Hash per field. Every Hash carries :service,
              # :description and :pname; the "State" field adds :state and :value,
              # numeric values go into :metric, anything else into :value.
              def summarize
                fields.each do |name, metric|
                  point = {
                    service: "process;#{pid};#{name}",
                    description: name,
                    pname: comm
                  }

                  if name != "State" && metric.is_a?(Numeric)
                    point[:metric] = metric
                  else
                    point[:state] = riemann_statemap(metric) if name == "State"
                    point[:value] = metric
                  end

                  yield(point)
                end
              end
            end
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            # Samples matching processes from /proc and reports per-process fields
            # and per-command instance counts to Riemann.
            class Riemann::Monitors::Proc
              include Riemann::Monitors

              opt(:proc_regex, "regular expression that matches the process to be monitored", type: :string, default: ".*")
              opt(:newstats, "fields from /proc/#/status to collect (always includes name and status character)", multi: :strings,
                  default: ["VmRSS", "VmSize", "VmPeak", "VmHWM", "VmLib", "VmSwap", "Threads"])
              opt(:oldstats, "fields from /proc/#/stat to collect (use names from proc(5) manpage)", multi: :strings, default: ["utime", "stime"])
              opt(:metricfield, "field to assign to metric", default: "VmRSS")
              opt(:proc_min_critical, "running process count minimum", :default => 0)
              opt(:proc_max_critical, "running process count maximum", :default => 65536)

              # Compiles the process regexp, builds the sets of field names to
              # collect and loads the list of /proc/<pid>/stat field names from the
              # bundled data/statfields file.
              def initialize
                # Validate before the value is handed to Regexp.new.
                abort "FATAL: specify a process regular expression, see --help for usage" unless opts[:proc_regex]

                @process_regex = Regexp.new(opts[:proc_regex])
                @newfields = Set.new(opts[:newstats]).add("Name").add("State")
                @oldfields = Set.new(opts[:oldstats])
                @allstatfields = File.readlines(File.join(__dir__, "..", "data", "statfields")).map(&:chomp)

                @limits = { :critical => Range.new(opts[:proc_min_critical], opts[:proc_max_critical]) }
              end

              # Takes one sample of every process whose command name matches the
              # regexp and reports:
              # * "process.samplerate"  - seconds spent collecting the sample
              # * "process.instances"   - per command name, 'critical' when the
              #                           count falls outside the configured range
              # * one event per collected field of every process
              #
              # Processes that exit while being sampled are left out.
              def tick
                @sample_start = Time.now
                processes = Dir.glob("/proc/[0-9]*/comm").map { |cfile| sample_process(cfile) }.compact
                @sample_end = Time.now

                report(service: "process.samplerate", state: "ok", metric: (@sample_end.to_f - @sample_start.to_f), time: @sample_end)

                processes.group_by(&:comm).each do |pname, ps|
                  running = ps.count
                  count_state = @limits[:critical].cover?(running) ? "ok" : "critical"
                  report(service: "process.instances", state: count_state, description: pname, metric: running)
                end

                processes.each do |process|
                  process.summarize do |point|
                    point[:time] = @sample_end
                    report(point)
                  end
                end
              end

              private

              # Builds a ProcessInfo for the process owning `cfile`
              # (a /proc/<pid>/comm path). Returns nil when the command name does
              # not match the regexp or the process vanished mid-sample.
              def sample_process(cfile)
                pid = cfile[/\d+/].to_i
                comm = File.read(cfile).chomp
                return nil unless @process_regex.match(comm)

                status_fields = read_status_fields(pid)
                stat_fields = read_stat_fields(pid)
                ProcessInfo.new(pid, comm, stat_fields, status_fields)
              rescue Errno::ENOENT, Errno::ESRCH
                # The process exited between the glob and one of the reads.
                nil
              end

              # Collects the requested "Name: value" lines from /proc/<pid>/status
              # as [name, value] pairs. A value made only of digits becomes an
              # Integer; lines with more than two value columns are kept as one
              # tab-joined String.
              def read_status_fields(pid)
                File.readlines("/proc/#{pid}/status").map { |line|
                  parts = line.split(/\s+/)
                  next if parts.empty?

                  name = parts[0].chop # strip the trailing ":"
                  next unless @newfields.include?(name)

                  rest = parts[1..-1]
                  value = rest.first
                  if rest.length > 2
                    value = rest.join("\t")
                  elsif value && value[/\A\d+\z/]
                    value = value.to_i
                  end
                  [name, value]
                }.compact
              end

              # Collects the requested numeric fields from /proc/<pid>/stat as
              # [name, value] pairs. Only the trailing run of digits, minus signs
              # and spaces is parsed, then paired positionally with the names
              # loaded from data/statfields.
              def read_stat_fields(pid)
                statstr = File.read("/proc/#{pid}/stat")
                stat_tail = statstr.reverse[/[-0-9 ]+/].to_s.reverse.split(" ").map { |n| n.to_i }
                @allstatfields.zip(stat_tail).select { |name, _value| @oldfields.include?(name) }
              end
            end

            Riemann::Monitors::Proc.run
         | 
    
        data/bin/riemann-varnish
    ADDED
    
    | @@ -0,0 +1,54 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # Reports varnish stats to Riemann.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            require 'open3'
         | 
| 6 | 
            +
            require_relative "../lib/riemann-monitors"
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            class Riemann::Monitors::Varnish
         | 
| 9 | 
            +
              include Riemann::Monitors
         | 
| 10 | 
            +
             | 
| 11 | 
            +
              opt :varnish_host, "Varnish hostname", :default => `hostname`.chomp
         | 
| 12 | 
            +
             | 
| 13 | 
            +
              # Detects the installed varnish major version from `varnishstat -V`
              # and selects the counter names to query: the "MAIN."-prefixed names
              # for version 4 and later, the legacy names otherwise.
              #
              # Aborts with an explanatory message when the version banner cannot
              # be recognised.
              def initialize
                # NOTE(review): stdout and stderr are read together so the banner is
                # found on whichever stream varnishstat prints it - confirm which
                # stream the targeted releases use.
                banner, _status = Open3.capture2e('varnishstat -V')
                version = /varnishstat \(varnish-(\d+)/.match(banner)
                abort "FATAL: could not determine the varnish version from `varnishstat -V`" unless version

                @ver = version[1].to_i

                @vstats = if @ver >= 4
                            [ "MAIN.sess_conn",
                              "MAIN.sess_drop",
                              "MAIN.client_req",
                              "MAIN.cache_hit",
                              "MAIN.cache_miss" ]
                          else
                            [ "client_conn",
                              "client_drop",
                              "client_req",
                              "cache_hit",
                              "cache_miss" ]
                          end
              end
         | 
| 33 | 
            +
             | 
| 34 | 
            +
              def tick
         | 
| 35 | 
            +
                if @ver >= 4
         | 
| 36 | 
            +
                  stats = `varnishstat -1 -f #{@vstats.join(" -f ")}`
         | 
| 37 | 
            +
                else
         | 
| 38 | 
            +
                  stats = `varnishstat -1 -f #{@vstats.join(",")}`
         | 
| 39 | 
            +
                end
         | 
| 40 | 
            +
                stats.each_line do |stat|
         | 
| 41 | 
            +
                  m = stat.split()
         | 
| 42 | 
            +
                  report(
         | 
| 43 | 
            +
                         :host => opts[:varnish_host].dup,
         | 
| 44 | 
            +
                         :service => "varnish #{m[0]}",
         | 
| 45 | 
            +
                         :metric => m[1].to_f,
         | 
| 46 | 
            +
                         :state => "ok",
         | 
| 47 | 
            +
                         :description => "#{m[3..-1].join(' ')}",
         | 
| 48 | 
            +
                         :tags => ["varnish"]
         | 
| 49 | 
            +
                        )
         | 
| 50 | 
            +
                end
         | 
| 51 | 
            +
              end
         | 
| 52 | 
            +
            end
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            # Script entry point: start the Varnish monitor.
            Riemann::Monitors::Varnish.run
         | 
| @@ -0,0 +1,41 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # Gathers zookeeper STATS and submits them to Riemann.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            require_relative "../lib/riemann-monitors"
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            # Sends the `mntr` command to a ZooKeeper server and reports every
            # `key<TAB>value` line of the reply to Riemann as a metric.
            class Riemann::Monitors::Zookeeper
              include Riemann::Monitors
              require 'socket'

              opt :zookeeper_host, "Zookeeper hostname", :default => 'localhost'
              opt :zookeeper_port, "Zookeeper port", :default => 2181

              # Opens a fresh connection, issues `mntr`, and reports one event per
              # parsable reply line. The socket is always closed, even when reading
              # or reporting raises.
              def tick
                sock = TCPSocket.new(opts[:zookeeper_host], opts[:zookeeper_port])
                begin
                  sock.sync = true
                  sock.print("mntr")
                  sock.flush

                  # gets returns nil once the server closes the connection.
                  while (stats = sock.gets)
                    m = stats.match(/^(\w+)\t+(.*)/)
                    # Skip anything that is not a key/value line instead of
                    # crashing on a nil match.
                    next unless m

                    report(
                      :host => opts[:zookeeper_host].dup,
                      :service => "zookeeper #{m[1]}",
                      :metric => m[2].to_f,
                      :state => 'ok',
                      :tags => ['zookeeper']
                    )
                  end
                ensure
                  sock.close
                end
              end
            end
         | 
| 40 | 
            +
             | 
| 41 | 
            +
            # Script entry point: start the ZooKeeper monitor.
            Riemann::Monitors::Zookeeper.run
         | 
    
        data/data/statfields
    ADDED
    
    | @@ -0,0 +1,49 @@ | |
| 1 | 
            +
            ppid
         | 
| 2 | 
            +
            pgrp
         | 
| 3 | 
            +
            session
         | 
| 4 | 
            +
            tty_nr
         | 
| 5 | 
            +
            tpgid
         | 
| 6 | 
            +
            flags
         | 
| 7 | 
            +
            minflt
         | 
| 8 | 
            +
            cminflt
         | 
| 9 | 
            +
            majflt
         | 
| 10 | 
            +
            cmajflt
         | 
| 11 | 
            +
            utime
         | 
| 12 | 
            +
            stime
         | 
| 13 | 
            +
            cutime
         | 
| 14 | 
            +
            cstime
         | 
| 15 | 
            +
            priority
         | 
| 16 | 
            +
            nice
         | 
| 17 | 
            +
            num_threads
         | 
| 18 | 
            +
            itrealvalue
         | 
| 19 | 
            +
            starttime
         | 
| 20 | 
            +
            vsize
         | 
| 21 | 
            +
            rss
         | 
| 22 | 
            +
            rsslim
         | 
| 23 | 
            +
            startcode
         | 
| 24 | 
            +
            endcode
         | 
| 25 | 
            +
            startstack
         | 
| 26 | 
            +
            kstkesp
         | 
| 27 | 
            +
            kstkeip
         | 
| 28 | 
            +
            signal
         | 
| 29 | 
            +
            blocked
         | 
| 30 | 
            +
            sigignore
         | 
| 31 | 
            +
            sigcatch
         | 
| 32 | 
            +
            wchan
         | 
| 33 | 
            +
            nswap
         | 
| 34 | 
            +
            cnswap
         | 
| 35 | 
            +
            exit_signal
         | 
| 36 | 
            +
            processor
         | 
| 37 | 
            +
            rt_priority
         | 
| 38 | 
            +
            policy
         | 
| 39 | 
            +
            delayacct_blkio_ticks
         | 
| 40 | 
            +
            guest_time
         | 
| 41 | 
            +
            cguest_time
         | 
| 42 | 
            +
            start_data
         | 
| 43 | 
            +
            end_data
         | 
| 44 | 
            +
            start_brk
         | 
| 45 | 
            +
            arg_start
         | 
| 46 | 
            +
            arg_end
         | 
| 47 | 
            +
            env_start
         | 
| 48 | 
            +
            env_end
         | 
| 49 | 
            +
            exit_code
         | 
| @@ -0,0 +1,111 @@ | |
| 1 | 
            +
            # Core-class extension adding a multi-key membership check.
            class Hash
              # Returns true when every key in +rest+ is present in this hash.
              # With no arguments the answer is (vacuously) true.
              def has_keys?(*rest)
                rest.none? { |wanted| !key?(wanted) }
              end
            end
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            module Riemann
              # Mixin shared by the riemann-* monitor scripts.
              #
              # Including it gives the host class a small command-line option DSL
              # (`opt`, `options`, `run`) plus instance helpers to build a Riemann
              # client, decorate events and run a periodic `tick` loop.
              module Monitors
                # Installs the class-level DSL on +base+ and registers the options
                # common to every monitor.
                def self.included(base)
                  base.instance_eval do
                    # Instantiates the monitor and enters its reporting loop.
                    def run
                      new.run
                    end

                    # Records an option declaration to be replayed on the parser
                    # when `options` is called.
                    def opt(*a)
                      a.unshift :opt
                      @opts ||= []
                      @opts << a
                    end

                    # Builds a Trollop parser from the recorded declarations and
                    # parses ARGV with it.
                    def options
                      p = Trollop::Parser.new
                      @opts.each do |o|
                        p.send(*o)
                      end
                      Trollop::with_standard_exception_handling(p) do
                        p.parse ARGV
                      end
                    end

                    opt :host, "Riemann host", :default => '127.0.0.1'
                    opt :port, "Riemann port", :default => 5555
                    opt :event_host, "Event hostname", :type => String
                    opt :interval, "Seconds between updates", :default => 5
                    opt :tag, "Tag to add to events", :type => String, :multi => true
                    opt :ttl, "TTL for events", :type => Integer
                    opt :attribute, "Attribute to add to the event", :type => String, :multi => true
                    opt :timeout, "Timeout (in seconds) when waiting for acknowledgements", :default => 30
                    opt :tcp, "Use TCP transport instead of UDP (improves reliability, slight overhead).", :default => true
                    opt :ssl, "Use SSL.", default: false
                    opt :ssl_ca_file, "SSL certificate authority cert", :default => File.join(Dir.home, ".config", "riemann-tools", "ca.crt")
                    opt :ssl_cert_file, "SSL client certificate public key", :default => File.join(Dir.home, ".config", "riemann-tools", "#{Socket.gethostname}.crt")
                    opt :ssl_key_file, "SSL client certificate private key", :default => File.join(Dir.home, ".config", "riemann-tools", "#{Socket.gethostname}.key")
                  end
                end

                # Returns parsed options (cached) from command line.
                def options
                  @options ||= self.class.options
                end
                alias :opts :options

                # Returns the `--attribute key=value` options as a symbol-keyed hash
                # (cached).
                #
                # Only the first `=` separates key from value, so values may contain
                # `=` themselves. Entries without `=` or with an empty value are
                # ignored rather than aborting every report.
                def attributes
                  @attributes ||= Array(options[:attribute]).each_with_object({}) do |attr, parsed|
                    k, v = attr.split("=", 2)
                    next if k.nil? || v.nil? || v.empty?

                    parsed[k.to_sym] = v
                  end
                end

                # Decorates +event_hash+ in place and queues it on the Riemann client.
                #
                # Command-line tags are appended to the event's tags, `:ttl` defaults
                # to `--ttl` or twice the interval, `:host` defaults to `--event-host`,
                # and custom attributes are merged in last.
                def report(event_hash)
                  event_hash[:tags] = (event_hash[:tags] || []) + (options[:tag] || [])
                  event_hash[:ttl] ||= (options[:ttl] || (options[:interval] * 2))
                  event_hash[:host] ||= options[:event_host]
                  event_hash.merge!(attributes)
                  riemann.add_event(event_hash)
                end

                # Builds a new client from the parsed options; the TLS settings are
                # added only when `--ssl` is set.
                def new_riemann_client
                  riemann_options = {
                    :server  => "#{options[:host]}:#{options[:port]}",
                    :connect_timeout => options[:timeout]
                  }
                  if options.has_keys?(:ssl_ca_file, :ssl_cert_file, :ssl_key_file) && options[:ssl]
                    # These are given to OpenSSL::SSL::SSLContext
                    riemann_options[:ssl] = {
                      ca_file: File.expand_path(options[:ssl_ca_file]),
                      cert:    OpenSSL::X509::Certificate.new(File.read(File.expand_path(options[:ssl_cert_file]))),
                      key:     OpenSSL::PKey::RSA.new(File.read(File.expand_path(options[:ssl_key_file]))),
                      verify_mode: OpenSSL::SSL::VERIFY_PEER,
                      ssl_version: :TLSv1_2
                    }
                  end
                  Riemann::Experiment::Client.new(riemann_options)
                end

                # Returns the cached Riemann client, creating it on first use.
                def riemann
                  @riemann ||= new_riemann_client
                end
                alias :r :riemann

                # Main loop: call `tick`, send the client an `ok: true` message, then
                # sleep until the next interval boundary. Errors raised by a tick are
                # printed to stderr and the loop carries on.
                def run
                  t0 = Time.now
                  loop do
                    begin
                      tick
                      riemann.send_message(ok: true)
                    rescue => e
                      $stderr.puts "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
                    end

                    # Sleep until the next multiple of the interval since start, so
                    # slow ticks do not shift the schedule.
                    sleep(options[:interval] - ((Time.now - t0) % options[:interval]))
                  end
                end

                # Hook overridden by each monitor; the default does nothing.
                def tick
                end
              end
            end
         | 
    
        data/project.yaml
    ADDED