server_metrics 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .idea
19
+ .ruby-version
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in server_metrics.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Andre Lewis
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ # ServerMetrics
2
+
3
+ Collects key metrics on CPU, disks, memory, network interfaces, and processes.
4
+
5
+ ## Use
6
+
7
+ $ gem install server_metrics
8
+
9
+ require 'server_metrics'
10
+ cpu=ServerMetrics::Cpu.new
11
+ cpu.run
12
+ pp cpu.data.inspect
13
+
14
+ # same with:
15
+
16
+ ServerMetrics::Disk.new
17
+ ServerMetrics::Memory.new
18
+ ServerMetrics::Network.new
19
+
20
+ # also see:
21
+
22
+ ServerMetrics::SystemInfo.to_hash
23
+
24
+ ## Creating a new collector
25
+
26
+ Inherit from either Collector (if you're generating just one set of data), or MultiCollector (if you will be generating N
27
+ sets of data -- for example if you're monitoring disks or network interfaces).
28
+
29
+ ## TODOs
30
+
31
+ * better mac compatibility
32
+ * more test coverage
33
+ * rethink module hierarchy
34
+
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rake/testtask'
3
+
4
# Defines the `test` task: runs every file matching test/test*.rb with the
# test/ directory added to the load path.
Rake::TestTask.new do |test_task|
  test_task.libs.push("test")
  test_task.test_files = FileList['test/test*.rb']
  test_task.verbose = true
end

# Running `rake` with no arguments runs the test suite.
task :default => [:test]
@@ -0,0 +1,20 @@
1
+ require "rubygems"
2
+ require File.dirname(__FILE__)+ "/lib/server_metrics"
3
+ require "pry"
4
+ require "awesome_print"
5
+
6
+
7
# Scratch script: sample process metrics twice, one second apart, and
# pretty-print the second sample.
#
# The local is named `processes` rather than `p` so it does not shadow
# Kernel#p inside this script.
# NOTE(review): the argument 1 is presumably the processor count
# (example_usage.rb passes ServerMetrics::SystemInfo.num_processors) -- confirm
# against ServerMetrics::Processes#initialize.
processes = ServerMetrics::Processes.new(1)
puts "starting ..."
processes.run
sleep 1
ap processes.run

#processes.get_overall_cpu
#ServerMetrics::ProcessList.add_cpu_time(ServerMetrics::ProcessList.group)
#puts "sleeping ..."
#sleep 1
#puts "#### overall"
#ap processes.get_overall_cpu
#puts "#### individual processes"
#ap ServerMetrics::ProcessList.add_cpu_time(ServerMetrics::ProcessList.group)
data/example_usage.rb ADDED
@@ -0,0 +1,43 @@
1
+ require "rubygems"
2
+ require File.dirname(__FILE__)+ "/lib/server_metrics"
3
+ require "pry"
4
+
5
# Example driver that owns one instance of each collector and runs them
# together.
#
# num_runs   - how many times #run has completed.
# latest_run - Hash of the most recent results, keyed by collector name, plus
#              a :system_info entry captured once at construction time.
class Harness
  attr_accessor :num_runs, :latest_run

  def initialize
    @num_runs = 0

    # The same collector instances are reused on every run so that they keep
    # their memory between runs.
    @collectors = {}
    @collectors[:disks]     = ServerMetrics::Disk.new
    @collectors[:cpu]       = ServerMetrics::Cpu.new
    @collectors[:memory]    = ServerMetrics::Memory.new
    @collectors[:network]   = ServerMetrics::Network.new
    @collectors[:processes] = ServerMetrics::Processes.new(ServerMetrics::SystemInfo.num_processors)

    @system_info = ServerMetrics::SystemInfo.to_h
  end

  # Runs every collector, stores the combined results in latest_run and
  # returns the updated run count.
  def run
    readings = @collectors.inject({}) do |acc, (name, collector)|
      acc[name] = collector.run
      acc
    end

    @latest_run = readings.merge(:system_info => @system_info)

    @num_runs += 1
  end
end
27
+
28
harness = Harness.new

# First pass only primes the collectors; the results printed below come from
# the second pass, taken one second later.
harness.run
sleep(1)

harness.run
pp(harness.latest_run)
34
+
35
+ #puts "starting"
36
+ #while(true) do
37
+ # harness.run
38
+ # puts "running at #{Time.now}"
39
+ # File.open("server_metrics.json","w") do |f|
40
+ # f.puts harness.latest_run
41
+ # end
42
+ # sleep 15
43
+ #end
@@ -0,0 +1,147 @@
1
+
2
+ # The base class for ServerMetrics collectors.
3
+ #
4
+ # Some collectors inherit directly from Collector, and some inherit from MultiCollector.
5
+ # The difference: if you're collecting for an arbitrary number of instances (say, disks), use MultiCollector.
6
+ # Otherwise, use Collector.
7
+ #
8
+ # Relative to Scout's plugins, Collectors have a few differences:
9
+ #
10
+ # 1. simplified: no options parsing. simpler interface to reporting and memory (these methods only take a hash)
11
+ # 2. intended to persist in memory: a collector maintains its own memory. Reuse the same instance as many times as needed.
12
+ # If you need to persist to disk, use the to_hash and from_hash methods.
13
+ #
14
module ServerMetrics
  # Base class for collectors that report a single set of metrics.
  #
  # A subclass implements #build_report and, from inside it, calls #report
  # (values for the current run), #remember / #memory (state kept between
  # runs) and #counter (rate of change of an ever-increasing value). #run
  # clears the per-run data, invokes #build_report and returns the data hash.
  class Collector
    attr_reader :collector_id
    attr_accessor :data, :error

    # options - Hash of collector options (String or Symbol keys); nil is
    #           treated as an empty Hash.
    def initialize(options={})
      @options = options || {}
      @data = {}
      @memory = {}
      # Keys are compared as strings so mixed Symbol/String keys can be sorted,
      # and interpolation keeps anonymous classes (name == nil) from raising.
      option_parts = @options.to_a.sort_by { |pair| pair.first.to_s }.flatten
      @collector_id = "#{self.class.name}-#{option_parts.join('-')}"
      @error = nil
    end

    # Looks up an option by name, accepting either the String or the Symbol
    # form of the key.
    def option(name)
      @options[name] || @options[name.is_a?(String) ? name.to_sym : String(name)]
    end

    # Resets the per-run data, calls the subclass's #build_report and returns
    # the Hash of reported values.
    def run
      @data = {}
      build_report
      @data
    end

    # Merges +hash+ into the data reported for the current run.
    def report(hash)
      @data.merge!(hash)
    end

    # memory(:no_track)
    # memory.delete(:no_track)
    # memory.clear
    #
    # With no argument returns the whole memory Hash; with a name returns the
    # remembered value, trying both the String and Symbol form of the key.
    def memory(name = nil)
      if name.nil?
        @memory
      else
        @memory[name] || @memory[name.is_a?(String) ? name.to_sym : String(name)]
      end
    end

    # remember(name1: value1, name2: value2)
    #
    # Stores values that should survive until later runs.
    def remember(hash)
      @memory.merge!(hash)
    end

    # counter(:rkbps, stats['rsect'] / 2, :per => :second)
    # counter(:rpm, request_counter, :per => :minute)
    # counter(:swap_ins, vmstat['pswpin'], :per => :second, :round => true)
    #
    # Reports the rate of change of +value+ since the previous call with the
    # same +name+. Nothing is reported on the first call, when the value has
    # gone backwards (wrapped), or when less than one second has elapsed; the
    # current sample is remembered in every case.
    #
    # options[:per]   - :second or :minute (required once a rate is computed;
    #                   anything else raises RuntimeError).
    # options[:round] - number of decimal places, or true for one decimal.
    # block           - optional; receives (last_value, value) and returns the
    #                   difference to use instead of value - last_value.
    def counter(name, value, options = {}, &block)
      current_time = Time.now

      if previous = memory("_counter_#{name}")
        last_time, last_value = previous[:time], previous[:value]
        elapsed_seconds = current_time - last_time

        # We won't log it if the value has wrapped or enough time hasn't
        # elapsed
        if value >= last_value && elapsed_seconds >= 1
          result = block ? block.call(last_value, value) : value - last_value

          case options[:per]
          when :second, 'second'
            result = result / elapsed_seconds.to_f
          when :minute, 'minute'
            result = result / elapsed_seconds.to_f * 60.0
          else
            raise "Unknown option for ':per': #{options[:per].inspect}"
          end

          if options[:round]
            # `:round => true` (as shown in the usage examples) means one
            # decimal place; `10 ** true` would otherwise raise a TypeError.
            precision = options[:round] == true ? 1 : options[:round]
            scale = 10 ** precision
            result = (result * scale).round / scale.to_f
          end

          report(name => result)
        end
      end

      remember("_counter_#{name}" => {:time => current_time, :value => value})
    end

    # Convert strings containing 'T,G,M,or K' to MB. The result is a float only -- units are NOT returned
    #
    # The unit checks run in the order G, M, K, T and are case-insensitive;
    # a value with no recognised unit is returned as-is (as a Float). The
    # result is rounded to one decimal place.
    def convert_to_mb(value)
      text = value.to_s # tolerate nil / numeric input
      megabytes = if text =~ /G/i
                    text.to_f * 1024.0
                  elsif text =~ /M/i
                    text.to_f
                  elsif text =~ /K/i
                    text.to_f / 1024.0
                  elsif text =~ /T/i
                    text.to_f * 1024.0 * 1024.0
                  else
                    text.to_f
                  end
      ("%.1f" % [megabytes]).to_f
    end

    # Turns a label such as "Use%" or "Last minute" into a Symbol:
    # downcased, spaces replaced by underscores, "%" spelled out as "percent".
    def normalize_key(key)
      key.to_s.downcase.gsub(" ", "_").gsub("%", "percent").to_sym
    end

    # returns a hash you can serialize and store on disk, or just hold onto and re-instantiate the collector later.
    # Why you'd need to do this: to persist the memory (including counters) of a plugin instance.
    #
    # Collector.from_hash(h) is the flipside of this: Collector.from_hash(plugin.to_hash) gets you essentially the same instance
    #
    # :plugin_id is kept for backward compatibility and mirrors :collector_id
    # (it previously read an instance variable that was never assigned, so it
    # was always nil).
    def to_hash
      {:options => @options, :memory => @memory, :data => @data,
       :collector_id => @collector_id, :plugin_id => @collector_id}
    end

    # see to_hash. The hash should contain :options and :memory keys
    #
    # Instantiates the receiver class (so SubCollector.from_hash returns a
    # SubCollector); missing entries default to empty Hashes.
    def self.from_hash(hash)
      collector = new(hash[:options] || {})
      collector.instance_variable_set('@memory', hash[:memory] || {})
      collector.instance_variable_set('@data', hash[:data] || {})
      collector
    end

    # True when the target OS name contains "linux" (covers values such as
    # "linux" and "linux-gnu").
    def linux?
      !!(RbConfig::CONFIG['target_os'].to_s =~ /linux/i)
    end

    # True when the target OS name contains "darwin". The value normally
    # carries a version suffix (e.g. "darwin21"), so an exact comparison
    # against 'darwin' never matched.
    def osx?
      !!(RbConfig::CONFIG['target_os'].to_s =~ /darwin/i)
    end

  end
end
@@ -0,0 +1,137 @@
1
+ require "time"
2
+
3
# Collects CPU usage percentages (from two successive /proc/stat samples) and
# the system load averages (from `uptime`).
class ServerMetrics::Cpu < ServerMetrics::Collector

  # Raised by CpuStats.fetch when /proc/stat cannot be read.
  # Subclasses StandardError (not Exception) so that generic `rescue` clauses
  # in callers can handle it.
  class ProcStatError < StandardError
  end

  # Reports CPU percentages once a previous sample is available in memory,
  # remembers the current sample, then reports the 1/5/15 minute load
  # averages. Failures are recorded in @error instead of being raised.
  def build_report
    begin
      stats = CpuStats.fetch

      if previous = memory(:cpu_stats)
        previous_stats = CpuStats.from_hash(previous)

        report stats.diff(previous_stats)
      end

      remember(:cpu_stats => stats.to_h)
    rescue ProcStatError
      @error = "could not retrieve CPU stats from /proc/stat"
    end

    ENV['LANG'] = 'C' # forcing english for parsing
    uptime_output = `uptime`
    matches = uptime_output.match(/load averages?: ([\d.]+),? ([\d.]+),? ([\d.]+)\Z/)

    if matches
      report("Last minute" => matches[1].to_f,
             "Last five minutes" => matches[2].to_f,
             "Last fifteen minutes" => matches[3].to_f)
    else
      # Unparseable `uptime` output used to crash with NoMethodError on nil.
      @error ||= "could not parse load averages from uptime output"
    end
  end

  # Helper class: one sample of the counters in /proc/stat.
  class CpuStats
    attr_accessor :user, :system, :idle, :iowait, :interrupts, :procs_running, :procs_blocked, :time, :steal

    # Reads /proc/stat and returns a populated CpuStats.
    # Raises ProcStatError when the file cannot be read.
    def self.fetch
      output = `cat /proc/stat 2>&1`

      if $? and !$?.success?
        raise ProcStatError, output
      end

      data = output.split(/\n/).collect { |line| line.split }

      cpu_stats = CpuStats.new

      if cpu = data.detect { |line| line[0] == 'cpu' }
        user, nice, sys, idle, iowait, hardirq, softirq, steal = cpu[1..-1].collect { |c| c.to_i }
        # nice time is folded into user, and irq/softirq time into system.
        # to_i guards against lines that carry fewer columns than expected.
        cpu_stats.user   = user.to_i + nice.to_i
        cpu_stats.system = sys.to_i + hardirq.to_i + softirq.to_i
        cpu_stats.idle   = idle
        cpu_stats.iowait = iowait
        cpu_stats.steal  = steal
      end

      # For these lines only the first number after the label is used.
      first_value = lambda do |label|
        row = data.detect { |line| line[0] == label }
        row && row[1] && row[1].to_i
      end

      cpu_stats.interrupts    = first_value.call('intr')
      cpu_stats.procs_running = first_value.call('procs_running')
      cpu_stats.procs_blocked = first_value.call('procs_blocked')

      cpu_stats
    end

    # Rebuilds a CpuStats from the Hash produced by #to_h (String or Symbol
    # keys). :time may be a Time or a parseable String; if it cannot be
    # parsed it is kept as given.
    def self.from_hash(h)
      cpu_stats = CpuStats.new
      hash = {}
      h.each { |k, v| hash[k.to_sym] = v }

      if time = hash.delete(:time)
        cpu_stats.time = begin
          time.is_a?(Time) ? time : Time.parse(time)
        rescue StandardError
          time
        end
      end

      hash.each do |k, v|
        cpu_stats.send("#{k}=", v) if cpu_stats.respond_to?("#{k}=")
      end
      cpu_stats
    end

    def initialize
      self.time = Time.now
    end

    # Returns a Hash of usage figures computed against an earlier sample
    # +other+: "User", "System", "Idle", "IO wait" (and "Steal" when present)
    # as percentages of the ticks elapsed between the samples, the current
    # "Procs running" / "Procs blocked", and "Interrupts" per second.
    #
    # Percentages are omitted when no ticks elapsed, and "Interrupts" is
    # omitted when no time elapsed, rather than reporting NaN/Infinity.
    def diff(other)
      diff_user   = user.to_i - other.user.to_i
      diff_system = system.to_i - other.system.to_i
      diff_idle   = idle.to_i - other.idle.to_i
      diff_iowait = iowait.to_i - other.iowait.to_i

      total = diff_user + diff_system + diff_idle + diff_iowait

      diff_steal = nil
      if steal && other.steal && steal > 0
        diff_steal = steal - other.steal
        total += diff_steal
      end

      results = {}

      if total > 0
        # The previous formula added total/2 before dividing. That offset is
        # the round-to-nearest trick for *integer* division; with float
        # arithmetic it inflated every percentage by roughly 0.5.
        scale = 100.0 / total
        results["User"]    = diff_user * scale
        results["System"]  = diff_system * scale
        results["Idle"]    = diff_idle * scale
        results["IO wait"] = diff_iowait * scale
      end

      results["Procs running"] = procs_running
      results["Procs blocked"] = procs_blocked

      if diff_steal && total > 0
        results["Steal"] = diff_steal * scale
      end

      if time && other.time
        diff_in_seconds = time.to_f - other.time.to_f

        if diff_in_seconds > 0
          results["Interrupts"] = (interrupts.to_f - other.interrupts.to_f) / diff_in_seconds
        end
      end

      results
    end

    # Serializable snapshot of this sample. :time is the moment the sample
    # was taken (falling back to now if unset), stored as a String.
    def to_h
      {
        :user => user, :system => system, :idle => idle, :iowait => iowait,
        :interrupts => interrupts, :procs_running => procs_running,
        :procs_blocked => procs_blocked, :time => (time || Time.now).to_s,
        :steal => steal
      }
    end

  end
end
137
+
@@ -0,0 +1,106 @@
1
+ # Collects Disk metrics on eligible filesystems. Reports a hash of hashes, with the first hash keyed by device name.
2
+ #
3
+ # TODO: Currently, this reports on devices that begin with /dev as listed by `mount`. Revisit this.
4
+ # TODO: relies on /proc/diskstats, so not mac compatible. Figure out mac compatibility
5
+ #
6
class ServerMetrics::Disk < ServerMetrics::MultiCollector

  # Gathers size information (from `df -h`) for every /dev device listed by
  # `mount`, plus I/O statistics (from /proc/diskstats) when running on Linux.
  def build_report
    # Force the C locale *before* shelling out so the df header is in English;
    # it was previously set only after df had already run.
    ENV['LANG'] = 'C'

    @df_output = `df -h`.split("\n")
    # any device that starts with /dev; uniq so a device mounted more than
    # once is only processed once per run
    @devices = `mount`.split("\n").grep(/^\/dev/).map { |l| l.split.first }.uniq

    @devices.each do |device|
      get_sizes(device) # does its own reporting
      get_stats(device) if linux? # does its own reporting
    end
  end

  # called from build_report for each device
  #
  # Reports the df row whose "Filesystem" column equals +device+, as a Hash
  # keyed by the df column headers. Does nothing if df produced no output or
  # has no row for the device.
  def get_sizes(device)
    header_line = @df_output && @df_output.first
    return unless header_line

    num_columns = header_line.include?("iused") ? 9 : 6 # Mac has extra columns
    headers = header_line.split(/\s+/, num_columns)

    # Each row looks like {"%iused" => "38%", "Avail" => "289Gi", "Capacity" => "38%",
    # "Filesystem" => "/dev/disk0s2", "Mounted on" => "/", "Size" => "465Gi", ...}.
    # Every data row is parsed, including the last one (which used to be skipped).
    rows = (@df_output[1..-1] || []).map do |line|
      Hash[headers.zip(line.split(/\s+/, num_columns))]
    end

    row = rows.find { |r| r["Filesystem"] == device }
    return unless row

    # NOTE(review): the original compared these String keys against a list of
    # Symbols (:avail, :capacity, :size, :used, :usepercent) before calling
    # convert_to_mb, so the conversion never ran (normalize_key was commented
    # out). Values are therefore still reported exactly as df printed them;
    # decide whether MB conversion / key normalization should be enabled.
    report(device, row.dup)
  end

  # called from build_report for each device
  #
  # Reports per-second I/O counters for +device+ and, once a previous sample
  # exists, the average time per I/O ("Await").
  def get_stats(device)
    stats = iostat(device)

    if stats
      counter(device, "RPS", stats['rio'], :per => :second)
      counter(device, "WPS", stats['wio'], :per => :second)
      counter(device, "Kb RPS", stats['rsect'] / 2, :per => :second)
      counter(device, "Kb WPS", stats['wsect'] / 2, :per => :second)
      counter(device, "Utilization", stats['use'] / 10.0, :per => :second)
      # Not 100% sure that average queue length is present on all distros.
      if stats['aveq']
        counter(device, "Average queue length", stats['aveq'], :per => :second)
      end

      if old = memory(device, "stats")
        ios = (stats['rio'] - old['rio']) + (stats['wio'] - old['wio'])

        if ios > 0
          await = ((stats['ruse'] - old['ruse']) + (stats['wuse'] - old['wuse'])) / ios.to_f

          report(device, "Await" => await)
        end
      end

      remember(device, "stats" => stats)
    end
  end

  private
  COLUMNS = %w(major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq).freeze

  # Returns the /proc/diskstats line associated with device name +dev+. Logic:
  #
  # * If an exact match of the specified device is found, returns it. The
  #   match is made on the device's basename, because `mount` reports
  #   "/dev/sda1" while /proc/diskstats lists "sda1".
  # * If there isn't an exact match but there are /proc/diskstats lines that are included in +dev+,
  #   returns the first matching line. This is needed as the mount output used to find the default device doesn't always
  #   match /proc/diskstats output.
  # * If there are no matches but an LVM is used, returns the line matching "dm-0".
  #   NOTE(review): this branch depends on @default_device_used, which nothing
  #   in this class sets -- confirm whether it is set elsewhere.
  def iostat(dev)
    short_name = File.basename(dev)
    # if a LVM is used, `mount` output doesn't map to `/diskstats`. In this case, use dm-0 as the default device.
    lvm = nil
    retried = false
    possible_devices = []
    begin
      # Reset on every attempt so a retry does not accumulate duplicates.
      lvm = nil
      possible_devices = []

      %x(cat /proc/diskstats).split(/\n/).each do |line|
        fields = line.strip.split(/\s+/).collect { |v| Integer(v) rescue v }
        entry = Hash[COLUMNS.zip(fields)]
        name = entry['name']
        # Skip blank/short lines (nil name) and anything that is not a String.
        next unless name.is_a?(String) && !name.empty?

        possible_devices << entry if dev.include?(name)
        lvm = entry if (@default_device_used and 'dm-0'.include?(name))
      end
    rescue Errno::EPIPE
      if retried
        raise
      else
        retried = true
        retry
      end
    end

    found_device = possible_devices.find { |entry| entry['name'] == short_name || entry['name'] == dev }
    found_device ||= possible_devices.first
    return found_device || lvm
  end

end