server_metrics 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .idea
19
+ .ruby-version
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Declare this gem's dependencies in server_metrics.gemspec; the `gemspec` call below takes them from that file.
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Andre Lewis
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ # ServerMetrics
2
+
3
+ Collects key metrics on CPU, disks, memory, network interfaces, and processes.
4
+
5
+ ## Use
6
+
7
+ $ gem install server_metrics
8
+
9
+ require 'server_metrics'
10
+ cpu=ServerMetrics::Cpu.new
11
+ cpu.run
12
+ pp cpu.data.inspect
13
+
14
+ # same with:
15
+
16
+ ServerMetrics::Disk.new
17
+ ServerMetrics::Memory.new
18
+ ServerMetrics::Network.new
19
+
20
+ # also see:
21
+
22
+ ServerMetrics::SystemInfo.to_hash
23
+
24
+ ## Creating a new collector
25
+
26
+ Inherit from either Collector (if you're generating just one set of data), or MultiCollector (if you will be generating N
27
+ sets of data -- for example if you're monitoring disks or network interfaces).
28
+
29
+ ## TODOs
30
+
31
+ * better mac compatibility
32
+ * more test coverage
33
+ * rethink module hierarchy
34
+
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rake/testtask'
3
+
4
# Test task: adds "test" to the load path and runs every file matching
# test/test*.rb in verbose mode.
Rake::TestTask.new do |test_task|
  test_task.libs.push("test")
  test_task.verbose = true
  test_task.test_files = FileList['test/test*.rb']
end

# Running `rake` with no arguments runs the test task.
task :default => :test
@@ -0,0 +1,20 @@
1
+ require "rubygems"
2
+ require File.dirname(__FILE__)+ "/lib/server_metrics"
3
+ require "pry"
4
+ require "awesome_print"
5
+
6
+
7
# Samples process metrics twice, one second apart, and pretty-prints the
# second sample with awesome_print.
# NOTE(review): the argument to Processes.new is presumably the processor
# count (example_usage.rb passes SystemInfo.num_processors) -- confirm.
#
# The local is deliberately not called `p`, which would shadow Kernel#p.
processes = ServerMetrics::Processes.new(1)
puts "starting ..."
processes.run
sleep 1
ap processes.run
12
+
13
+ #p.get_overall_cpu
14
+ #ServerMetrics::ProcessList.add_cpu_time(ServerMetrics::ProcessList.group)
15
+ #puts "sleeping ..."
16
+ #sleep 1
17
+ #puts "#### overall"
18
+ #ap p.get_overall_cpu
19
+ #puts "#### individual processes"
20
+ #ap ServerMetrics::ProcessList.add_cpu_time(ServerMetrics::ProcessList.group)
data/example_usage.rb ADDED
@@ -0,0 +1,43 @@
1
+ require "rubygems"
2
+ require File.dirname(__FILE__)+ "/lib/server_metrics"
3
+ require "pry"
4
+
5
# Example driver object: owns one instance of each collector and runs them
# all on demand.
#
# num_runs   - how many times #run has completed
# latest_run - hash of the most recent results, keyed by collector name,
#              plus a :system_info entry
class Harness
  attr_accessor :num_runs, :latest_run

  def initialize
    @num_runs = 0

    # Collectors are long-lived: the same instances are reused on every #run.
    @collectors = {}
    @collectors[:disks]     = ServerMetrics::Disk.new()
    @collectors[:cpu]       = ServerMetrics::Cpu.new()
    @collectors[:memory]    = ServerMetrics::Memory.new()
    @collectors[:network]   = ServerMetrics::Network.new()
    @collectors[:processes] = ServerMetrics::Processes.new(ServerMetrics::SystemInfo.num_processors)

    # Gathered once here and attached to every run's results.
    @system_info = ServerMetrics::SystemInfo.to_h
  end

  # Runs every collector, stores the combined results in latest_run and
  # returns the updated run count.
  def run
    results = @collectors.inject({}) do |collected, (name, collector)|
      collected[name] = collector.run
      collected
    end
    results[:system_info] = @system_info

    @latest_run = results
    @num_runs += 1
  end
end
27
+
28
# Run the harness twice with a one second pause in between, then print the
# results of the second pass.
harness = Harness.new

2.times do |pass|
  sleep 1 unless pass.zero?
  harness.run
end

pp harness.latest_run
34
+
35
+ #puts "starting"
36
+ #while(true) do
37
+ # harness.run
38
+ # puts "running at #{Time.now}"
39
+ # File.open("server_metrics.json","w") do |f|
40
+ # f.puts harness.latest_run
41
+ # end
42
+ # sleep 15
43
+ #end
@@ -0,0 +1,147 @@
1
+
2
# The base class for ServerMetrics collectors.
#
# Some collectors inherit directly from Collector, and some inherit from MultiCollector.
# The difference: if you're collecting for an arbitrary number of instances (say, disks), use MultiCollector.
# Otherwise, use Collector.
#
# Relative to Scout's plugins, Collectors have a few differences:
#
# 1. simplified: no options parsing. simpler interface to reporting and memory (these methods only take a hash)
# 2. intended to persist in memory: a collector maintains its own memory. Reuse the same instance as many times as needed.
#    If you need to persist to disk, use the to_hash and from_hash methods.
#
# #run calls #build_report, which this class does not define; subclasses supply it.
#
module ServerMetrics
  class Collector
    attr_reader :collector_id
    attr_accessor :data, :error

    # options - hash of collector options (String or Symbol keys); nil is treated as {}.
    #
    # collector_id is "<class name>-<key>-<value>-..." with the options ordered by key.
    # Keys are compared by their string form so a mix of String and Symbol keys
    # does not make the sort raise.
    def initialize(options={})
      @options = options || {}
      @data = {}
      @memory = {}
      ordered_options = @options.to_a.sort_by { |key, _value| key.to_s }
      @collector_id = "#{self.class.name}-#{ordered_options.flatten.join('-')}"
      @error = nil
    end

    # Returns the option stored under +name+, trying the String/Symbol twin of
    # the key when the key as given is absent (or falsy).
    def option(name)
      indifferent_lookup(@options, name)
    end

    # Clears the previous report, calls build_report, and returns the data
    # reported during this run.
    def run
      @data = {}
      build_report
      @data
    end

    # Merges +hash+ into the data for the current run.
    def report(hash)
      @data.merge!(hash)
    end

    # memory(:no_track)
    # memory.delete(:no_track)
    # memory.clear
    #
    # With no argument returns the whole memory hash; with a name returns that
    # entry, trying the String/Symbol twin of the key as #option does.
    def memory(name = nil)
      return @memory if name.nil?

      indifferent_lookup(@memory, name)
    end

    # remember(name1: value1, name2: value2)
    #
    # Merges +hash+ into the collector's memory, which survives across runs.
    def remember(hash)
      @memory.merge!(hash)
    end

    # counter(:rkbps, stats['rsect'] / 2, :per => :second)
    # counter(:rpm, request_counter, :per => :minute)
    # counter(:swap_ins, vmstat['pswpin'], :per => :second, :round => true)
    #
    # Reports the rate of change of an ever-increasing value. Nothing is reported
    # on the first call for a given name, when the value has gone backwards
    # (wrapped), or when less than one second has passed since the previous
    # reading. The current reading is always remembered for the next call.
    #
    # options[:per]   - :second or :minute (Symbol or String); anything else raises.
    # options[:round] - number of decimal places to round to; +true+ means 1.
    # block           - optional; receives (last_value, value) and returns the
    #                   difference to use instead of value - last_value.
    def counter(name, value, options = {}, &block)
      current_time = Time.now
      memory_key = "_counter_#{name}"

      if (previous = memory(memory_key))
        last_time, last_value = previous[:time], previous[:value]
        elapsed_seconds = current_time - last_time

        # We won't log it if the value has wrapped or enough time hasn't
        # elapsed
        if value >= last_value && elapsed_seconds >= 1
          result = block ? block.call(last_value, value) : value - last_value

          case options[:per]
          when :second, 'second'
            result = result / elapsed_seconds.to_f
          when :minute, 'minute'
            result = result / elapsed_seconds.to_f * 60.0
          else
            raise "Unknown option for ':per': #{options[:per].inspect}"
          end

          if (digits = options[:round])
            # :round => true is the documented shorthand; 10 ** true would raise.
            digits = 1 if digits == true
            factor = 10 ** digits
            result = (result * factor).round / factor.to_f
          end

          report(name => result)
        end
      end

      remember(memory_key => {:time => current_time, :value => value})
    end

    # Convert strings containing 'T,G,M,or K' to MB. The result is a float only -- units are NOT returned
    #
    # The value is stringified first, so numbers and nil are accepted too; a
    # value with no unit letter is returned as-is (rounded to one decimal).
    def convert_to_mb(value)
      text = value.to_s
      megabytes = case text
                  when /G/i then text.to_f * 1024.0
                  when /M/i then text.to_f
                  when /K/i then text.to_f / 1024.0
                  when /T/i then text.to_f * 1024.0 * 1024.0
                  else text.to_f
                  end
      ("%.1f" % [megabytes]).to_f
    end

    # Turns a label into a symbol key: lower-cased, spaces become underscores
    # and "%" becomes "percent" (e.g. "Use%" => :usepercent).
    def normalize_key(key)
      key.to_s.downcase.gsub(" ", "_").gsub("%", "percent").to_sym
    end

    # returns a hash you can serialize and store on disk, or just hold onto and re-instantiate the collector later.
    # Why you'd need to do this: to persist the memory (including counters) of a plugin instance.
    #
    # Collector.from_hash(h) is the flipside of this: Collector.from_hash(plugin.to_hash) gets you essentially the same instance
    #
    # The :plugin_id entry carries this collector's collector_id.
    def to_hash
      {:options => @options, :memory => @memory, :data => @data, :plugin_id => @collector_id}
    end

    # see to_hash. The hash should contain :options and :memory keys
    # (String keys are accepted as well). Missing entries default to empty
    # hashes. Called on a subclass, this builds an instance of that subclass.
    def self.from_hash(hash)
      collector = new(hash[:options] || hash['options'] || {})
      collector.instance_variable_set('@memory', hash[:memory] || hash['memory'] || {})
      collector.instance_variable_set('@data', hash[:data] || hash['data'] || {})
      collector
    end

    # True when Ruby's target OS is a Linux variant (e.g. "linux", "linux-musl").
    def linux?
      target_os.include?('linux')
    end

    # True when Ruby's target OS is Darwin. The configured value carries a
    # version suffix (e.g. "darwin21"), so an equality test is not enough.
    def osx?
      target_os.include?('darwin')
    end

    private

    # Looks +name+ up in +store+, falling back to the Symbol form of a String
    # key or the String form of any other key.
    def indifferent_lookup(store, name)
      store[name] || store[name.is_a?(String) ? name.to_sym : String(name)]
    end

    # Lower-cased RbConfig target_os ("" when unset).
    def target_os
      RbConfig::CONFIG['target_os'].to_s.downcase
    end

  end
end
@@ -0,0 +1,137 @@
1
+ require "time"
2
+
3
+ class ServerMetrics::Cpu < ServerMetrics::Collector
4
+
5
+ # Raised by #CpuStats when an error reading /proc/stats.
6
+ class ProcStatError < Exception
7
+ end
8
+
9
+ def build_report
10
+ begin
11
+ stats = CpuStats.fetch
12
+
13
+ if previous = memory(:cpu_stats)
14
+ previous_stats = CpuStats.from_hash(previous)
15
+
16
+ report stats.diff(previous_stats)
17
+ end
18
+
19
+ remember(:cpu_stats => stats.to_h)
20
+ rescue ProcStatError
21
+ @error = "could not retrieve CPU stats from /proc/stat"
22
+ end
23
+
24
+ ENV['LANG'] = 'C' # forcing english for parsing
25
+ uptime_output = `uptime`
26
+ matches = uptime_output.match(/load averages?: ([\d.]+),? ([\d.]+),? ([\d.]+)\Z/)
27
+
28
+ report("Last minute" => matches[1].to_f,
29
+ "Last five minutes" => matches[2].to_f,
30
+ "Last fifteen minutes" => matches[3].to_f)
31
+ end
32
+
33
+ # Helper class
34
+ class CpuStats
35
+ attr_accessor :user, :system, :idle, :iowait, :interrupts, :procs_running, :procs_blocked, :time, :steal
36
+
37
+ def self.fetch
38
+ output = `cat /proc/stat 2>&1`
39
+
40
+ if $? and !$?.success?
41
+ raise ProcStatError, output
42
+ end
43
+
44
+ data = output.split(/\n/).collect { |line| line.split }
45
+
46
+ cpu_stats = CpuStats.new
47
+
48
+ if cpu = data.detect { |line| line[0] == 'cpu' }
49
+ cpu_stats.user, nice, cpu_stats.system, cpu_stats.idle, cpu_stats.iowait,
50
+ hardirq, softirq, cpu_stats.steal = *cpu[1..-1].collect { |c| c.to_i }
51
+ cpu_stats.user += nice
52
+ cpu_stats.system += hardirq + softirq
53
+ end
54
+
55
+ if interrupts = data.detect { |line| line[0] == 'intr' }
56
+ cpu_stats.interrupts, _ = *interrupts[1..-1].collect { |c| c.to_i }
57
+ end
58
+
59
+ if procs_running = data.detect { |line| line[0] == 'procs_running' }
60
+ cpu_stats.procs_running, _ = *procs_running[1..-1].collect { |c| c.to_i }
61
+ end
62
+
63
+ if procs_blocked = data.detect { |line| line[0] == 'procs_blocked' }
64
+ cpu_stats.procs_blocked, _ = *procs_blocked[1..-1].collect { |c| c.to_i }
65
+ end
66
+
67
+ cpu_stats
68
+ end
69
+
70
+ def self.from_hash(h)
71
+ cpu_stats= CpuStats.new
72
+ hash = {}
73
+ h.each { |k, v| hash[k.to_sym] = v }
74
+
75
+ if time = hash.delete(:time)
76
+ cpu_stats.time = Time.parse(time) rescue time
77
+ end
78
+
79
+ hash.each do |k, v|
80
+ cpu_stats.send("#{k}=", v) if cpu_stats.respond_to?("#{k}=")
81
+ end
82
+ cpu_stats
83
+ end
84
+
85
+ def initialize
86
+ self.time = Time.now
87
+ end
88
+
89
+ def diff(other)
90
+ diff_user = user - other.user
91
+ diff_system = system - other.system
92
+ diff_idle = idle - other.idle
93
+ diff_iowait = iowait - other.iowait
94
+
95
+ div = diff_user + diff_system + diff_idle + diff_iowait
96
+
97
+ if steal && other.steal && steal > 0
98
+ diff_steal = steal - other.steal
99
+ div += diff_steal
100
+ end
101
+
102
+ divo2 = div / 2
103
+
104
+ results = {
105
+ "User" => (100.0 * diff_user + divo2) / div,
106
+ "System" => (100.0 * diff_system + divo2) / div,
107
+ "Idle" => (100.0 * diff_idle + divo2) / div,
108
+ "IO wait" => (100.0 * diff_iowait + divo2) / div,
109
+ "Procs running" => self.procs_running,
110
+ "Procs blocked" => self.procs_blocked
111
+ }
112
+
113
+ if diff_steal && steal > 0
114
+ results["Steal"] = (100.0 * diff_steal + divo2) / div
115
+ end
116
+
117
+ if self.time && other.time
118
+ diff_in_seconds = self.time.to_f - other.time.to_f
119
+
120
+ results["Interrupts"] = (self.interrupts.to_f - other.interrupts.to_f) / diff_in_seconds
121
+ end
122
+
123
+ results
124
+ end
125
+
126
+ def to_h
127
+ {
128
+ :user => user, :system => system, :idle => idle, :iowait => iowait,
129
+ :interrupts => interrupts, :procs_running => procs_running,
130
+ :procs_blocked => procs_blocked, :time => Time.now.to_s,
131
+ :steal => steal
132
+ }
133
+ end
134
+
135
+ end
136
+ end
137
+
@@ -0,0 +1,106 @@
1
+ # Collects Disk metrics on eligible filesystems. Reports a hash of hashes, with the first hash keyed by device name.
2
+ #
3
+ # TODO: Currently, this reports on devices that begin with /dev as listed by `mount`. Revisit this.
4
+ # TODO: relies on /proc/diskstats, so not mac compatible. Figure out mac compatibility
5
+ #
6
+ class ServerMetrics::Disk < ServerMetrics::MultiCollector
7
+
8
+ def build_report
9
+ @df_output = `df -h`.split("\n")
10
+ @devices = `mount`.split("\n").grep(/^\/dev/).map{|l|l.split.first} # any device that starts with /dev
11
+
12
+ @devices.each do |device|
13
+ get_sizes(device) # does its own reporting
14
+ get_stats(device) if linux? # does its own reporting
15
+ end
16
+ end
17
+
18
+ # called from build_report for each device
19
+ def get_sizes(device)
20
+ ENV['LANG'] = 'C' # forcing English for parsing
21
+
22
+ header_line=@df_output.first
23
+ num_columns = header_line.include?("iused") ? 9 : 6 # Mac has extra columns
24
+ headers = header_line.split(/\s+/,num_columns)
25
+ parsed_lines=[] # Each line will look like {"%iused" => "38%","Avail" => "289Gi", "Capacity=> "38%", "Filesystem"=> "/dev/disk0s2","Mounted => "/", "Size" => "465Gi", "Used" => "176Gi", "ifree" => "75812051", "iused" => "46116178"}
26
+
27
+ @df_output[1..@df_output.size-2].each do |line|
28
+ values=line.split(/\s+/,num_columns)
29
+ parsed_lines<<Hash[headers.zip(values)]
30
+ end
31
+
32
+ # select the right line
33
+ hash = parsed_lines.select{|l| l["Filesystem"] == device}.first
34
+ result = {}
35
+ hash.each_pair do |key,value|
36
+ #key=normalize_key(key) # downcase, make a symbol, etc
37
+ value = convert_to_mb(value) if [:avail,:capacity,:size,:used, :usepercent].include?(key)
38
+ result[key]=value
39
+ end
40
+
41
+ report(device, result)
42
+ end
43
+
44
+ # called from build_report for each device
45
+ def get_stats(device)
46
+ stats = iostat(device)
47
+
48
+ if stats
49
+ counter(device, "RPS", stats['rio'], :per => :second)
50
+ counter(device, "WPS", stats['wio'], :per => :second)
51
+ counter(device, "Kb RPS", stats['rsect'] / 2, :per => :second)
52
+ counter(device, "Kb WPS", stats['wsect'] / 2, :per => :second)
53
+ counter(device, "Utilization", stats['use'] / 10.0, :per => :second)
54
+ # Not 100% sure that average queue length is present on all distros.
55
+ if stats['aveq']
56
+ counter(device, "Average queue length", stats['aveq'], :per => :second)
57
+ end
58
+
59
+ if old = memory(device, "stats")
60
+ ios = (stats['rio'] - old['rio']) + (stats['wio'] - old['wio'])
61
+
62
+ if ios > 0
63
+ await = ((stats['ruse'] - old['ruse']) + (stats['wuse'] - old['wuse'])) / ios.to_f
64
+
65
+ report(device, "Await" => await)
66
+ end
67
+ end
68
+
69
+ remember(device, "stats" => stats)
70
+ end
71
+ end
72
+
73
+ private
74
+ COLUMNS = %w(major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq)
75
+
76
+ # Returns the /proc/diskstats line associated with device name +dev+. Logic:
77
+ #
78
+ # * If an exact match of the specified device is found, returns it.
79
+ # * If there isn't an exact match but there are /proc/diskstats lines that are included in +dev+,
80
+ # returns the first matching line. This is needed as the mount output used to find the default device doesn't always
81
+ # match /proc/diskstats output.
82
+ # * If there are no matches but an LVM is used, returns the line matching "dm-0".
83
+ def iostat(dev)
84
+ # if a LVM is used, `mount` output doesn't map to `/diskstats`. In this case, use dm-0 as the default device.
85
+ lvm = nil
86
+ retried = false
87
+ possible_devices = []
88
+ begin
89
+ %x(cat /proc/diskstats).split(/\n/).each do |line|
90
+ entry = Hash[*COLUMNS.zip(line.strip.split(/\s+/).collect { |v| Integer(v) rescue v }).flatten]
91
+ possible_devices << entry if dev.include?(entry['name'])
92
+ lvm = entry if (@default_device_used and 'dm-0'.include?(entry['name']))
93
+ end
94
+ rescue Errno::EPIPE
95
+ if retried
96
+ raise
97
+ else
98
+ retried = true
99
+ retry
100
+ end
101
+ end
102
+ found_device = possible_devices.find { |entry| dev == entry['name'] } || possible_devices.first
103
+ return found_device || lvm
104
+ end
105
+
106
+ end