instrumental_tools 0.6.0 → 1.0.0.pre.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -1
- data/CUSTOM_METRICS.md +56 -0
- data/README.md +13 -6
- data/bin/instrument_server +111 -385
- data/examples/README.md +7 -0
- data/examples/docker/README.md +20 -0
- data/examples/docker/docker.rb +74 -0
- data/examples/mongo/README.md +25 -0
- data/examples/mongo/mongo_3.rb +85 -0
- data/examples/mysql/README.md +13 -0
- data/examples/mysql/mysql_status.rb +88 -0
- data/instrumental_tools.gemspec +3 -1
- data/lib/instrumental_tools/metric_script_executor.rb +96 -0
- data/lib/instrumental_tools/server_controller.rb +70 -0
- data/lib/instrumental_tools/system_inspector/linux.rb +146 -0
- data/lib/instrumental_tools/system_inspector/osx.rb +114 -0
- data/lib/instrumental_tools/system_inspector.rb +67 -0
- data/lib/instrumental_tools/version.rb +1 -1
- metadata +18 -19
data/examples/README.md
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# Docker Metrics
|
2
|
+
|
3
|
+
This script will generate performance metrics for each running Docker container based on the name of that container. It will also output information regarding the number of running docker containers for that host.
|
4
|
+
|
5
|
+
Each Docker container will have the following metrics output:
|
6
|
+
|
7
|
+
* `CONTAINER_NAME.system_total` - Total system CPU time spent for container
|
8
|
+
* `CONTAINER_NAME.user_total` - Total user CPU time spent for container
|
9
|
+
* `CONTAINER_NAME.system` - Current percent usage of system time spent for container
|
10
|
+
* `CONTAINER_NAME.user` - Current percent usage of user time spent for container
|
11
|
+
* `CONTAINER_NAME.cache_mb` - Current cache memory allocated for container
|
12
|
+
* `CONTAINER_NAME.rss_mb` - Current resident memory allocated for container
|
13
|
+
* `CONTAINER_NAME.mapped_file_mb` - Current mapped memory allocated for container
|
14
|
+
* `CONTAINER_NAME.swap_mb` - Current swap memory allocated for container
|
15
|
+
|
16
|
+
The following metric will be output only once for the host:
|
17
|
+
|
18
|
+
* `running` - The total number of docker containers running on the host
|
19
|
+
|
20
|
+
This script will only work if the `docker ps` command is executable by the same user that is running `instrument_server`. You should ensure that the user that executes the `instrument_server` process belongs to the `docker` group on your system.
|
@@ -0,0 +1,74 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Prints CPU and memory gauges for every container listed by `docker ps`,
# plus a host-wide `running` count, one "name value" pair per line.
#
# Inputs:
#   ARGV[0] - epoch seconds of the previous run (0 / absent on the first run)
#   STDIN   - the output this script printed on its previous run; only read
#             when ARGV[0] is > 0. It is used to turn the cumulative CPU
#             counters into a rate over the elapsed interval.
#
# Exit status: 1 when `docker ps` fails or when there was no previous run
# (so no rates could be computed), 0 otherwise.

stdout_r, stdout_w = IO.pipe
pid = Process.spawn("docker", "ps", :err => STDERR, :out => stdout_w)
stdout_w.close

# Drain the pipe *before* reaping the child: waiting first blocks forever
# once the child has filled the pipe buffer.
output = stdout_r.read.chomp
stdout_r.close
_, exit_status = Process.wait2(pid)

if !exit_status.success?
  STDERR.puts output
  exit 1
end

header_line, *content = output.lines.map(&:chomp)
header_line = header_line.to_s

# Locate each column by the offset of its header. Splitting rows on runs of
# whitespace shifts every following cell left whenever a cell is empty
# (e.g. a container without published ports), which loses the NAMES column.
columns = []
header_line.scan(/\S+(?: \S+)*/) { |name| columns << [name, Regexp.last_match.begin(0)] }

docker_containers = content.map do |line|
  row = {}
  columns.each_with_index do |(name, start), index|
    following = columns[index + 1]
    cell = (following ? line[start...following[1]] : line[start..-1]).to_s.strip
    row[name] = cell unless cell.empty?
  end
  row
end
# Rows without an ID cannot be matched to a cgroup directory.
docker_containers = docker_containers.reject { |row| row["CONTAINER ID"].nil? }

cpu_info = "/sys/fs/cgroup/cpuacct/"
mem_info = "/sys/fs/cgroup/memory/"

previous_run_time = ARGV[0].to_i
current_time      = Time.now.to_i
run_interval      = (current_time - previous_run_time).to_f
previously_ran    = previous_run_time > 0
previous_values   = {}

if previously_ran
  previous_output = STDIN.read.chomp.lines
                         .map { |line| line.chomp.split }
                         .reject { |fields| fields.empty? }
                         .map { |(name, value, _)| [name, value.to_f] }
  previous_values = Hash[previous_output]
end

all_stats = docker_containers.map do |container|
  stats = {}
  # Prefer the first container name; fall back to the leading characters of the ID.
  container_name = Array(container["NAMES"].to_s.split(",")).first || container["CONTAINER ID"][0..7]

  Dir[File.join(cpu_info, "**", container["CONTAINER ID"] + "*", "cpuacct.stat")].each do |file|
    cpu_stats = Hash[File.read(file).lines.map { |line| line.chomp.split }]

    %w{system user}.each do |stat|
      next unless cpu_stats[stat]
      output_stat = [container_name, stat + "_total"].join(".")
      stats[output_stat] = cpu_stats[stat]

      # A rate needs a previous sample for this exact metric (a container
      # started since the last run has none) and a non-zero interval.
      previous_total = previous_values[output_stat]
      next unless previously_ran && previous_total && run_interval > 0

      delta = cpu_stats[stat].to_f - previous_total
      # A negative delta means the counter restarted; skip it rather than
      # report a negative usage.
      stats[[container_name, stat].join(".")] = delta / run_interval if delta >= 0
    end
  end

  Dir[File.join(mem_info, "**", container["CONTAINER ID"] + "*", "memory.stat")].each do |file|
    mem_stats = Hash[File.read(file).lines.map { |line| line.chomp.split }]

    %w{cache rss mapped_file swap}.each do |stat|
      # Values are divided by 1024 twice to report them as *_mb gauges.
      stats[[container_name, stat + "_mb"].join(".")] = mem_stats[stat].to_i / 1024.0 / 1024.0
    end
  end
  stats
end

puts "running %s" % docker_containers.size
all_stats.each do |row|
  row.each do |metric, value|
    puts [metric, value].join(" ")
  end
end

# Without a previous run no rate metrics were produced; signal that with a
# non-zero status, exactly as before.
if !previously_ran
  exit 1
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Mongo Metrics
|
2
|
+
|
3
|
+
The [`mongo_3.rb`](mongo_3.rb) script generates metrics from the `mongotop` and `mongostat` commands for a Mongo 3.0 database. The following metrics will be output for every collection in your system:
|
4
|
+
|
5
|
+
* `mongotop.COLLECTION.total_ms` - Total ms spent in collection
|
6
|
+
* `mongotop.COLLECTION.write_ms` - Write ms spent in collection
|
7
|
+
* `mongotop.COLLECTION.read_ms` - Read ms spent in collection
|
8
|
+
|
9
|
+
Additionally, the following database level metrics will be output:
|
10
|
+
|
11
|
+
* `mongostat.HOST.conn` - Current number of connections
|
12
|
+
* `mongostat.HOST.delete` - Delete commands issued
|
13
|
+
* `mongostat.HOST.faults` - Page faults occurred
|
14
|
+
* `mongostat.HOST.flushes` - Flushes performed
|
15
|
+
* `mongostat.HOST.getmore` - Get More commands issued
|
16
|
+
* `mongostat.HOST.idx_miss_pct` - Percentage of queries missing an index
|
17
|
+
* `mongostat.HOST.insert` - Insert commands issued
|
18
|
+
* `mongostat.HOST.mapped_mb` - Database files mapped into memory
|
19
|
+
* `mongostat.HOST.netIn_mb` - Amount of network I/O received
|
20
|
+
* `mongostat.HOST.netOut_mb` - Amount of network I/O sent
|
21
|
+
* `mongostat.HOST.query` - Number of queries received
|
22
|
+
* `mongostat.HOST.res_mb` - Resident memory of db process
|
23
|
+
* `mongostat.HOST.update` - Number of update commands issued
|
24
|
+
|
25
|
+
These metrics can only be gathered using the 3.0 version of the Mongo command line tools.
|
@@ -0,0 +1,85 @@
|
|
1
|
+
#!/usr/bin/env ruby

require 'yaml'

# Runs `mongotop` and `mongostat` once each and prints their figures as
# "name value" lines.
#
# Requires Mongo 3.0 command line tools
# ( uses --json for output, not available prior to 3.0 )
#
# Connection settings come from the MONGO_* environment variables below.
# Exits 1 as soon as either command fails.

MONGO_HOST           = ENV["MONGO_HOST"] || "127.0.0.1"
MONGO_PORT           = ENV["MONGO_PORT"] || 27017
MONGO_USER           = ENV["MONGO_USER"]
MONGO_PWD            = ENV["MONGO_PASSWORD"]
MONGO_AUTH_MECHANISM = ENV["MONGO_AUTH_MECHANISM"]
MONGO_AUTH_DB        = ENV["MONGO_AUTH_DB"]

CMDS = %w{mongotop mongostat}

outputs = CMDS.map do |cmd|
  cmd_with_arguments = [cmd, "--json", "-n", "1", "--host", MONGO_HOST, "--port", MONGO_PORT]
  cmd_with_arguments += ["--user", MONGO_USER] if MONGO_USER
  # NOTE(review): the password is passed as a command line argument, so it is
  # visible to anything that can list this host's processes.
  cmd_with_arguments += ["--password", MONGO_PWD] if MONGO_PWD
  cmd_with_arguments += ["--authenticationDatabase", MONGO_AUTH_DB] if MONGO_AUTH_DB
  cmd_with_arguments += ["--authenticationMechanism", MONGO_AUTH_MECHANISM] if MONGO_AUTH_MECHANISM

  stdout_r, stdout_w = IO.pipe
  pid = Process.spawn(*cmd_with_arguments.map(&:to_s), :err => STDERR, :out => stdout_w)
  stdout_w.close

  # Read everything before waiting: reaping first deadlocks once the child
  # has written more than the pipe buffer holds.
  output = stdout_r.read.chomp
  stdout_r.close
  _, exit_status = Process.wait2(pid)

  if !exit_status.success?
    STDERR.puts output
    exit 1
  end
  [cmd, output]
end

cmd_to_output = Hash[outputs]

if (output = cmd_to_output["mongotop"])
  # The --json output is parsed with the YAML loader, as this script always did.
  stats  = YAML.load(output)
  totals_by_collection = stats.is_a?(Hash) ? stats["totals"] : nil
  (totals_by_collection || {}).each do |collection, totals|
    stat = "mongotop.%s" % collection.gsub(/[^a-z0-9\-\_]/i, "_")
    %w{total read write}.each do |metric|
      timing = totals[metric] if totals.is_a?(Hash)
      # Skip entries that lack a timing record instead of raising.
      next unless timing.is_a?(Hash)
      puts "%s.%s_ms %s" % [stat, metric, timing["time"]]
    end
  end
end

if (output = cmd_to_output["mongostat"])
  stats = YAML.load(output)
  (stats.is_a?(Hash) ? stats : {}).each do |host, metrics|
    next unless metrics.is_a?(Hash)
    stat = "mongostat.%s" % host.gsub(/[^a-z0-9\-\_]/i, "_")
    # An Array entry is [key in the mongostat output, name to report it under].
    ["conn", "delete", "faults", "flushes", "getmore", ["idx miss %", "idx_miss_pct"], "insert", "mapped", "netIn", "netOut", "query", "res", "update"].each do |metric|
      name, value = case metric
                    when Array
                      key, output_key = metric
                      [output_key, metrics[key]]
                    else
                      [metric, metrics[metric]]
                    end
      # A metric absent from the output has nothing to report.
      next if value.nil?

      # Sizes carry a unit suffix; normalise them to megabytes. The value is
      # stringified first because a numeric value does not respond to `=~`
      # on every Ruby version.
      if value.to_s =~ /\A([\d\.]+)(b|k|M|G)\z/
        value = case $2
                when "b"
                  $1.to_f / 1024.0 / 1024.0
                when "k"
                  $1.to_f / 1024.0
                when "M"
                  $1.to_f
                when "G"
                  $1.to_f * 1024.0
                end
        name += "_mb"
      end
      # Strip decorations such as the "*" prefix so only the number remains.
      value = value.to_s.gsub(/[^\d\.]/, "")
      puts "%s.%s %s" % [stat, name, value]
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# MySQL Metrics
|
2
|
+
|
3
|
+
The MySQL metrics script collects the following metrics:
|
4
|
+
|
5
|
+
* Queries - `Queries`
|
6
|
+
* Bytes Sent - `Bytes_sent`
|
7
|
+
* Bytes Received - `Bytes_received`
|
8
|
+
* Connections - `Connections`
|
9
|
+
* Slow queries - `Slow_queries`
|
10
|
+
|
11
|
+
Additionally, it estimates the per-second rate of each of these metrics ( `Queries_per_second`, etc. ).
|
12
|
+
|
13
|
+
You may either edit the values `MYSQL_HOST`, `MYSQL_PORT`, `MYSQL_USER`, `MYSQL_DEFAULTS_FILE` / `MYSQL_PASSWORD` in the [`mysql_status.rb`](mysql_status.rb) to reflect your server's information or provide them as environment variables to the `instrument_server` process. It is advisable that you use a MySQL CNF file to specify password information if your server uses password authentication. See [the MySQL page regarding password security](http://dev.mysql.com/doc/refman/5.0/en/password-security-user.html) for more information.
|
@@ -0,0 +1,88 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Prints selected counters from `SHOW GLOBAL STATUS` as "name value" lines
# and, when a previous run is available, a `<counter>_per_second` rate for
# each of them.
#
# Inputs:
#   ARGV[0] - epoch seconds of the previous run (0 / absent on the first run)
#   STDIN   - this script's output from the previous run; only read when
#             ARGV[0] is > 0
#
# Exit status: the mysql client's exit code when it fails, 1 when no rates
# could be computed (first run or server restart), 0 otherwise.
#
# Ensure your instrument_server process runs with these environment variables set,
# or replace with your own.

MYSQL_HOST          = ENV["MYSQL_HOST"] || "localhost"
MYSQL_PORT          = ENV["MYSQL_PORT"] || 3306
MYSQL_USER          = ENV["MYSQL_USER"]
MYSQL_DEFAULTS_FILE = ENV["MYSQL_DEFAULTS_FILE"]
MYSQL_PASSWORD      = ENV["MYSQL_PASSWORD"]

RATE_METRICS_TO_INSPECT = %w{Queries Bytes_sent Bytes_received Connections Slow_queries}
CANARY_METRIC           = "Queries"

env  = {}
args = []
if MYSQL_DEFAULTS_FILE.to_s.size > 0
  args << "--defaults-file=%s" % MYSQL_DEFAULTS_FILE
else
  # Hand the password over through the environment rather than argv.
  env = { "MYSQL_PWD" => MYSQL_PASSWORD }
end
args += [
  "-N",
  "-B",
  "-e",
  "SHOW GLOBAL STATUS"
]
args += ["--user", MYSQL_USER] if MYSQL_USER
args += ["--host", MYSQL_HOST] if MYSQL_HOST
args += ["--port", MYSQL_PORT] if MYSQL_PORT

stdout_r, stdout_w = IO.pipe
pid = Process.spawn(env, "mysql", *args.map(&:to_s), :out => stdout_w, :err => STDERR)
stdout_w.close

# Drain the pipe before reaping the child; waiting first deadlocks as soon as
# the status listing is larger than the pipe buffer.
raw_output = stdout_r.read
stdout_r.close
pid, exit_status = Process.wait2(pid)

previous_run_time = ARGV[0].to_i
current_time      = Time.now.to_i
run_interval      = (current_time - previous_run_time).to_f
previously_ran    = previous_run_time > 0
previous_values   = {}

if previously_ran
  previous_output = STDIN.read.chomp.each_line
                         .map { |line| line.split }
                         .reject { |fields| fields.empty? }
                         .map { |(name, value, _)| [name, value.to_f] }
  previous_values = Hash[previous_output]
end

if !exit_status.success?
  # Process::Status#to_i is the raw wait status (256 for exit code 1), which
  # the OS truncates to 0; propagate the real exit code instead.
  exit(exit_status.exitstatus || 1)
else
  output = raw_output.lines                                     # each line
                     .map { |line| line.chomp.split }           # split by space characters
                     .reject { |fields| fields.empty? }
                     .map { |(name, value, _)| [name, value.to_f] } # with values coerced to floats
  stats = Hash[output]

  current_canary  = stats[CANARY_METRIC]
  previous_canary = previous_values[CANARY_METRIC]
  if current_canary.nil? || previous_canary.nil? || current_canary < previous_canary
    # The server has restarted, don't trust previous values for calculating difference
    previously_ran = false
  end

  if previously_ran && run_interval > 0
    RATE_METRICS_TO_INSPECT.each do |metric|
      # Both samples are needed; a counter missing from either run has no rate.
      next unless stats[metric] && previous_values[metric]
      stats["%s_per_second" % metric] = (stats[metric] - previous_values[metric]) / run_interval
    end
  end

  RATE_METRICS_TO_INSPECT.each do |metric|
    puts [metric, stats[metric]].join(" ")
    per_second_metric = "%s_per_second" % metric
    per_second        = stats[per_second_metric]
    puts [per_second_metric, per_second].join(" ") if per_second
  end

  exit(previously_ran ? 0 : 1)
end
|
data/instrumental_tools.gemspec
CHANGED
@@ -14,7 +14,9 @@ Gem::Specification.new do |s|
|
|
14
14
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
15
15
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
16
16
|
s.require_paths = ["lib"]
|
17
|
-
|
17
|
+
|
18
|
+
s.required_ruby_version = '>= 1.9'
|
19
|
+
|
18
20
|
s.add_runtime_dependency(%q<instrumental_agent>, [">=0.12.6"])
|
19
21
|
s.add_runtime_dependency(%q<pidly>, [">=0.1.3"])
|
20
22
|
s.add_development_dependency(%q<rake>, [">=0"])
|
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'benchmark'
require 'etc'
|
2
|
+
|
3
|
+
# Runs every eligible script found directly inside a directory and turns the
# lines they print into metric tuples.
#
# A script is eligible when it is a regular, executable file owned by the
# current user with permissions of exactly 0700; the directory must satisfy
# the same ownership/permission rule. Each script is started with the time of
# its previous run (0 the first time) and, once it has run before, the raw
# integer form of its previous Process::Status as arguments, and receives its
# previous output on STDIN.
class MetricScriptExecutor

  attr_reader :directory, :previous, :last_ran_at

  # directory - path of the directory to scan for scripts.
  def initialize(directory)
    @directory = directory
    @previous  = {}
  end

  # True when +path+ is a regular executable file restricted to its owner.
  # Directories are rejected explicitly: a 0700 directory is "executable" too
  # and must never be handed to Process.spawn. A path that cannot be stat'ed
  # is simply not executable.
  def can_execute_file?(path)
    stat = File::Stat.new(path)
    stat.file? && stat.executable? && file_is_owner_only?(stat)
  rescue SystemCallError
    false
  end

  # True when +directory+ exists, is a directory and is restricted to its
  # owner. A missing directory answers false instead of raising, so #run can
  # report it.
  def can_execute_in_directory?(directory)
    return false if directory.nil?
    stat = File::Stat.new(directory)
    stat.directory? && file_is_owner_only?(stat)
  rescue SystemCallError
    false
  end

  # True when the entry is owned by the current user and its permission bits
  # are exactly 0700.
  def file_is_owner_only?(file_stat)
    file_stat.owned? && ((file_stat.mode & 0xFFF) ^ 0O700) == 0
  end

  # Executes all eligible scripts and remembers their results for the next
  # call.
  #
  # Returns an Array of [metric_name, value, time] tuples taken from the
  # output of the scripts that exited successfully; metric names are prefixed
  # with the script's file name minus its extension.
  def run
    process_to_output = {}
    if can_execute_in_directory?(directory)
      current = Dir[File.join(directory, "*")].map do |path|
        full_path = File.expand_path(path)
        if can_execute_file?(full_path)
          [full_path, execute_script(full_path)]
        else
          report_unusable(full_path) unless File.directory?(full_path)
          [full_path, []]
        end
      end
      process_to_output = Hash[current]
      @previous = process_to_output
    else
      puts "Directory #{directory} has gone away or does not have the correct permissions (0700), not scanning for metric scripts."
    end
    metrics_from(process_to_output)
  end

  private

  # Runs one script and returns [exit_status, finished_at, stdout], or [] when
  # the script could not be started.
  def execute_script(full_path)
    previous_status, previous_time, previous_output = previous[full_path]
    cmd = [full_path, (previous_time || 0).to_i, (previous_status && previous_status.to_i)].compact.map(&:to_s)

    stdin_r, stdin_w   = IO.pipe
    stdout_r, stdout_w = IO.pipe
    stderr_r, stderr_w = IO.pipe
    pipes = [stdin_r, stdin_w, stdout_r, stdout_w, stderr_r, stderr_w]

    pid = Process.spawn(*cmd,
                        :chdir => File.dirname(full_path),
                        :in    => stdin_r,
                        :out   => stdout_w,
                        :err   => stderr_w)
    [stdin_r, stdout_w, stderr_w].each(&:close)

    # Feed STDIN and drain STDOUT/STDERR while the script runs. Writing before
    # the spawn, or reading only after the wait, blocks forever once either
    # side exceeds the pipe buffer.
    writer = Thread.new do
      begin
        stdin_w.write(previous_output || "")
      rescue Errno::EPIPE
        # The script exited without reading its input; nothing to do.
      ensure
        stdin_w.close
      end
    end
    stdout_reader = Thread.new { stdout_r.read.to_s.chomp }
    stderr_reader = Thread.new { stderr_r.read.to_s.chomp }

    exit_status = nil
    exec_time = Benchmark.realtime do
      pid, exit_status = Process.wait2(pid)
    end

    if exec_time > 1.0
      puts "[SLOW SCRIPT] Time to execute process #{full_path} took #{exec_time} seconds"
    end

    writer.join
    output = stdout_reader.value
    stderr = stderr_reader.value
    unless stderr.empty?
      puts "[STDERR] #{full_path} (PID:#{pid}) [#{Time.now.to_s}]:: #{stderr}"
    end

    [exit_status, Time.now, output]
  rescue SystemCallError => e
    # A script that cannot be started must not take the whole run down.
    puts "[INFO] Cannot execute #{full_path}: #{e.message}"
    []
  ensure
    pipes.each { |io| io.close unless io.closed? } if pipes
  end

  # Explains why a file in the script directory was skipped.
  def report_unusable(full_path)
    uid   = Process.uid
    entry = Etc.getpwuid(uid)
    # There may be no passwd entry for this uid; fall back to the uid itself.
    user  = entry ? entry.name : uid
    puts "[INFO] Cannot execute #{full_path}, must be executable and only readable/writable by #{user}/#{uid}"
  end

  # Converts the stored results into [name, value, time] tuples, keeping only
  # scripts that exited successfully and lines of the form
  # "name value" or "name value time".
  def metrics_from(process_to_output)
    process_to_output.flat_map do |path, (status, time, output)|
      next [] unless status && status.success?
      # Strip only the last extension. Splitting on "." and dropping the last
      # piece left scripts without an extension with an empty prefix.
      prefix = File.basename(path, ".*").gsub(/[^a-z0-9\-\_\.]/i, "_")
      output.lines                                            # each line
            .map { |line| line.chomp.split }                  # split by whitespace
            .select { |data| (2..3).include?(data.size) }     # and only valid name value time? pairs
            .map { |(name, value, specific_time)| [[prefix, name].join("."), value.to_f, (specific_time || time).to_i] } # with value coerced to a float
    end
  end

end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'pidly'
|
2
|
+
require 'instrumental_tools/metric_script_executor'
|
3
|
+
require 'instrumental_tools/system_inspector'
|
4
|
+
|
5
|
+
# Pidly controller for the instrument_server process: announces start/stop,
# and in the foreground runs the collection loop that sends system gauges and
# custom script metrics to Instrumental once per report interval.
class ServerController < Pidly::Control
  COMMANDS = [:start, :stop, :status, :restart, :clean, :kill, :foreground]

  attr_accessor :run_options, :pid

  before_start do
    extra_info = nil
    if run_options[:daemon]
      extra_info = "(#{run_options[:pid_location]}), log: #{run_options[:log_location]}"
    end
    puts "Starting daemon process: #{@pid} #{extra_info}"
  end

  start :foreground

  stop do
    puts "Attempting to kill daemon process: #{@pid}"
  end

  error do
    puts 'Error encountered'
  end

  # Collection loop; never returns.
  #
  # options - Hash read for :api_key, :collector, :port, :hostname,
  #           :report_interval, :script_location and :debug.
  def self.run(options)
    collector_address = [options[:collector], options[:port]].compact.join(':')
    agent = Instrumental::Agent.new(options[:api_key], :collector => collector_address)
    puts "instrument_server version #{Instrumental::Tools::VERSION} started at #{Time.now.utc}"
    puts "Collecting stats under the hostname: #{options[:hostname]}"
    interval      = options[:report_interval]
    script_runner = MetricScriptExecutor.new(options[:script_location])

    loop do
      # Sleep until the next multiple of the interval.
      now      = Time.now.to_i
      wake_at  = (now - now % interval) + interval
      sleep [wake_at - now, 0].max

      inspector = SystemInspector.new
      inspector.load_all
      sent = 0

      inspector.gauges.each do |stat, value|
        metric = "#{options[:hostname]}.#{stat}"
        agent.gauge(metric, value)
        puts [metric, value].join(":") if options[:debug]
        sent += 1
      end

      script_runner.run.each do |(stat, value, time)|
        metric = "#{options[:hostname]}.#{stat}"
        agent.gauge(metric, value, time)
        puts [metric, value].join(":") if options[:debug]
        sent += 1
      end

      puts "Sent #{sent} metrics"
    end
  end

  # options - Pidly options; the :run_options entry is removed and kept for
  #           the collection loop, the rest is passed on to Pidly.
  def initialize(options={})
    @run_options = options.delete(:run_options) || {}
    super(options)
  end

  # Runs the collection loop in this process.
  def foreground
    self.class.run(run_options)
  end

  alias_method :clean, :clean!
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
class SystemInspector
  # Linux implementations of the system gauges. Every `load_*` method returns
  # `{ :gauges => { name => value } }`; the other methods return the bare
  # name => value Hash for one subsystem. Values that need a previous sample
  # are kept in `SystemInspector.memory`.
  module Linux
    # CPU usage percentages plus load averages.
    def self.load_cpu
      output = { :gauges => {} }
      output[:gauges].merge!(cpu)
      output[:gauges].merge!(loadavg)
      output
    end

    # Reads the aggregate "cpu" line of /proc/stat.
    #
    # Returns "cpu.in_use" and, when a previous sample exists, one
    # "cpu.<category>" percentage per category, computed on the difference
    # to that sample. Returns {} when the line cannot be read or nothing
    # changed since the previous sample (which would otherwise yield NaN).
    def self.cpu
      categories = [:user, :nice, :system, :idle, :iowait]
      # The pattern is quoted so the shell cannot expand it as a file glob.
      fields = `cat /proc/stat | grep '^cpu[^0-9]'`.chomp.split
      values = (fields.slice(1, categories.size) || []).map { |v| v.to_f }
      return {} if values.size < categories.size

      SystemInspector.memory.store(:cpu_values, values.dup)
      previous_values = SystemInspector.memory.retrieve(:cpu_values)
      if previous_values
        values = values.each_with_index.map { |value, index| (previous_values[index] - value).abs }
      end
      data  = Hash[categories.zip(values)]
      total = values.inject(0.0) { |memo, value| memo + value }
      return {} unless total > 0

      output = {}
      if previous_values
        data.each do |category, value|
          output["cpu.#{category}"] = value / total * 100
        end
      end
      output["cpu.in_use"] = 100 - data[:idle] / total * 100
      output
    end

    # The first three fields of /proc/loadavg.
    def self.loadavg
      min_1, min_5, min_15 = `cat /proc/loadavg`.split
      {
        'load.1min'  => min_1.to_f,
        'load.5min'  => min_5.to_f,
        'load.15min' => min_15.to_f
      }
    end

    # Memory and swap gauges.
    def self.load_memory
      output = { :gauges => {} }
      if SystemInspector.command_present?('cat', 'memory')
        output[:gauges].merge!(memory)
      end
      if SystemInspector.command_present?('cat', 'swap')
        output[:gauges].merge!(swap)
      end
      output
    end

    # Parses /proc/meminfo into a Hash of field name => numeric value, e.g.
    # "MemTotal: 1024 kB" becomes "MemTotal" => 1024.0.
    def self.meminfo
      `cat /proc/meminfo`.lines.each_with_object({}) do |line, info|
        name, value = line.chomp.split(/:\s*/, 2)
        info[name] = value.to_f if name && value
      end
    end
    private_class_method :meminfo

    # Memory gauges in megabytes plus the free percentage.
    #
    # Read from /proc/meminfo rather than `free -k -o`: the `-o` flag is not
    # accepted by newer procps-ng releases, which left every gauge at zero
    # and the percentage at NaN. "used" is total minus free, i.e. it
    # includes buffers and cache.
    def self.memory
      info  = meminfo
      total = info['MemTotal'].to_f
      return {} unless total > 0
      free  = info['MemFree'].to_f
      {
        'memory.used_mb'      => (total - free) / 1024,
        'memory.free_mb'      => free / 1024,
        'memory.buffers_mb'   => info['Buffers'].to_f / 1024,
        'memory.cached_mb'    => info['Cached'].to_f / 1024,
        'memory.free_percent' => (free / total) * 100
      }
    end

    # Swap gauges in megabytes plus the free percentage; {} when the host
    # has no swap.
    def self.swap
      info  = meminfo
      total = info['SwapTotal'].to_f
      return {} if total.to_i == 0
      free  = info['SwapFree'].to_f
      {
        'swap.used_mb'      => (total - free) / 1024,
        'swap.free_mb'      => free / 1024,
        'swap.free_percent' => (free / total) * 100
      }
    end

    # Disk capacity and disk I/O gauges.
    def self.load_disks
      output = { :gauges => {} }
      if SystemInspector.command_present?('df', 'disk storage')
        output[:gauges].merge!(disk_storage)
      end
      if SystemInspector.command_present?('mount', 'disk IO')
        output[:gauges].merge!(disk_io)
      end
      output
    end

    # Capacity gauges for every tmpfs and /dev/* line printed by `df -Pka`.
    # The filesystem mounted on "/" is additionally reported as "root".
    def self.disk_storage
      output = {}
      `df -Pka`.lines.each do |line|
        device, total, used, available, _capacity, mount = line.chomp.split
        # Skip the header and any line without the expected columns.
        next unless mount
        if device == "tmpfs"
          names = ["tmpfs_#{mount.gsub(/[^[:alnum:]]/, "_")}".gsub(/_+/, "_")]
        elsif device =~ %r{/dev/}
          names = [File.basename(device)]
        else
          next
        end
        names << 'root' if mount == '/'
        names.each do |name|
          output["disk.#{name}.total_mb"]     = total.to_f / 1024
          output["disk.#{name}.used_mb"]      = used.to_f / 1024
          output["disk.#{name}.available_mb"] = available.to_f / 1024
          # A filesystem reporting zero blocks has no meaningful percentage.
          if total.to_f > 0
            output["disk.#{name}.available_percent"] = available.to_f / total.to_f * 100
          end
        end
      end
      output
    end

    # Percent utilization of each mounted /dev/* device, derived from the
    # change of field 12 of its /proc/diskstats line since the previous
    # sample. Empty until a previous sample exists.
    def self.disk_io
      output = {}
      mounted_devices = `mount`.lines.grep(/^\/dev\/(\w+)/) { $1 }
      # With no devices the old joined pattern was empty and matched every
      # diskstats line.
      return output if mounted_devices.empty?

      entries = []
      `cat /proc/diskstats`.lines.each do |line|
        values = line.chomp.split
        # Compare the device column exactly so "sda1" does not also select
        # "sda10".
        next unless mounted_devices.include?(values[2])
        entry = {
          :time        => Time.now,
          :device      => values[2],
          :utilization => values[12].to_f
        }
        SystemInspector.memory.store("disk_stats_#{entry[:device]}".to_sym, entry)
        entries << entry
      end

      entries.each do |entry|
        previous_entry = SystemInspector.memory.retrieve("disk_stats_#{entry[:device]}".to_sym)
        next unless previous_entry
        time_delta = (entry[:time] - previous_entry[:time]) * 1000
        next unless time_delta > 0
        utilization_delta = entry[:utilization] - previous_entry[:utilization]
        output["disk.#{entry[:device]}.percent_utilization"] = utilization_delta / time_delta * 100
      end
      output
    end

    # Open file handle gauges.
    def self.load_filesystem
      output = { :gauges => {} }
      if SystemInspector.command_present?('sysctl', 'filesystem')
        output[:gauges].merge!(filesystem)
      end
      output
    end

    # Parses the last three numbers printed by `sysctl fs.file-nr`
    # (allocated, unused, maximum). Returns {} when they cannot be read.
    def self.filesystem
      numbers = `sysctl fs.file-nr`.split[-3..-1]
      return {} unless numbers
      allocated, unused, max = numbers.map { |v| v.to_i }
      return {} unless max > 0
      open_files = allocated - unused
      {
        'filesystem.open_files'         => open_files,
        'filesystem.max_open_files'     => max,
        'filesystem.open_files_pct_max' => (open_files.to_f / max.to_f) * 100
      }
    end
  end
end
|