newrelic_ia 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +4 -0
- data/LICENSE +22 -0
- data/README.rdoc +101 -38
- data/Rakefile +87 -43
- data/bin/newrelic_ia +11 -3
- data/lib/new_relic/ia/cli.rb +136 -83
- data/lib/new_relic/ia/memcached_sampler.rb +340 -0
- data/lib/new_relic/ia/metric_names.rb +2 -2
- data/lib/new_relic/ia/newrelic.yml +17 -8
- data/lib/new_relic/ia/version.rb +5 -0
- data/lib/newrelic_ia.rb +5 -8
- data/spec/cli_spec.rb +6 -0
- data/spec/memcached-1.out +24 -0
- data/spec/memcached-nodes.txt +4 -0
- data/spec/memcached_sampler_spec.rb +58 -0
- data/spec/spec_helper.rb +4 -2
- metadata +49 -35
- data/Manifest +0 -21
- data/newrelic_ia.gemspec +0 -41
@@ -0,0 +1,340 @@
|
|
1
|
+
require 'new_relic/ia/metric_names'
|
2
|
+
require 'socket'
|
3
|
+
#require 'active_support'
|
4
|
+
|
5
|
+
# Memcached stats sampler
|
6
|
+
# An IGN Hackday project
|
7
|
+
|
8
|
+
class NewRelic::IA::MemcachedSampler < NewRelic::Agent::Sampler
|
9
|
+
|
10
|
+
include NewRelic::IA::MetricNames
|
11
|
+
# Evaluated when the class body is loaded: log a warning if the platform
# string matches neither darwin nor linux.
case RUBY_PLATFORM
when /darwin/, /linux/
  # Recognized platform; no platform-specific setup is performed here.
else
  NewRelic::IA::CLI.log.warn "unsupported platform #{RUBY_PLATFORM}"
end
|
19
|
+
|
20
|
+
# Registers the sampler with the id 'memcached', sets up the lists of stat
# names it works with, and loads the node list via parse_config.
def initialize
  super 'memcached'

  # Numeric fields read from each node's `stats` output.
  @int_values = [
    :uptime, :curr_items, :total_items, :bytes,
    :curr_connections, :total_connections, :connection_structures,
    :cmd_flush, :cmd_get, :cmd_set, :get_hits, :get_misses, :evictions,
    :bytes_read, :bytes_written, :limit_maxbytes, :threads
  ]
  # Values computed from a single sample.
  @derived_values = [:free_bytes]
  # Values computed from two consecutive samples. The first two entries are
  # handled as ratios by the rest of the class, the remainder as rates.
  @derivatives = [:hit_ratio, :miss_ratio, :rpm, :gpm, :hpm, :mpm, :spm, :fpm, :epm]

  # Most recent stats hash per "host:port" node.
  @last_stats = {}
  @memcached_nodes = parse_config
end
|
30
|
+
|
31
|
+
# Reads the 'memcached_nodes' setting from the agent configuration
# (NewRelic::Control.instance) and returns it.
#
# The setting must be a non-empty Array; the rest of the class treats each
# entry as a "hostname:port" string.
#
# Raises NewRelic::IA::InitError when the setting is missing, is not an
# Array, or is empty.
def parse_config
  memcached_nodes = NewRelic::Control.instance['memcached_nodes']
  # Parentheses matter here: without them `is_a? Array || ...` is parsed as
  # `is_a?(Array || ...)` and the emptiness check is never evaluated.
  if !memcached_nodes.is_a?(Array) || memcached_nodes.empty?
    raise NewRelic::IA::InitError, "No memcached_nodes array found in newrelic.yml."
  end
  memcached_nodes
end
|
39
|
+
|
40
|
+
# Reader for the node list stored in @memcached_nodes.
def memcached_nodes
  @memcached_nodes
end
|
43
|
+
|
44
|
+
# Sanity check: asks every configured node for its stats and raises
# NewRelic::Agent::Sampler::Unsupported, listing the nodes that returned
# nothing, if any node failed to answer.
def check
  unreachable = memcached_nodes.reject { |node| issue_stats node }
  return if unreachable.empty?

  raise NewRelic::Agent::Sampler::Unsupported, "Servers not available: #{unreachable.join(", ")}"
end
|
53
|
+
|
54
|
+
# Sampling entry point (per the original author's note, called once a
# minute in the agent worker thread). Queries each node in memcached_nodes,
# stores the parsed sample -- or nil when the node gave no answer -- in
# @last_stats, then records the aggregates. Does nothing when the node list
# is empty.
def poll
  return if memcached_nodes.empty?

  memcached_nodes.each do |node|
    raw_stats = issue_stats node
    @last_stats[node] = raw_stats ? parse_and_report_stats(node, raw_stats) : nil
  end

  aggregate_stats
  debug "done with aggs"
end
|
71
|
+
|
72
|
+
# The log object this sampler writes to: the one exposed by NewRelic::IA::CLI.
def logger
  NewRelic::IA::CLI.log
end
|
75
|
+
|
76
|
+
# Rolls the most recent per-node samples in @last_stats up into metrics
# recorded under "#{MEMCACHED}/all/<stat>".
#
# * Every stat in @int_values and @derived_values is summed across nodes.
# * The first two @derivatives (the ratios) are averaged over the nodes that
#   carry them; the remaining derivatives are summed.
# * Nothing is recorded when the summed uptime is zero, and derivatives are
#   recorded only when at least one node supplied them.
#
# Entries that are nil (poll stores nil for a node that did not answer) are
# skipped. Previously such an entry raised inside the loop and the blanket
# rescue below threw away the aggregates of every healthy node as well.
#
# Any error is logged at debug level and swallowed.
def aggregate_stats
  summed_stats = @int_values + @derived_values
  ratio_stats  = @derivatives[0, 2]
  rate_stats   = @derivatives[2, @derivatives.length - 2]

  aggs_stats = {}
  (summed_stats + rate_stats).each { |metric| aggs_stats[metric] = 0 }
  ratio_stats.each { |metric| aggs_stats[metric] = 0.0 }

  aggs_count = 0
  @last_stats.each_value do |node_stats|
    next unless node_stats

    summed_stats.each { |metric| aggs_stats[metric] += (node_stats[metric] || 0) }

    # Only samples that carry both ratios contribute derivatives.
    next unless node_stats[:hit_ratio] && node_stats[:miss_ratio]

    @derivatives.each { |metric| aggs_stats[metric] += node_stats[metric] }
    aggs_count += 1
  end

  # Ratios are averaged rather than summed.
  if aggs_count > 0
    ratio_stats.each { |metric| aggs_stats[metric] /= aggs_count }
  end

  if aggs_stats[:uptime] > 0
    summed_stats.each { |stat| record_aggregate(stat, aggs_stats[stat]) }
    if aggs_count > 0
      @derivatives.each { |stat| record_aggregate(stat, aggs_stats[stat], true) }
    end
  else
    debug "skipping aggregates since aggregate uptime is zero"
  end
rescue => e
  debug "Could not record stat: stats\n #{e.backtrace.join("\n")}"
end

# Records one aggregate value under "#{MEMCACHED}/all/<stat>". When truncate
# is true the value is converted with to_i first. A failure is logged at
# debug level and does not stop the remaining stats from being recorded.
def record_aggregate(stat, value, truncate = false)
  value = value.to_i if truncate
  debug "recording #{MEMCACHED}/all/#{stat.to_s} = #{value}"
  stats("#{MEMCACHED}/all/#{stat.to_s}").record_data_point(value)
rescue => e
  debug "Could not record stat: #{stat}\n #{e.backtrace.join("\n")}"
end
private :record_aggregate
|
145
|
+
|
146
|
+
|
147
|
+
#TODO send stats for down nodes
|
148
|
+
# Fetches the raw `stats` response from one memcached node over TCP.
#
# hostname_port - "host:port" String, split at the first ':'.
#
# Returns the response text up to (not including) the whitespace that
# precedes the terminating END line, or nil when the node could not be
# reached or the connection ended before END was received.
#
# Failures never propagate: IOError, SystemCallError and SocketError are
# logged and reported as nil so callers can treat the node as down.
# SocketError is listed explicitly because it is raised for an unresolvable
# host or service name and is not a SystemCallError.
def issue_stats(hostname_port)
  debug "get stats from hostname #{hostname_port}"
  socket = nil
  begin
    parts = hostname_port.split(':', 2)
    hostname = parts.first
    port = parts.last

    socket = TCPSocket.open(hostname, port)
    socket.send("stats\r\n", 0)

    # TODO UDP or use memcached gem to use udp first and fallback to tcp

    statistics = ""
    loop do
      data = socket.recv(4096)
      # An empty read means the peer closed the connection.
      break if !data || data.length == 0

      statistics << data
      end_index = statistics =~ /\s+END\s+$/
      return statistics[0, end_index] if end_index
    end
  rescue IOError, SystemCallError, SocketError => e
    logger.warn "memcached: unable to connect to memcached node at #{hostname_port}: #{e.message}"
    logger.info "memcached: unable to connect to memcached node at #{hostname_port}"
    logger.error "memcached: #{e.message}"
    debug e.backtrace.join("\n")
  ensure
    # Closing is best effort; a failure here must not mask the result.
    begin
      socket.close if socket
    rescue StandardError
      nil
    end
  end
  nil
end
|
185
|
+
|
186
|
+
# Turns the text of a memcached `stats` response into a Hash.
#
# hostname_port - node identifier, used only in the error log line.
# stats_text    - response text; a trailing END line, if present, is dropped.
#
# The text is split on whitespace and read as consecutive triplets
# (marker, name, value), e.g. "STAT uptime 2089". Returns a Hash mapping
# each name (as a Symbol) to its value (as a String).
#
# When the token count is not a multiple of three the output is logged as
# an error and an empty Hash is returned. The original code used `break`
# here, which is not valid outside a loop.
def parse_stats(hostname_port, stats_text)
  end_index = stats_text =~ /\s+END\s+$/
  stats_text = stats_text[0, end_index] if end_index

  # Argument-less split ignores leading whitespace, which would otherwise
  # yield an empty first token and spoil the triplet count.
  tokens = stats_text.split
  if tokens.size % 3 != 0
    logger.error "memcached: unexcpected stats output from #{hostname_port}: #{stats_text}"
    return {}
  end

  parsed = {}
  tokens.each_slice(3) do |_marker, name, value|
    debug "#{name.to_sym} = #{value}"
    parsed[name.to_sym] = value
  end
  parsed
end
|
205
|
+
|
206
|
+
# Parses one node's `stats` response, records its metrics under
# "#{MEMCACHED}/<hostname_port>/<stat>" and returns the sample so the caller
# can keep it as the baseline for the next poll.
#
# Reported per sample:
# * every stat in @int_values (uptime, curr_items, bytes, cmd_get, ...),
#   converted to Integer;
# * free_bytes = limit_maxbytes - bytes.
#
# Reported once a previous sample for the node exists in @last_stats:
# * rpm/gpm/spm/fpm/hpm/mpm/epm - per-minute rates of requests, gets, sets,
#   flushes, hits, misses and evictions between the two samples;
# * hit_ratio / miss_ratio in percent (100 / 0 when there were no lookups).
#
# hostname_port - "host:port" String identifying the node.
# stats_text    - raw response text, handed to parse_stats.
#
# Returns the stats Hash (Symbol keys; :time is a Time), or nil when
# parse_stats produced nothing.
def parse_and_report_stats(hostname_port, stats_text)
  node_stats = parse_stats(hostname_port, stats_text)
  # Nothing usable was parsed: report no sample instead of a row of zeros.
  return nil if node_stats.nil? || node_stats.empty?

  # parse_stats yields String values; store the sampled stats as Integers.
  @int_values.each { |stat| node_stats[stat] = node_stats[stat].to_i }
  # time is not shipped to the collector; it is kept for the rate calculations.
  node_stats[:time] = Time.at(node_stats[:time].to_i)
  node_stats[:free_bytes] = node_stats[:limit_maxbytes] - node_stats[:bytes]

  has_derivatives = add_derivatives(node_stats, @last_stats[hostname_port])

  (@int_values + @derived_values).each do |stat|
    record_node_stat(hostname_port, stat, node_stats[stat])
  end
  if has_derivatives
    @derivatives.each { |stat| record_node_stat(hostname_port, stat, node_stats[stat], true) }
  end

  debug "Done with record data"
  node_stats
end

# Adds the per-minute rates and hit/miss ratios to node_stats, computed
# against previous_stats. Returns true when they were added and false when
# there is no previous sample or no time has elapsed between the two
# samples; dividing by a zero interval would produce NaN/Infinity.
def add_derivatives(node_stats, previous_stats)
  return false unless previous_stats

  elapsed = node_stats[:time] - previous_stats[:time]
  return false unless elapsed > 0

  previous_r = previous_stats[:cmd_get] + previous_stats[:cmd_set] + previous_stats[:cmd_flush]
  current_r = node_stats[:cmd_get] + node_stats[:cmd_set] + node_stats[:cmd_flush]

  # unit per minute
  node_stats[:rpm] = (current_r - previous_r) / elapsed * 60
  { :gpm => :cmd_get, :spm => :cmd_set, :fpm => :cmd_flush,
    :hpm => :get_hits, :mpm => :get_misses, :epm => :evictions }.each do |rate, counter|
    node_stats[rate] = (node_stats[counter] - previous_stats[counter]) / elapsed * 60
  end

  lookups = node_stats[:hpm] + node_stats[:mpm]
  if lookups > 0
    node_stats[:hit_ratio] = node_stats[:hpm] / lookups * 100
    node_stats[:miss_ratio] = node_stats[:mpm] / lookups * 100
  else
    node_stats[:hit_ratio] = 100
    node_stats[:miss_ratio] = 0
  end
  true
end
private :add_derivatives

# Records one value under "#{MEMCACHED}/<hostname_port>/<stat>". When
# truncate is true the value is converted with to_i first. A failure is
# logged at debug level (not printed to stdout) and does not stop the
# remaining stats from being recorded.
def record_node_stat(hostname_port, stat, value, truncate = false)
  value = value.to_i if truncate
  debug "recording #{MEMCACHED}/#{hostname_port}/#{stat.to_s} = #{value}"
  stats("#{MEMCACHED}/#{hostname_port}/#{stat.to_s}").record_data_point(value)
rescue => e
  debug "Could not record stat: #{stat}: #{e.message}\n #{e.backtrace.join("\n")}"
end
private :record_node_stat
|
331
|
+
|
332
|
+
# Looks up the stats object for metric name s via
# NewRelic::Agent.get_stats_no_scope.
def stats(s)
  NewRelic::Agent.get_stats_no_scope(s)
end
|
335
|
+
|
336
|
+
# Writes msg to the sampler's log at debug level, prefixed with "memcached: ".
def debug(msg)
  logger.debug "memcached: #{msg}"
end
|
339
|
+
end
|
340
|
+
|
@@ -2,12 +2,25 @@
|
|
2
2
|
# This is a configuration file for the RPM Agent, tailored
|
3
3
|
# for use as a system monitor.
|
4
4
|
#
|
5
|
+
# <%= generated_for_user %>
|
6
|
+
#
|
5
7
|
common: &default_settings
|
6
|
-
|
7
|
-
|
8
|
-
|
8
|
+
log_level: info
|
9
|
+
license_key: '<%= license_key %>'
|
10
|
+
|
9
11
|
app_name: System Monitor
|
10
12
|
ssl: false
|
13
|
+
|
14
|
+
# Set the array of nodes for the memcache monitor
|
15
|
+
memcached_nodes:
|
16
|
+
- localhost:11211
|
17
|
+
# - localhost:11212
|
18
|
+
# - localhost:11213
|
19
|
+
|
20
|
+
# These settings ensure we don't end up actually monitoring
|
21
|
+
# the IA agent itself--we aren't really interested in that.
|
22
|
+
# Don't change these.
|
23
|
+
disable_samplers: true
|
11
24
|
capture_params: false
|
12
25
|
transaction_tracer:
|
13
26
|
enabled: false
|
@@ -16,11 +29,7 @@ common: &default_settings
|
|
16
29
|
# provide newrelic configuration settings for these environments here.
|
17
30
|
production:
|
18
31
|
<<: *default_settings
|
19
|
-
enabled: true
|
20
32
|
|
21
33
|
development:
|
22
34
|
<<: *default_settings
|
23
|
-
|
24
|
-
# NOTE: for initial evaluation purposes, you may want to temporarily turn
|
25
|
-
# the agent on in development mode.
|
26
|
-
enabled: false
|
35
|
+
|
data/lib/newrelic_ia.rb
CHANGED
@@ -1,15 +1,12 @@
|
|
1
|
-
|
1
|
+
# Make sure the code is on the path, and load the version file.
|
2
|
+
|
3
|
+
$:.unshift(File.expand_path(File.dirname(__FILE__))) unless
|
2
4
|
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
5
|
|
4
|
-
require 'rubygems'
|
5
|
-
gem 'newrelic_rpm'
|
6
6
|
# You can select different newrelic.yml sections by setting the
|
7
7
|
# RUBY_ENV environment variable, similar to RAILS_ENV (which is also checked).
|
8
8
|
# Default is 'monitor'
|
9
9
|
# ENV['RUBY_ENV'] = 'production'
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
VERSION = '0.1.0'
|
14
|
-
end
|
15
|
-
end
|
11
|
+
require 'new_relic/ia/version.rb'
|
12
|
+
|
data/spec/cli_spec.rb
CHANGED
@@ -2,6 +2,7 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
2
2
|
require 'new_relic/ia/cli'
|
3
3
|
require 'new_relic/ia/iostat_reader'
|
4
4
|
require 'new_relic/ia/disk_sampler'
|
5
|
+
require 'new_relic/ia/memcached_sampler'
|
5
6
|
describe NewRelic::IA::CLI, "execute" do
|
6
7
|
before(:each) do
|
7
8
|
@stdout_io = StringIO.new
|
@@ -27,6 +28,11 @@ describe NewRelic::IA::CLI, "execute" do
|
|
27
28
|
stat = NewRelic::IA::CLI.execute(@stdout_io, [ "disk"])
|
28
29
|
stat.should == nil
|
29
30
|
end
|
31
|
+
it "should start memcached" do
|
32
|
+
NewRelic::Agent::StatsEngine.any_instance.expects(:add_harvest_sampler)
|
33
|
+
stat = NewRelic::IA::CLI.execute(@stdout_io, [ "memcached"])
|
34
|
+
stat.should == nil
|
35
|
+
end
|
30
36
|
it "should override the env" do
|
31
37
|
stat = NewRelic::IA::CLI.execute(@stdout_io, [ "disk", "-e", "production"])
|
32
38
|
stat.should == nil
|
@@ -0,0 +1,24 @@
|
|
1
|
+
STAT pid 14109
|
2
|
+
STAT uptime 5616278
|
3
|
+
STAT time 1270358085
|
4
|
+
STAT version 1.2.6
|
5
|
+
STAT pointer_size 64
|
6
|
+
STAT rusage_user 2311.216442
|
7
|
+
STAT rusage_system 6675.001161
|
8
|
+
STAT curr_items 643807
|
9
|
+
STAT total_items 651490771
|
10
|
+
STAT bytes 2896024756
|
11
|
+
STAT curr_connections 47
|
12
|
+
STAT total_connections 2449591
|
13
|
+
STAT connection_structures 1878
|
14
|
+
STAT cmd_get 732073179
|
15
|
+
STAT cmd_set 672331163
|
16
|
+
STAT get_hits 686458260
|
17
|
+
STAT get_misses 45614919
|
18
|
+
STAT evictions 25291211
|
19
|
+
STAT bytes_read 2349066138990
|
20
|
+
STAT bytes_written 6488057125567
|
21
|
+
STAT limit_maxbytes 3221225472
|
22
|
+
STAT threads 4
|
23
|
+
END
|
24
|
+
|