newrelic_ia 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +4 -0
- data/LICENSE +22 -0
- data/README.rdoc +101 -38
- data/Rakefile +87 -43
- data/bin/newrelic_ia +11 -3
- data/lib/new_relic/ia/cli.rb +136 -83
- data/lib/new_relic/ia/memcached_sampler.rb +340 -0
- data/lib/new_relic/ia/metric_names.rb +2 -2
- data/lib/new_relic/ia/newrelic.yml +17 -8
- data/lib/new_relic/ia/version.rb +5 -0
- data/lib/newrelic_ia.rb +5 -8
- data/spec/cli_spec.rb +6 -0
- data/spec/memcached-1.out +24 -0
- data/spec/memcached-nodes.txt +4 -0
- data/spec/memcached_sampler_spec.rb +58 -0
- data/spec/spec_helper.rb +4 -2
- metadata +49 -35
- data/Manifest +0 -21
- data/newrelic_ia.gemspec +0 -41
@@ -0,0 +1,340 @@
|
|
1
|
+
require 'new_relic/ia/metric_names'
|
2
|
+
require 'socket'
|
3
|
+
#require 'active_support'
|
4
|
+
|
5
|
+
# Memcached stats sampler
|
6
|
+
# An IGN Hackday project
|
7
|
+
|
8
|
+
class NewRelic::IA::MemcachedSampler < NewRelic::Agent::Sampler
|
9
|
+
|
10
|
+
include NewRelic::IA::MetricNames
|
11
|
+
# Platform check performed once, when the class body is evaluated.
# darwin and linux are the recognized platforms and need no special
# handling; anything else only produces a warning in the CLI log.
if RUBY_PLATFORM !~ /darwin|linux/
  NewRelic::IA::CLI.log.warn "unsupported platform #{RUBY_PLATFORM}"
end
|
19
|
+
|
20
|
+
# Registers the sampler under the id 'memcached', sets up the lists of
# stat names this sampler works with, and loads the node list through
# parse_config.
def initialize
  super 'memcached'

  # Raw stats that parse_and_report_stats converts to integers.
  @int_values = [
    :uptime, :curr_items, :total_items, :bytes,
    :curr_connections, :total_connections, :connection_structures,
    :cmd_flush, :cmd_get, :cmd_set, :get_hits, :get_misses,
    :evictions, :bytes_read, :bytes_written, :limit_maxbytes, :threads
  ]

  # Values computed from the raw stats of a single sample.
  @derived_values = [:free_bytes]

  # Values computed from two consecutive samples.  The two ratios come
  # first; aggregate_stats treats the first two entries differently
  # from the remaining per-minute rates.
  @derivatives = [:hit_ratio, :miss_ratio, :rpm, :gpm, :hpm, :mpm, :spm, :fpm, :epm]

  # Latest stats per node, keyed by the "hostname:port" string.
  @last_stats = {}
  @memcached_nodes = parse_config
end
|
30
|
+
|
31
|
+
# Reads the list of memcached nodes from the 'memcached_nodes' setting
# exposed by NewRelic::Control (newrelic.yml).  Each entry is expected
# to be a "hostname:port" string.
#
# Returns the non-empty Array of nodes.
# Raises NewRelic::IA::InitError when the setting is missing, is not an
# Array, or is an empty Array.
def parse_config
  memcached_nodes = NewRelic::Control.instance['memcached_nodes']
  # The parentheses are required: `is_a? Array || ...` would pass the
  # whole `||` expression as the argument, so the empty check never ran.
  if !memcached_nodes.is_a?(Array) || memcached_nodes.empty?
    raise NewRelic::IA::InitError, "No memcached_nodes array found in newrelic.yml."
  end
  memcached_nodes
end
|
39
|
+
|
40
|
+
# Returns the value of @memcached_nodes (assigned from parse_config in
# initialize).
def memcached_nodes
|
41
|
+
@memcached_nodes
|
42
|
+
end
|
43
|
+
|
44
|
+
# Sanity check, make sure the servers are there.
|
45
|
+
# Asks every configured node for its stats once and gathers the nodes
# that gave no answer.
#
# Returns nil when every node answered.
# Raises NewRelic::Agent::Sampler::Unsupported, naming the silent
# nodes, when at least one node gave no stats text.
def check
  unreachable = memcached_nodes.reject { |node| issue_stats(node) }
  return if unreachable.empty?

  raise NewRelic::Agent::Sampler::Unsupported, "Servers not available: #{unreachable.join(", ")}"
end
|
53
|
+
|
54
|
+
# This gets called once a minute in the agent worker thread. It
|
55
|
+
# pings each host in the array 'memcached_nodes'
|
56
|
+
# Samples every configured node and then records the aggregates.
#
# For each node the parsed stats are stored in @last_stats under the
# node's "hostname:port" key; a node that returned no stats text is
# stored as nil.  Does nothing when the node list is empty.
def poll
  return if memcached_nodes.empty?

  memcached_nodes.each do |node|
    text = issue_stats(node)
    @last_stats[node] = text ? parse_and_report_stats(node, text) : nil
  end

  aggregate_stats
  debug "done with aggs"
end
|
71
|
+
|
72
|
+
# Returns the logger used by this sampler: NewRelic::IA::CLI.log.
def logger
|
73
|
+
NewRelic::IA::CLI.log
|
74
|
+
end
|
75
|
+
|
76
|
+
# Combines the latest per-node stats held in @last_stats and records
# the result under "#{MEMCACHED}/all/<stat>".
#
# * Integer and derived values are summed over every node that has
#   stats.
# * hit_ratio and miss_ratio are averaged over the nodes that carry
#   derivative values; the per-minute rates are summed over those nodes.
# * Nothing is recorded when the summed uptime is zero.
#
# Entries that are nil (poll stores nil for a node that returned no
# stats) are skipped, so one unreachable node no longer discards the
# aggregates of all the others.  Any unexpected error is logged at
# debug level and swallowed.
def aggregate_stats
  ratio_stats = @derivatives[0, 2]
  rate_stats  = @derivatives[2, @derivatives.length - 2]
  plain_stats = @int_values + @derived_values

  totals = {}
  (plain_stats + rate_stats).each { |stat| totals[stat] = 0 }
  ratio_stats.each { |stat| totals[stat] = 0.0 }

  nodes_with_derivatives = 0
  @last_stats.each_value do |node_stats|
    next unless node_stats

    plain_stats.each { |stat| totals[stat] += (node_stats[stat] || 0) }

    # Derivatives exist only once a node has been sampled twice.
    next unless node_stats[:hit_ratio] && node_stats[:miss_ratio]
    nodes_with_derivatives += 1
    @derivatives.each { |stat| totals[stat] += node_stats[stat] }
  end

  if nodes_with_derivatives > 0
    ratio_stats.each { |stat| totals[stat] = totals[stat] / nodes_with_derivatives }
  end

  if totals[:uptime] > 0
    plain_stats.each { |stat| record_aggregate_stat(stat, totals[stat]) }
    if nodes_with_derivatives > 0
      @derivatives.each { |stat| record_aggregate_stat(stat, totals[stat], true) }
    end
  else
    debug "skipping aggregates since aggregate uptime is zero"
  end
rescue => e
  debug "Could not record stat: stats\n #{e.message}\n #{e.backtrace.join("\n")}"
end

# Records one aggregate value under "#{MEMCACHED}/all/<stat>"; when
# truncate is true the value is converted with to_i first.  Failures
# are logged at debug level so the remaining stats are still recorded.
def record_aggregate_stat(stat, value, truncate = false)
  value = value.to_i if truncate
  debug "recording #{MEMCACHED}/all/#{stat.to_s} = #{value}"
  stats("#{MEMCACHED}/all/#{stat.to_s}").record_data_point(value)
rescue => e
  debug "Could not record stat: #{stat}\n #{e.message}\n #{e.backtrace.join("\n")}"
end
private :record_aggregate_stat
|
145
|
+
|
146
|
+
|
147
|
+
#TODO send stats for down nodes
|
148
|
+
# Opens a TCP connection to one memcached node, sends the "stats"
# command and reads the reply until the terminating END marker.
#
# hostname_port - node address as "hostname:port".
#
# Returns the reply text with the END marker removed, or nil when the
# connection fails, the address cannot be resolved (SocketError, e.g.
# unknown host or missing/invalid port), or the peer closes the
# connection before END arrives.  Failures are logged, never raised, so
# one bad node does not abort check or poll.
def issue_stats(hostname_port)
  debug "get stats from hostname #{hostname_port}"
  socket = nil
  begin
    # Without a ':' both first and last are the whole string; the
    # resulting lookup failure is reported through the rescue below.
    address = hostname_port.split(':', 2)
    socket = TCPSocket.open(address.first, address.last)
    socket.send("stats\r\n", 0)

    # TODO UDP or use memcached gem to use udp first and fallback to tcp

    statistics = ""
    loop do
      data = socket.recv(4096)
      # An empty read means the peer closed the connection.
      break if data.nil? || data.empty?

      statistics << data
      end_index = statistics =~ /\s+END\s+$/
      return statistics[0, end_index] if end_index
    end
  rescue IOError, SystemCallError, SocketError => e
    logger.warn "memcached: unable to connect to memcached node at #{hostname_port}: #{e.message}"
    logger.info "memcached: unable to connect to memcached node at #{hostname_port}"
    logger.error "memcached: #{e.message}"
    debug e.backtrace.join("\n")
  ensure
    begin
      socket.close if socket
    rescue StandardError
      # Closing is best effort; the result has already been decided.
      nil
    end
  end
  nil
end
|
185
|
+
|
186
|
+
# Converts the text returned by the memcached "stats" command into a
# Hash.
#
# hostname_port - node the text came from; used only in the error log.
# stats_text    - whitespace separated "STAT <name> <value>" triplets,
#                 optionally followed by an END marker.
#
# Returns a Hash mapping each stat name (Symbol) to its value (String).
# Returns an empty Hash, after logging an error, when the text does not
# split into complete triplets.
def parse_stats(hostname_port, stats_text)
  end_index = stats_text =~ /\s+END\s+$/
  stats_text = stats_text[0, end_index] if end_index

  # strip first so leading whitespace does not yield an empty token
  tokens = stats_text.strip.split(/\s+/)
  if tokens.size % 3 != 0
    logger.error "memcached: unexcpected stats output from #{hostname_port}: #{stats_text}"
    # The original `break` here is invalid outside a loop.
    return {}
  end

  parsed = {}
  0.step(tokens.size - 1, 3) do |i|
    name  = tokens[i + 1].to_sym
    value = tokens[i + 2]
    debug "#{name} = #{value}"
    parsed[name] = value
  end
  parsed
end
|
205
|
+
|
206
|
+
# Parses one node's "stats" reply, computes derived values and records
# everything under "#{MEMCACHED}/<hostname_port>/<stat>".
#
# The reply contains entries such as:
#   uptime = 2089, time = 1264673782, curr_items = 277, bytes = 544955,
#   cmd_get = 549, cmd_set = 356, get_hits = 185, get_misses = 364,
#   evictions = 0, limit_maxbytes = 1048576000, threads = 5, ...
#
# Recorded for every sample:
# * every stat listed in @int_values (converted to Integer)
# * free_bytes (limit_maxbytes - bytes)
#
# Recorded only when an earlier sample for the node exists in
# @last_stats and the node's clock has advanced since then:
# * rpm, gpm, spm, fpm, hpm, mpm, epm - per-minute rates
# * hit_ratio, miss_ratio             - percentages for the interval
#
# hostname_port - node address, used as metric path and @last_stats key.
# stats_text    - raw text as returned by issue_stats.
#
# Returns the Hash of stats; :time is included for the next rate
# calculation but is not recorded.
def parse_and_report_stats(hostname_port, stats_text)
  node_stats = parse_stats(hostname_port, stats_text)

  # Values arrive as strings; we store ints in the hash.
  @int_values.each { |stat| node_stats[stat] = node_stats[stat].to_i }
  # time is not shipped to the collector but is kept for derivative calculations
  node_stats[:time] = Time.at node_stats[:time].to_i
  node_stats[:free_bytes] = node_stats[:limit_maxbytes] - node_stats[:bytes]

  previous_stats = @last_stats[hostname_port]
  elapsed = previous_stats ? node_stats[:time] - previous_stats[:time] : 0
  # A zero or negative interval would produce Infinity/NaN rates, which
  # cannot be converted with to_i; skip the derivatives in that case.
  has_derivatives = elapsed > 0

  if has_derivatives
    previous_r = previous_stats[:cmd_get] + previous_stats[:cmd_set] + previous_stats[:cmd_flush]
    current_r  = node_stats[:cmd_get] + node_stats[:cmd_set] + node_stats[:cmd_flush]

    # unit per minute
    node_stats[:rpm] = (current_r - previous_r) / elapsed * 60
    [[:gpm, :cmd_get], [:spm, :cmd_set], [:fpm, :cmd_flush],
     [:hpm, :get_hits], [:mpm, :get_misses], [:epm, :evictions]].each do |rate, counter|
      node_stats[rate] = (node_stats[counter] - previous_stats[counter]) / elapsed * 60
    end

    lookups = node_stats[:hpm] + node_stats[:mpm]
    if lookups > 0
      node_stats[:hit_ratio]  = node_stats[:hpm] / lookups * 100
      node_stats[:miss_ratio] = node_stats[:mpm] / lookups * 100
    else
      # No lookups in the interval is reported as a perfect hit ratio.
      node_stats[:hit_ratio]  = 100
      node_stats[:miss_ratio] = 0
    end
  end

  (@int_values + @derived_values).each do |stat|
    record_node_stat(hostname_port, stat, node_stats[stat])
  end
  if has_derivatives
    @derivatives.each do |stat|
      record_node_stat(hostname_port, stat, node_stats[stat], true)
    end
  end

  debug "Done with record data"
  node_stats
end

# Records one value under "#{MEMCACHED}/<hostname_port>/<stat>"; when
# truncate is true the value is converted with to_i first.  Failures
# are logged at debug level so the remaining stats are still recorded.
def record_node_stat(hostname_port, stat, value, truncate = false)
  value = value.to_i if truncate
  debug "recording #{MEMCACHED}/#{hostname_port}/#{stat.to_s} = #{value}"
  stats("#{MEMCACHED}/#{hostname_port}/#{stat.to_s}").record_data_point(value)
rescue => e
  debug "Could not record stat: #{stat} (value <#{value.inspect}>): #{e.message}\n #{e.backtrace.join("\n")}"
end
private :record_node_stat
|
331
|
+
|
332
|
+
# Returns the result of NewRelic::Agent.get_stats_no_scope for the
# metric name s; callers in this class invoke record_data_point on it.
def stats(s)
|
333
|
+
NewRelic::Agent.get_stats_no_scope(s)
|
334
|
+
end
|
335
|
+
|
336
|
+
# Logs msg at debug level through logger, prefixed with "memcached: ".
def debug(msg)
|
337
|
+
logger.debug "memcached: #{msg}"
|
338
|
+
end
|
339
|
+
end
|
340
|
+
|
@@ -2,12 +2,25 @@
|
|
2
2
|
# This is a configuration file for the RPM Agent, tailored
|
3
3
|
# for use as a system monitor.
|
4
4
|
#
|
5
|
+
# <%= generated_for_user %>
|
6
|
+
#
|
5
7
|
common: &default_settings
|
6
|
-
|
7
|
-
|
8
|
-
|
8
|
+
log_level: info
|
9
|
+
license_key: '<%= license_key %>'
|
10
|
+
|
9
11
|
app_name: System Monitor
|
10
12
|
ssl: false
|
13
|
+
|
14
|
+
# Set the array of nodes for the memcache monitor
|
15
|
+
memcached_nodes:
|
16
|
+
- localhost:11211
|
17
|
+
# - localhost:11212
|
18
|
+
# - localhost:11213
|
19
|
+
|
20
|
+
# These settings ensure we don't end up actually monitoring
|
21
|
+
# the IA agent itself--we aren't really interested in that.
|
22
|
+
# Don't change these.
|
23
|
+
disable_samplers: true
|
11
24
|
capture_params: false
|
12
25
|
transaction_tracer:
|
13
26
|
enabled: false
|
@@ -16,11 +29,7 @@ common: &default_settings
|
|
16
29
|
# provide newrelic configuration settings for these environments here.
|
17
30
|
production:
|
18
31
|
<<: *default_settings
|
19
|
-
enabled: true
|
20
32
|
|
21
33
|
development:
|
22
34
|
<<: *default_settings
|
23
|
-
|
24
|
-
# NOTE: for initial evaluation purposes, you may want to temporarily turn
|
25
|
-
# the agent on in development mode.
|
26
|
-
enabled: false
|
35
|
+
|
data/lib/newrelic_ia.rb
CHANGED
@@ -1,15 +1,12 @@
|
|
1
|
-
|
1
|
+
# Make sure the code is on the path, and load the version file.
|
2
|
+
|
3
|
+
$:.unshift(File.expand_path(File.dirname(__FILE__))) unless
|
2
4
|
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
5
|
|
4
|
-
require 'rubygems'
|
5
|
-
gem 'newrelic_rpm'
|
6
6
|
# You can select different newrelic.yml sections by setting the
|
7
7
|
# RUBY_ENV environment variable, similar to RAILS_ENV (which is also checked).
|
8
8
|
# Default is 'monitor'
|
9
9
|
# ENV['RUBY_ENV'] = 'production'
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
VERSION = '0.1.0'
|
14
|
-
end
|
15
|
-
end
|
11
|
+
require 'new_relic/ia/version.rb'
|
12
|
+
|
data/spec/cli_spec.rb
CHANGED
@@ -2,6 +2,7 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
2
2
|
require 'new_relic/ia/cli'
|
3
3
|
require 'new_relic/ia/iostat_reader'
|
4
4
|
require 'new_relic/ia/disk_sampler'
|
5
|
+
require 'new_relic/ia/memcached_sampler'
|
5
6
|
describe NewRelic::IA::CLI, "execute" do
|
6
7
|
before(:each) do
|
7
8
|
@stdout_io = StringIO.new
|
@@ -27,6 +28,11 @@ describe NewRelic::IA::CLI, "execute" do
|
|
27
28
|
stat = NewRelic::IA::CLI.execute(@stdout_io, [ "disk"])
|
28
29
|
stat.should == nil
|
29
30
|
end
|
31
|
+
it "should start memcached" do
|
32
|
+
NewRelic::Agent::StatsEngine.any_instance.expects(:add_harvest_sampler)
|
33
|
+
stat = NewRelic::IA::CLI.execute(@stdout_io, [ "memcached"])
|
34
|
+
stat.should == nil
|
35
|
+
end
|
30
36
|
it "should override the env" do
|
31
37
|
stat = NewRelic::IA::CLI.execute(@stdout_io, [ "disk", "-e", "production"])
|
32
38
|
stat.should == nil
|
@@ -0,0 +1,24 @@
|
|
1
|
+
STAT pid 14109
|
2
|
+
STAT uptime 5616278
|
3
|
+
STAT time 1270358085
|
4
|
+
STAT version 1.2.6
|
5
|
+
STAT pointer_size 64
|
6
|
+
STAT rusage_user 2311.216442
|
7
|
+
STAT rusage_system 6675.001161
|
8
|
+
STAT curr_items 643807
|
9
|
+
STAT total_items 651490771
|
10
|
+
STAT bytes 2896024756
|
11
|
+
STAT curr_connections 47
|
12
|
+
STAT total_connections 2449591
|
13
|
+
STAT connection_structures 1878
|
14
|
+
STAT cmd_get 732073179
|
15
|
+
STAT cmd_set 672331163
|
16
|
+
STAT get_hits 686458260
|
17
|
+
STAT get_misses 45614919
|
18
|
+
STAT evictions 25291211
|
19
|
+
STAT bytes_read 2349066138990
|
20
|
+
STAT bytes_written 6488057125567
|
21
|
+
STAT limit_maxbytes 3221225472
|
22
|
+
STAT threads 4
|
23
|
+
END
|
24
|
+
|