newrelic_ia 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,340 @@
1
+ require 'new_relic/ia/metric_names'
2
+ require 'socket'
3
+ #require 'active_support'
4
+
5
+ # Memcached stats sampler
6
+ # An IGN Hackday project
7
+
8
+ class NewRelic::IA::MemcachedSampler < NewRelic::Agent::Sampler
9
+
10
+ include NewRelic::IA::MetricNames
11
# Platform check performed while the class body is evaluated. Nothing
# platform specific is implemented for Darwin or Linux; every other
# platform only gets a warning in the CLI log.
unless RUBY_PLATFORM =~ /darwin/ || RUBY_PLATFORM =~ /linux/
  NewRelic::IA::CLI.log.warn "unsupported platform #{RUBY_PLATFORM}"
end
19
+
20
# Sets up the sampler: passes the name 'memcached' to the superclass
# constructor, declares the stats that are tracked and loads the list of
# nodes from the configuration (see parse_config).
def initialize
  super('memcached')

  # Stats that are converted to integers after parsing.
  @int_values = %w[
    uptime curr_items total_items bytes curr_connections total_connections connection_structures
    cmd_flush cmd_get cmd_set get_hits get_misses evictions bytes_read bytes_written limit_maxbytes threads
  ].map { |name| name.to_sym }

  # Stats computed from a single sample.
  @derived_values = [:free_bytes]

  # Stats computed from two consecutive samples. Order matters:
  # aggregate_stats treats the first two entries as the ratios and the
  # remaining entries as the per-minute rates.
  @derivatives = [:hit_ratio, :miss_ratio, :rpm, :gpm, :hpm, :mpm, :spm, :fpm, :epm]

  # Most recent stats hash per "hostname:port" (nil for a node that did
  # not answer the last poll).
  @last_stats = {}
  @memcached_nodes = parse_config
end
30
+
31
# Reads the list of memcached nodes from the agent configuration.
#
# The nodes are looked up under the 'memcached_nodes' key of the
# NewRelic::Control instance; each entry is a "hostname:port" string.
#
# Returns the configured array.
# Raises NewRelic::IA::InitError when the setting is missing, is not an
# Array, or is an empty Array.
def parse_config
  nodes = NewRelic::Control.instance['memcached_nodes']
  # The parentheses matter: `nodes.is_a? Array || nodes.empty?` is parsed
  # as `nodes.is_a?(Array || nodes.empty?)`, which never runs the empty
  # check and therefore accepts an empty list.
  if !nodes.is_a?(Array) || nodes.empty?
    raise NewRelic::IA::InitError, "No memcached_nodes array found in newrelic.yml."
  end
  nodes
end
39
+
40
# Reader for the node list stored by the constructor.
def memcached_nodes; @memcached_nodes; end
43
+
44
# Sanity check: asks every configured node for its stats once and raises
# NewRelic::Agent::Sampler::Unsupported, listing the nodes that did not
# answer, if any of them is unavailable. Returns nil otherwise.
def check
  unreachable = memcached_nodes.reject { |node| issue_stats(node) }
  return if unreachable.empty?

  raise NewRelic::Agent::Sampler::Unsupported, "Servers not available: #{unreachable.join(", ")}"
end
53
+
54
# Called once a minute in the agent worker thread. Queries every host in
# 'memcached_nodes', stores the parsed stats of each node in @last_stats
# (nil for a node that returned nothing) and then records the aggregates.
# Does nothing when no nodes are configured.
def poll
  return if memcached_nodes.empty?

  memcached_nodes.each do |node|
    raw = issue_stats(node)
    @last_stats[node] = raw ? parse_and_report_stats(node, raw) : nil
  end

  aggregate_stats
  debug "done with aggs"
end
71
+
72
# The log object exposed by the IA command line runner.
def logger
  cli = NewRelic::IA::CLI
  cli.log
end
75
+
76
# Sums the most recent sample of every node and records the totals under
# "#{MEMCACHED}/all/<stat>".
#
# * Integer and derived values are summed over all nodes that answered.
# * The per-minute rates are summed and the two ratios (the first two
#   entries of @derivatives) are averaged, both over the nodes that already
#   have ratios, i.e. nodes that have been sampled at least twice.
# * Nothing is recorded while the summed uptime is zero.
#
# Nodes that did not answer the last poll are stored as nil in @last_stats
# and are skipped here; previously a single down node raised inside the
# loop and suppressed the aggregates of all healthy nodes.
#
# Errors are logged at debug level and never propagate to the caller.
def aggregate_stats
  ratio_stats = @derivatives[0, 2]
  rate_stats = @derivatives[2, @derivatives.length - 2]
  summed_stats = @int_values + @derived_values

  totals = {}
  (summed_stats + rate_stats).each { |metric| totals[metric] = 0 }
  ratio_stats.each { |metric| totals[metric] = 0.0 }

  nodes_with_rates = 0
  @last_stats.each_value do |node_stats|
    next unless node_stats # node was down during the last poll

    summed_stats.each { |metric| totals[metric] += (node_stats[metric] || 0) }
    next unless node_stats[:hit_ratio] && node_stats[:miss_ratio]

    @derivatives.each { |metric| totals[metric] += node_stats[metric] }
    nodes_with_rates += 1
  end
  ratio_stats.each { |metric| totals[metric] /= nodes_with_rates } if nodes_with_rates > 0

  if totals[:uptime] > 0
    summed_stats.each { |stat| record_aggregate(stat, totals[stat]) }
    @derivatives.each { |stat| record_aggregate(stat, totals[stat].to_i) } if nodes_with_rates > 0
  else
    debug "skipping aggregates since aggregate uptime is zero"
  end
rescue => e
  debug "Could not record stat: stats\n #{e.backtrace.join("\n")}"
end

# Records one aggregated value; a failure is logged and does not stop the
# remaining stats from being recorded.
def record_aggregate(stat, value)
  debug "recording #{MEMCACHED}/all/#{stat.to_s} = #{value}"
  stats("#{MEMCACHED}/all/#{stat.to_s}").record_data_point(value)
rescue => e
  debug "Could not record stat: #{stat}\n #{e.backtrace.join("\n")}"
end
145
+
146
+
147
# Sends the "stats" command to one memcached node over TCP and returns the
# raw response text with the trailing END marker removed.
#
# hostname_port - a "hostname:port" string.
#
# Returns nil when the node cannot be reached or closes the connection
# before a complete response (terminated by END) was received. Connection
# problems are logged, never raised. This includes SocketError, which is
# raised for an unresolvable host or service name and is neither an IOError
# nor a SystemCallError, so it used to escape this method.
#
# TODO: send stats for down nodes
# TODO: UDP, or use the memcached gem to try UDP first and fall back to TCP
def issue_stats(hostname_port)
  debug "get stats from hostname #{hostname_port}"
  socket = nil
  begin
    parts = hostname_port.split(':', 2)
    socket = TCPSocket.open(parts.first, parts.last)
    socket.send("stats\r\n", 0)

    statistics = ""
    loop do
      data = socket.recv(4096)
      break if !data || data.length == 0 # peer closed the connection

      statistics << data
      end_index = statistics =~ /\s+END\s+$/
      return statistics[0, end_index] if end_index
    end
  rescue IOError, SystemCallError, SocketError => e
    logger.warn "memcached: unable to connect to memcached node at #{hostname_port}: #{e.message}"
    logger.info "memcached: unable to connect to memcached node at #{hostname_port}"
    logger.error "memcached: #{e.message}"
    debug e.backtrace.join("\n")
  ensure
    begin
      socket.close if socket
    rescue IOError, SystemCallError
      # Closing is best effort; the result of the call is already decided.
    end
  end
  nil
end
185
+
186
# Turns the raw text of a memcached "stats" response into a hash.
#
# hostname_port - node the text came from (only used for logging).
# stats_text    - response text made of "STAT <name> <value>" lines,
#                 optionally still terminated by END.
#
# Returns a Hash mapping each stat name (as a Symbol) to its value as an
# unconverted String. Returns an empty Hash, after logging an error, when
# the text does not split into complete "STAT name value" triplets. The
# original code used `break` here, which is invalid outside of a loop.
def parse_stats(hostname_port, stats_text)
  end_index = stats_text =~ /\s+END\s+$/
  stats_text = stats_text[0, end_index] if end_index

  # strip avoids an empty leading token when the text starts with whitespace
  tokens = stats_text.strip.split(/\s+/)
  if tokens.size % 3 != 0
    logger.error "memcached: unexcpected stats output from #{hostname_port}: #{stats_text}"
    return {}
  end

  parsed = {}
  tokens.each_slice(3) do |_marker, name, value|
    debug "#{name.to_sym} = #{value}"
    parsed[name.to_sym] = value
  end
  parsed
end
205
+
206
# Parses the stats text of one node, computes the derived values and
# records everything under "#{MEMCACHED}/<hostname:port>/<stat>".
#
# The text carries stats such as pid, uptime, time, version, curr_items,
# total_items, bytes, curr_connections, cmd_get, cmd_set, get_hits,
# get_misses, evictions, bytes_read, bytes_written, limit_maxbytes and
# threads. From them this method derives:
#
# * free_bytes             - limit_maxbytes - bytes
# * rpm/gpm/spm/fpm        - requests/gets/sets/flushes per minute
# * hpm/mpm/epm            - hits/misses/evictions per minute
# * hit_ratio, miss_ratio  - percentages over the interval (100/0 when
#                            there were no gets in the interval)
#
# The per-minute values need the previous sample of the same node from
# @last_stats. They are skipped when there is no previous sample, when no
# time has passed between the two samples (which would divide by zero), or
# when uptime went backwards (counters were reset, so deltas are negative).
#
# hostname_port - "hostname:port" of the node, used in the metric names.
# stats_text    - raw response text as accepted by parse_stats.
#
# Returns the stats Hash (including :time, which is kept for the next
# derivative calculation but not recorded), or nil when the text could not
# be parsed.
def parse_and_report_stats(hostname_port, stats_text)
  node_stats = parse_stats(hostname_port, stats_text)
  return nil if node_stats.nil? || node_stats.empty?

  # we store ints in the hash
  @int_values.each { |stat| node_stats[stat] = node_stats[stat].to_i }
  # time is not shipped to the collector but is needed for the derivatives
  node_stats[:time] = Time.at(node_stats[:time].to_i)
  node_stats[:free_bytes] = node_stats[:limit_maxbytes] - node_stats[:bytes]

  previous = @last_stats[hostname_port]
  elapsed = previous ? node_stats[:time] - previous[:time] : 0
  restarted = previous && node_stats[:uptime].to_i < previous[:uptime].to_i
  have_rates = elapsed > 0 && !restarted

  if have_rates
    # Time - Time is a Float, so these are float divisions.
    per_minute = lambda { |delta| delta / elapsed * 60 }
    requests = lambda { |sample| sample[:cmd_get] + sample[:cmd_set] + sample[:cmd_flush] }

    node_stats[:rpm] = per_minute.call(requests.call(node_stats) - requests.call(previous))
    node_stats[:gpm] = per_minute.call(node_stats[:cmd_get] - previous[:cmd_get])
    node_stats[:spm] = per_minute.call(node_stats[:cmd_set] - previous[:cmd_set])
    node_stats[:fpm] = per_minute.call(node_stats[:cmd_flush] - previous[:cmd_flush])
    node_stats[:hpm] = per_minute.call(node_stats[:get_hits] - previous[:get_hits])
    node_stats[:mpm] = per_minute.call(node_stats[:get_misses] - previous[:get_misses])
    node_stats[:epm] = per_minute.call(node_stats[:evictions] - previous[:evictions])

    lookups = node_stats[:hpm] + node_stats[:mpm]
    if lookups > 0
      node_stats[:hit_ratio] = node_stats[:hpm] / lookups * 100
      node_stats[:miss_ratio] = node_stats[:mpm] / lookups * 100
    else
      node_stats[:hit_ratio] = 100
      node_stats[:miss_ratio] = 0
    end
  end

  (@int_values + @derived_values).each do |stat|
    record_node_stat(hostname_port, stat, node_stats[stat])
  end
  if have_rates
    @derivatives.each { |stat| record_node_stat(hostname_port, stat, node_stats[stat].to_i) }
  end

  debug "Done with record data"
  node_stats
end

# Records one value of one node; a failure is logged at debug level and
# does not stop the remaining stats from being recorded.
def record_node_stat(hostname_port, stat, value)
  debug "recording #{MEMCACHED}/#{hostname_port}/#{stat.to_s} = #{value}"
  stats("#{MEMCACHED}/#{hostname_port}/#{stat.to_s}").record_data_point(value)
rescue => e
  debug "Could not record stat: #{stat}\n #{e.backtrace.join("\n")}"
end
331
+
332
# Looks up the unscoped stats object for the metric name +s+ through
# NewRelic::Agent.
def stats(s)
  agent = NewRelic::Agent
  agent.get_stats_no_scope(s)
end
335
+
336
# Writes +msg+ to the sampler's logger at debug level, prefixed with
# "memcached: ".
def debug(msg)
  logger.debug("memcached: " + msg.to_s)
end
339
+ end
340
+
@@ -4,5 +4,5 @@ module NewRelic::IA::MetricNames
4
4
  DISK_IO = "System/Resource/DiskIO/b"
5
5
  SYSTEM_CPU = "System/CPU/System/percent"
6
6
  USER_CPU = "System/CPU/User/percent"
7
-
8
- end
7
+ MEMCACHED = "System/Memcached"
8
+ end
@@ -2,12 +2,25 @@
2
2
  # This is a configuration file for the RPM Agent, tailored
3
3
  # for use as a system monitor.
4
4
  #
5
+ # <%= generated_for_user %>
6
+ #
5
7
  common: &default_settings
6
- license_key: 'your licene key here'
7
- agent_enabled: true
8
- disable_samplers: true
8
+ log_level: info
9
+ license_key: '<%= license_key %>'
10
+
9
11
  app_name: System Monitor
10
12
  ssl: false
13
+
14
+ # Set the array of nodes for the memcache monitor
15
+ memcached_nodes:
16
+ - localhost:11211
17
+ # - localhost:11212
18
+ # - localhost:11213
19
+
20
+ # These settings ensure we don't end up actually monitoring
21
+ # the IA agent itself--we aren't really interested in that.
22
+ # Don't change these.
23
+ disable_samplers: true
11
24
  capture_params: false
12
25
  transaction_tracer:
13
26
  enabled: false
@@ -16,11 +29,7 @@ common: &default_settings
16
29
  # provide newrelic configuration settings for these environments here.
17
30
  production:
18
31
  <<: *default_settings
19
- enabled: true
20
32
 
21
33
  development:
22
34
  <<: *default_settings
23
- # turn off communication to RPM service in development mode.
24
- # NOTE: for initial evaluation purposes, you may want to temporarily turn
25
- # the agent on in development mode.
26
- enabled: false
35
+
@@ -0,0 +1,5 @@
1
module NewRelic
  module IA
    # Version of the newrelic_ia gem. Frozen so the shared constant cannot
    # be modified in place by code that reads it.
    VERSION = '0.2.0'.freeze
  end
end
@@ -1,15 +1,12 @@
1
- $:.unshift(File.dirname(__FILE__)) unless
1
+ # Make sure the code is on the path, and load the version file.
2
+
3
+ $:.unshift(File.expand_path(File.dirname(__FILE__))) unless
2
4
  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
5
 
4
- require 'rubygems'
5
- gem 'newrelic_rpm'
6
6
  # You can select different newrelic.yml sections by setting the
7
7
  # RUBY_ENV environment variable, similar to RAILS_ENV (which is also checked).
8
8
  # Default is 'monitor'
9
9
  # ENV['RUBY_ENV'] = 'production'
10
10
 
11
- module NewRelic
12
- module IA
13
- VERSION = '0.1.0'
14
- end
15
- end
11
+ require 'new_relic/ia/version.rb'
12
+
@@ -2,6 +2,7 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
  require 'new_relic/ia/cli'
3
3
  require 'new_relic/ia/iostat_reader'
4
4
  require 'new_relic/ia/disk_sampler'
5
+ require 'new_relic/ia/memcached_sampler'
5
6
  describe NewRelic::IA::CLI, "execute" do
6
7
  before(:each) do
7
8
  @stdout_io = StringIO.new
@@ -27,6 +28,11 @@ describe NewRelic::IA::CLI, "execute" do
27
28
  stat = NewRelic::IA::CLI.execute(@stdout_io, [ "disk"])
28
29
  stat.should == nil
29
30
  end
31
+ it "should start memcached" do
32
+ NewRelic::Agent::StatsEngine.any_instance.expects(:add_harvest_sampler)
33
+ stat = NewRelic::IA::CLI.execute(@stdout_io, [ "memcached"])
34
+ stat.should == nil
35
+ end
30
36
  it "should override the env" do
31
37
  stat = NewRelic::IA::CLI.execute(@stdout_io, [ "disk", "-e", "production"])
32
38
  stat.should == nil
@@ -0,0 +1,24 @@
1
+ STAT pid 14109
2
+ STAT uptime 5616278
3
+ STAT time 1270358085
4
+ STAT version 1.2.6
5
+ STAT pointer_size 64
6
+ STAT rusage_user 2311.216442
7
+ STAT rusage_system 6675.001161
8
+ STAT curr_items 643807
9
+ STAT total_items 651490771
10
+ STAT bytes 2896024756
11
+ STAT curr_connections 47
12
+ STAT total_connections 2449591
13
+ STAT connection_structures 1878
14
+ STAT cmd_get 732073179
15
+ STAT cmd_set 672331163
16
+ STAT get_hits 686458260
17
+ STAT get_misses 45614919
18
+ STAT evictions 25291211
19
+ STAT bytes_read 2349066138990
20
+ STAT bytes_written 6488057125567
21
+ STAT limit_maxbytes 3221225472
22
+ STAT threads 4
23
+ END
24
+