newrelic_ia 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,340 @@
1
+ require 'new_relic/ia/metric_names'
2
+ require 'socket'
3
+ #require 'active_support'
4
+
5
+ # Memcached stats sampler
6
+ # An IGN Hackday project
7
+
8
+ class NewRelic::IA::MemcachedSampler < NewRelic::Agent::Sampler
9
+
10
+ include NewRelic::IA::MetricNames
11
# Runs once when the class body is evaluated. No platform-specific handling
# exists yet for darwin or linux; every other platform only gets a warning.
unless RUBY_PLATFORM =~ /darwin/ || RUBY_PLATFORM =~ /linux/
  NewRelic::IA::CLI.log.warn "unsupported platform #{RUBY_PLATFORM}"
end
19
+
20
# Registers the sampler under the id 'memcached' and sets up the metric
# name lists used by the reporting methods.
#
# @int_values     - raw memcached counters, converted with to_i before reporting.
# @derived_values - values computed from the raw counters (free_bytes).
# @derivatives    - values computed from the difference between two polls.
#                   The first two entries must stay the ratios: aggregate_stats
#                   averages entries [0, 2] and sums the rest.
# @last_stats     - stats of the previous poll, keyed by "hostname:port".
def initialize
  super 'memcached'

  @int_values = [
    :uptime, :curr_items, :total_items, :bytes,
    :curr_connections, :total_connections, :connection_structures,
    :cmd_flush, :cmd_get, :cmd_set, :get_hits, :get_misses,
    :evictions, :bytes_read, :bytes_written, :limit_maxbytes, :threads
  ]
  @derived_values = [:free_bytes]
  @derivatives = [
    :hit_ratio, :miss_ratio,
    :rpm, :gpm, :hpm, :mpm, :spm, :fpm, :epm
  ]

  @last_stats = {}
  @memcached_nodes = parse_config
end
30
+
31
# Reads the list of memcached nodes from the agent configuration.
#
# The nodes come from the 'memcached_nodes' entry of NewRelic::Control.instance
# (newrelic.yml); each entry is a "hostname:port" string.
#
# Returns the configured Array of nodes.
# Raises NewRelic::IA::InitError when the entry is missing, is not an Array,
# or is an empty Array.
def parse_config
  memcached_nodes = NewRelic::Control.instance['memcached_nodes']
  # The parentheses matter: `x.is_a? Array || x.empty?` is parsed as
  # `x.is_a?(Array || x.empty?)`, which never evaluates the emptiness check.
  if !memcached_nodes.is_a?(Array) || memcached_nodes.empty?
    raise NewRelic::IA::InitError, "No memcached_nodes array found in newrelic.yml."
  end
  memcached_nodes
end
39
+
40
# The node list ("hostname:port" strings) captured by initialize.
def memcached_nodes
  @memcached_nodes
end
43
+
44
# Sanity check: asks every configured node for its stats once.
#
# Raises NewRelic::Agent::Sampler::Unsupported, listing the nodes that did
# not return any stats text, when at least one node is unavailable.
def check
  unreachable = memcached_nodes.reject { |node| issue_stats node }
  return if unreachable.empty?

  raise NewRelic::Agent::Sampler::Unsupported, "Servers not available: #{unreachable.join(", ")}"
end
53
+
54
# Called once a minute on the agent worker thread. Queries each host in
# 'memcached_nodes', reports its stats and remembers them in @last_stats
# (nil for a node that returned nothing), then reports the aggregates.
# Does nothing when no nodes are configured.
def poll
  return if memcached_nodes.empty?

  memcached_nodes.each do |node|
    raw_stats = issue_stats node
    @last_stats[node] = raw_stats ? parse_and_report_stats(node, raw_stats) : nil
  end

  aggregate_stats
  debug "done with aggs"
end
71
+
72
# The log object of the command line interface, used for all sampler output.
def logger
  NewRelic::IA::CLI.log
end
75
+
76
# Combines the most recent per-node stats in @last_stats and records them
# under "#{MEMCACHED}/all/<stat>".
#
# * @int_values and @derived_values are summed over all nodes.
# * The first two @derivatives (hit_ratio, miss_ratio) are averaged over the
#   nodes that have them; the remaining derivatives are summed over those
#   same nodes. Derivatives are recorded as integers.
# * Nothing is recorded while the summed uptime is zero.
#
# Any error is logged at debug level and swallowed so that a reporting
# problem never propagates to the caller.
def aggregate_stats
  ratio_metrics = @derivatives[0, 2]
  rate_metrics = @derivatives[2..-1]
  summed_metrics = @int_values + @derived_values

  totals = {}
  (summed_metrics + rate_metrics).each { |metric| totals[metric] = 0 }
  ratio_metrics.each { |metric| totals[metric] = 0.0 }

  nodes_with_rates = 0
  @last_stats.each_value do |node_stats|
    # poll stores nil for a node that did not answer. Skip it instead of
    # failing on nil[...], which used to suppress every aggregate.
    next unless node_stats

    summed_metrics.each { |metric| totals[metric] += (node_stats[metric] || 0) }

    # Derivatives only exist once a node has been polled at least twice.
    next unless node_stats[:hit_ratio] && node_stats[:miss_ratio]
    @derivatives.each { |metric| totals[metric] += node_stats[metric] }
    nodes_with_rates += 1
  end

  if nodes_with_rates > 0
    totals[:hit_ratio] = totals[:hit_ratio] / nodes_with_rates
    totals[:miss_ratio] = totals[:miss_ratio] / nodes_with_rates
  end

  if totals[:uptime] > 0
    summed_metrics.each { |stat| record_aggregate_stat stat, totals[stat] }
    if nodes_with_rates > 0
      @derivatives.each { |stat| record_aggregate_stat stat, totals[stat].to_i }
    end
  else
    debug "skipping aggregates since aggregate uptime is zero"
  end
rescue => e
  debug "Could not record stat: stats\n #{e.backtrace.join("\n")}"
end

# Records one aggregate value; a failure is logged and does not stop the
# remaining stats from being recorded.
def record_aggregate_stat(stat, value)
  debug "recording #{MEMCACHED}/all/#{stat.to_s} = #{value}"
  stats("#{MEMCACHED}/all/#{stat.to_s}").record_data_point(value)
rescue => e
  debug "Could not record stat: #{stat}\n #{e.backtrace.join("\n")}"
end
145
+
146
+
147
# TODO: send stats for down nodes
# TODO: UDP, or use the memcached gem to try UDP first and fall back to TCP
#
# Opens a TCP connection to a memcached node, sends the "stats" command and
# reads the reply until the END terminator shows up.
#
# hostname_port - "hostname:port" String; the port defaults to 11211 when
#                 the entry has no ":port" part.
#
# Returns the reply text up to (not including) the END terminator, or nil
# when the node cannot be reached or closes the connection before END.
# Connection and name resolution failures are logged, never raised.
def issue_stats(hostname_port)
  debug "get stats from hostname #{hostname_port}"
  socket = nil
  begin
    hostname, port = hostname_port.split(':', 2)
    port = '11211' if port.nil? || port.empty?

    socket = TCPSocket.open(hostname, port)
    socket.send("stats\r\n", 0)

    statistics = ""
    loop do
      data = socket.recv(4096)
      break if !data || data.length == 0

      statistics << data
      end_index = statistics =~ /\s+END\s+$/
      return statistics[0, end_index] if end_index
    end
  rescue IOError, SystemCallError, SocketError => e
    # SocketError (failed host/service lookup) is neither an IOError nor a
    # SystemCallError; without it one bad entry aborts the whole poll.
    logger.warn "memcached: unable to connect to memcached node at #{hostname_port}: #{e.message}"
    logger.info "memcached: unable to connect to memcached node at #{hostname_port}"
    logger.error "memcached: #{e.message}"
    debug e.backtrace.join("\n")
  ensure
    begin
      socket.close if socket
    rescue IOError, SystemCallError
      nil # closing is best effort; the result has already been decided
    end
  end
  nil
end
185
+
186
# Turns the text reply of the memcached "stats" command into a Hash.
#
# hostname_port - node the text came from; only used in log messages.
# stats_text    - reply text made of "STAT <name> <value>" lines, with or
#                 without the trailing END terminator.
#
# Returns a Hash mapping each stat name (Symbol) to its value (String).
# Lines that do not have three fields are logged as errors and skipped, so
# one malformed line does not discard the rest of the reply.
def parse_stats(hostname_port, stats_text)
  end_index = stats_text =~ /\s+END\s+$/
  stats_text = stats_text[0, end_index] if end_index

  parsed = {}
  stats_text.split(/\r?\n/).each do |raw_line|
    line = raw_line.strip
    next if line.empty? || line == 'END'

    # Limit of 3 keeps a value that itself contains spaces in one piece.
    _label, name, value = line.split(/\s+/, 3)
    if name && value
      debug "#{name.to_sym} = #{value}"
      parsed[name.to_sym] = value
    else
      logger.error "memcached: unexpected stats output from #{hostname_port}: #{line}"
    end
  end
  parsed
end
205
+
206
# Parses the "stats" reply of one node, derives additional values and
# records everything under "#{MEMCACHED}/<hostname_port>/<stat>".
#
# The reply carries stats such as:
#
#   pid, uptime, time, version, pointer_size, rusage_user, rusage_system,
#   curr_items, total_items, bytes, curr_connections, total_connections,
#   connection_structures, cmd_flush, cmd_get, cmd_set, get_hits,
#   get_misses, evictions, bytes_read, bytes_written, limit_maxbytes,
#   threads, accepting_conns, listen_disabled_num
#
# Reported values:
#
#   * every stat listed in @int_values, as an integer
#   * free_bytes = limit_maxbytes - bytes
#   * once a previous sample of the node exists in @last_stats, per-minute
#     rates (rpm requests, gpm gets, spm sets, fpm flushes, hpm hits,
#     mpm misses, epm evictions) and hit_ratio / miss_ratio in percent
#
# String stats (version) and float stats (rusage_user, rusage_system) are
# not reported.
#
# hostname_port - node the text came from; part of the metric names.
# stats_text    - raw reply text, passed to parse_stats.
#
# Returns the stats Hash, including :time (a Time, kept only for the rate
# calculation) and the derived values. The caller stores it in @last_stats.
def parse_and_report_stats(hostname_port, stats_text)
  node_stats = parse_stats(hostname_port, stats_text)

  # Counters are stored as integers in the hash.
  @int_values.each { |stat| node_stats[stat] = node_stats[stat].to_i }
  # time is not shipped to the collector; it is only needed for the rates.
  node_stats[:time] = Time.at node_stats[:time].to_i
  node_stats[:free_bytes] = node_stats[:limit_maxbytes] - node_stats[:bytes]

  previous_stats = @last_stats[hostname_port]
  elapsed = previous_stats ? node_stats[:time] - previous_stats[:time] : 0
  # Two samples taken within the same second give a zero interval; dividing
  # by it would store NaN/Infinity rates, so rates are skipped in that case.
  have_rates = elapsed > 0

  if have_rates
    per_minute = lambda do |counter|
      (node_stats[counter] - previous_stats[counter]) / elapsed * 60
    end
    previous_r = previous_stats[:cmd_get] + previous_stats[:cmd_set] + previous_stats[:cmd_flush]
    current_r = node_stats[:cmd_get] + node_stats[:cmd_set] + node_stats[:cmd_flush]

    node_stats[:rpm] = (current_r - previous_r) / elapsed * 60
    node_stats[:gpm] = per_minute.call(:cmd_get)
    node_stats[:spm] = per_minute.call(:cmd_set)
    node_stats[:fpm] = per_minute.call(:cmd_flush)
    node_stats[:hpm] = per_minute.call(:get_hits)
    node_stats[:mpm] = per_minute.call(:get_misses)
    node_stats[:epm] = per_minute.call(:evictions)

    lookups = node_stats[:hpm] + node_stats[:mpm]
    if lookups > 0
      node_stats[:hit_ratio] = node_stats[:hpm] / lookups * 100
      node_stats[:miss_ratio] = node_stats[:mpm] / lookups * 100
    else
      # No lookups during the interval counts as a perfect hit ratio.
      node_stats[:hit_ratio] = 100
      node_stats[:miss_ratio] = 0
    end
  end

  (@int_values + @derived_values).each do |stat|
    record_node_stat hostname_port, stat, node_stats[stat]
  end
  if have_rates
    @derivatives.each do |stat|
      record_node_stat hostname_port, stat, node_stats[stat].to_i
    end
  end

  debug "Done with record data"
  node_stats
end

# Records one value for one node; a failure is logged through the sampler
# log and does not stop the remaining stats from being recorded.
def record_node_stat(hostname_port, stat, value)
  debug "recording #{MEMCACHED}/#{hostname_port}/#{stat.to_s} = #{value}"
  stats("#{MEMCACHED}/#{hostname_port}/#{stat.to_s}").record_data_point(value)
rescue => e
  debug "Could not record stat: #{stat}\n #{e.backtrace.join("\n")}"
end
331
+
332
# Looks up the unscoped stats object for the metric name s through
# NewRelic::Agent.get_stats_no_scope.
def stats(s)
  NewRelic::Agent.get_stats_no_scope s
end
335
+
336
# Writes msg to the sampler log at debug level, prefixed with "memcached: ".
def debug(msg)
  logger.debug("memcached: #{msg}")
end
339
+ end
340
+
@@ -4,5 +4,5 @@ module NewRelic::IA::MetricNames
4
4
  DISK_IO = "System/Resource/DiskIO/b"
5
5
  SYSTEM_CPU = "System/CPU/System/percent"
6
6
  USER_CPU = "System/CPU/User/percent"
7
-
8
- end
7
+ MEMCACHED = "System/Memcached"
8
+ end
@@ -2,12 +2,25 @@
2
2
  # This is a configuration file for the RPM Agent, tailored
3
3
  # for use as a system monitor.
4
4
  #
5
+ # <%= generated_for_user %>
6
+ #
5
7
  common: &default_settings
6
- license_key: 'your licene key here'
7
- agent_enabled: true
8
- disable_samplers: true
8
+ log_level: info
9
+ license_key: '<%= license_key %>'
10
+
9
11
  app_name: System Monitor
10
12
  ssl: false
13
+
14
+ # Set the array of nodes for the memcache monitor
15
+ memcached_nodes:
16
+ - localhost:11211
17
+ # - localhost:11212
18
+ # - localhost:11213
19
+
20
+ # These settings ensure we don't end up actually monitoring
21
+ # the IA agent itself--we aren't really interested in that.
22
+ # Don't change these.
23
+ disable_samplers: true
11
24
  capture_params: false
12
25
  transaction_tracer:
13
26
  enabled: false
@@ -16,11 +29,7 @@ common: &default_settings
16
29
  # provide newrelic configuration settings for these environments here.
17
30
  production:
18
31
  <<: *default_settings
19
- enabled: true
20
32
 
21
33
  development:
22
34
  <<: *default_settings
23
- # turn off communication to RPM service in development mode.
24
- # NOTE: for initial evaluation purposes, you may want to temporarily turn
25
- # the agent on in development mode.
26
- enabled: false
35
+
@@ -0,0 +1,5 @@
1
module NewRelic
  module IA
    # Version of the newrelic_ia gem. Frozen so the shared constant cannot
    # be modified in place.
    VERSION = '0.2.0'.freeze
  end
end
@@ -1,15 +1,12 @@
1
- $:.unshift(File.dirname(__FILE__)) unless
1
+ # Make sure the code is on the path, and load the version file.
2
+
3
+ $:.unshift(File.expand_path(File.dirname(__FILE__))) unless
2
4
  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
5
 
4
- require 'rubygems'
5
- gem 'newrelic_rpm'
6
6
  # You can select different newrelic.yml sections by setting the
7
7
  # RUBY_ENV environment variable, similar to RAILS_ENV (which is also checked).
8
8
  # Default is 'monitor'
9
9
  # ENV['RUBY_ENV'] = 'production'
10
10
 
11
- module NewRelic
12
- module IA
13
- VERSION = '0.1.0'
14
- end
15
- end
11
+ require 'new_relic/ia/version.rb'
12
+
@@ -2,6 +2,7 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
  require 'new_relic/ia/cli'
3
3
  require 'new_relic/ia/iostat_reader'
4
4
  require 'new_relic/ia/disk_sampler'
5
+ require 'new_relic/ia/memcached_sampler'
5
6
  describe NewRelic::IA::CLI, "execute" do
6
7
  before(:each) do
7
8
  @stdout_io = StringIO.new
@@ -27,6 +28,11 @@ describe NewRelic::IA::CLI, "execute" do
27
28
  stat = NewRelic::IA::CLI.execute(@stdout_io, [ "disk"])
28
29
  stat.should == nil
29
30
  end
31
+ it "should start memcached" do
32
+ NewRelic::Agent::StatsEngine.any_instance.expects(:add_harvest_sampler)
33
+ stat = NewRelic::IA::CLI.execute(@stdout_io, [ "memcached"])
34
+ stat.should == nil
35
+ end
30
36
  it "should override the env" do
31
37
  stat = NewRelic::IA::CLI.execute(@stdout_io, [ "disk", "-e", "production"])
32
38
  stat.should == nil
@@ -0,0 +1,24 @@
1
+ STAT pid 14109
2
+ STAT uptime 5616278
3
+ STAT time 1270358085
4
+ STAT version 1.2.6
5
+ STAT pointer_size 64
6
+ STAT rusage_user 2311.216442
7
+ STAT rusage_system 6675.001161
8
+ STAT curr_items 643807
9
+ STAT total_items 651490771
10
+ STAT bytes 2896024756
11
+ STAT curr_connections 47
12
+ STAT total_connections 2449591
13
+ STAT connection_structures 1878
14
+ STAT cmd_get 732073179
15
+ STAT cmd_set 672331163
16
+ STAT get_hits 686458260
17
+ STAT get_misses 45614919
18
+ STAT evictions 25291211
19
+ STAT bytes_read 2349066138990
20
+ STAT bytes_written 6488057125567
21
+ STAT limit_maxbytes 3221225472
22
+ STAT threads 4
23
+ END
24
+