techarch-newrelic_rpm 2.10.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +335 -0
- data/LICENSE +37 -0
- data/Manifest +159 -0
- data/README.md +138 -0
- data/Rakefile +22 -0
- data/bin/mongrel_rpm +33 -0
- data/bin/newrelic_cmd +4 -0
- data/cert/cacert.pem +34 -0
- data/init.rb +38 -0
- data/install.rb +45 -0
- data/lib/new_relic/agent.rb +281 -0
- data/lib/new_relic/agent/agent.rb +636 -0
- data/lib/new_relic/agent/chained_call.rb +13 -0
- data/lib/new_relic/agent/collection_helper.rb +66 -0
- data/lib/new_relic/agent/error_collector.rb +121 -0
- data/lib/new_relic/agent/instrumentation/active_merchant.rb +18 -0
- data/lib/new_relic/agent/instrumentation/active_record_instrumentation.rb +88 -0
- data/lib/new_relic/agent/instrumentation/authlogic.rb +8 -0
- data/lib/new_relic/agent/instrumentation/controller_instrumentation.rb +437 -0
- data/lib/new_relic/agent/instrumentation/data_mapper.rb +90 -0
- data/lib/new_relic/agent/instrumentation/dispatcher_instrumentation.rb +167 -0
- data/lib/new_relic/agent/instrumentation/memcache.rb +24 -0
- data/lib/new_relic/agent/instrumentation/merb/controller.rb +26 -0
- data/lib/new_relic/agent/instrumentation/merb/dispatcher.rb +13 -0
- data/lib/new_relic/agent/instrumentation/merb/errors.rb +8 -0
- data/lib/new_relic/agent/instrumentation/net.rb +23 -0
- data/lib/new_relic/agent/instrumentation/passenger_instrumentation.rb +20 -0
- data/lib/new_relic/agent/instrumentation/rack.rb +108 -0
- data/lib/new_relic/agent/instrumentation/rails/action_controller.rb +59 -0
- data/lib/new_relic/agent/instrumentation/rails/action_web_service.rb +27 -0
- data/lib/new_relic/agent/instrumentation/rails/dispatcher.rb +41 -0
- data/lib/new_relic/agent/instrumentation/rails/errors.rb +27 -0
- data/lib/new_relic/agent/instrumentation/sinatra.rb +39 -0
- data/lib/new_relic/agent/method_tracer.rb +349 -0
- data/lib/new_relic/agent/patch_const_missing.rb +125 -0
- data/lib/new_relic/agent/sampler.rb +12 -0
- data/lib/new_relic/agent/samplers/cpu_sampler.rb +49 -0
- data/lib/new_relic/agent/samplers/memory_sampler.rb +138 -0
- data/lib/new_relic/agent/samplers/mongrel_sampler.rb +22 -0
- data/lib/new_relic/agent/shim_agent.rb +21 -0
- data/lib/new_relic/agent/stats_engine.rb +22 -0
- data/lib/new_relic/agent/stats_engine/metric_stats.rb +111 -0
- data/lib/new_relic/agent/stats_engine/samplers.rb +71 -0
- data/lib/new_relic/agent/stats_engine/transactions.rb +152 -0
- data/lib/new_relic/agent/transaction_sampler.rb +310 -0
- data/lib/new_relic/agent/worker_loop.rb +118 -0
- data/lib/new_relic/commands/deployments.rb +145 -0
- data/lib/new_relic/commands/new_relic_commands.rb +30 -0
- data/lib/new_relic/control.rb +473 -0
- data/lib/new_relic/control/external.rb +13 -0
- data/lib/new_relic/control/merb.rb +22 -0
- data/lib/new_relic/control/rails.rb +145 -0
- data/lib/new_relic/control/ruby.rb +36 -0
- data/lib/new_relic/control/sinatra.rb +14 -0
- data/lib/new_relic/histogram.rb +89 -0
- data/lib/new_relic/local_environment.rb +328 -0
- data/lib/new_relic/merbtasks.rb +6 -0
- data/lib/new_relic/metric_data.rb +42 -0
- data/lib/new_relic/metric_parser.rb +124 -0
- data/lib/new_relic/metric_parser/action_mailer.rb +9 -0
- data/lib/new_relic/metric_parser/active_merchant.rb +26 -0
- data/lib/new_relic/metric_parser/active_record.rb +25 -0
- data/lib/new_relic/metric_parser/controller.rb +54 -0
- data/lib/new_relic/metric_parser/controller_cpu.rb +38 -0
- data/lib/new_relic/metric_parser/errors.rb +6 -0
- data/lib/new_relic/metric_parser/external.rb +50 -0
- data/lib/new_relic/metric_parser/mem_cache.rb +12 -0
- data/lib/new_relic/metric_parser/view.rb +61 -0
- data/lib/new_relic/metric_parser/web_frontend.rb +14 -0
- data/lib/new_relic/metric_parser/web_service.rb +9 -0
- data/lib/new_relic/metric_spec.rb +52 -0
- data/lib/new_relic/metrics.rb +7 -0
- data/lib/new_relic/noticed_error.rb +23 -0
- data/lib/new_relic/rack/metric_app.rb +56 -0
- data/lib/new_relic/rack/newrelic.ru +25 -0
- data/lib/new_relic/rack/newrelic.yml +25 -0
- data/lib/new_relic/rack_app.rb +5 -0
- data/lib/new_relic/recipes.rb +82 -0
- data/lib/new_relic/stats.rb +361 -0
- data/lib/new_relic/transaction_analysis.rb +121 -0
- data/lib/new_relic/transaction_sample.rb +666 -0
- data/lib/new_relic/version.rb +54 -0
- data/lib/new_relic_api.rb +313 -0
- data/lib/newrelic_rpm.rb +40 -0
- data/lib/tasks/all.rb +4 -0
- data/lib/tasks/install.rake +7 -0
- data/lib/tasks/tests.rake +13 -0
- data/newrelic.yml +227 -0
- data/recipes/newrelic.rb +6 -0
- data/techarch-newrelic_rpm.gemspec +32 -0
- data/test/active_record_fixtures.rb +55 -0
- data/test/config/newrelic.yml +46 -0
- data/test/config/test_control.rb +39 -0
- data/test/new_relic/agent/active_record_instrumentation_test.rb +264 -0
- data/test/new_relic/agent/agent_controller_test.rb +107 -0
- data/test/new_relic/agent/agent_test.rb +119 -0
- data/test/new_relic/agent/agent_test_controller.rb +44 -0
- data/test/new_relic/agent/classloader_patch_test.rb +56 -0
- data/test/new_relic/agent/collection_helper_test.rb +125 -0
- data/test/new_relic/agent/dispatcher_instrumentation_test.rb +76 -0
- data/test/new_relic/agent/error_collector_test.rb +172 -0
- data/test/new_relic/agent/method_tracer_test.rb +340 -0
- data/test/new_relic/agent/metric_data_test.rb +56 -0
- data/test/new_relic/agent/mock_ar_connection.rb +40 -0
- data/test/new_relic/agent/mock_scope_listener.rb +23 -0
- data/test/new_relic/agent/net_instrumentation_test.rb +63 -0
- data/test/new_relic/agent/stats_engine/metric_stats_test.rb +79 -0
- data/test/new_relic/agent/stats_engine/samplers_test.rb +81 -0
- data/test/new_relic/agent/stats_engine/stats_engine_test.rb +184 -0
- data/test/new_relic/agent/task_instrumentation_test.rb +126 -0
- data/test/new_relic/agent/testable_agent.rb +13 -0
- data/test/new_relic/agent/transaction_sample_builder_test.rb +195 -0
- data/test/new_relic/agent/transaction_sample_test.rb +186 -0
- data/test/new_relic/agent/transaction_sampler_test.rb +385 -0
- data/test/new_relic/agent/worker_loop_test.rb +103 -0
- data/test/new_relic/control_test.rb +113 -0
- data/test/new_relic/deployments_api_test.rb +68 -0
- data/test/new_relic/environment_test.rb +75 -0
- data/test/new_relic/metric_parser_test.rb +172 -0
- data/test/new_relic/metric_spec_test.rb +177 -0
- data/test/new_relic/shim_agent_test.rb +9 -0
- data/test/new_relic/stats_test.rb +291 -0
- data/test/new_relic/version_number_test.rb +74 -0
- data/test/test_helper.rb +38 -0
- data/test/ui/newrelic_controller_test.rb +14 -0
- data/test/ui/newrelic_helper_test.rb +53 -0
- data/ui/controllers/newrelic_controller.rb +220 -0
- data/ui/helpers/google_pie_chart.rb +55 -0
- data/ui/helpers/newrelic_helper.rb +317 -0
- data/ui/views/layouts/newrelic_default.rhtml +47 -0
- data/ui/views/newrelic/_explain_plans.rhtml +27 -0
- data/ui/views/newrelic/_sample.rhtml +19 -0
- data/ui/views/newrelic/_segment.rhtml +28 -0
- data/ui/views/newrelic/_segment_limit_message.rhtml +1 -0
- data/ui/views/newrelic/_segment_row.rhtml +14 -0
- data/ui/views/newrelic/_show_sample_detail.rhtml +24 -0
- data/ui/views/newrelic/_show_sample_sql.rhtml +20 -0
- data/ui/views/newrelic/_show_sample_summary.rhtml +3 -0
- data/ui/views/newrelic/_sql_row.rhtml +11 -0
- data/ui/views/newrelic/_stack_trace.rhtml +30 -0
- data/ui/views/newrelic/_table.rhtml +12 -0
- data/ui/views/newrelic/explain_sql.rhtml +42 -0
- data/ui/views/newrelic/images/arrow-close.png +0 -0
- data/ui/views/newrelic/images/arrow-open.png +0 -0
- data/ui/views/newrelic/images/blue_bar.gif +0 -0
- data/ui/views/newrelic/images/file_icon.png +0 -0
- data/ui/views/newrelic/images/gray_bar.gif +0 -0
- data/ui/views/newrelic/images/new_relic_rpm_desktop.gif +0 -0
- data/ui/views/newrelic/images/textmate.png +0 -0
- data/ui/views/newrelic/index.rhtml +57 -0
- data/ui/views/newrelic/javascript/prototype-scriptaculous.js +7288 -0
- data/ui/views/newrelic/javascript/transaction_sample.js +107 -0
- data/ui/views/newrelic/sample_not_found.rhtml +2 -0
- data/ui/views/newrelic/show_sample.rhtml +80 -0
- data/ui/views/newrelic/show_source.rhtml +3 -0
- data/ui/views/newrelic/stylesheets/style.css +484 -0
- data/ui/views/newrelic/threads.rhtml +52 -0
- metadata +330 -0
@@ -0,0 +1,636 @@
|
|
1
|
+
require 'socket'
|
2
|
+
require 'net/https'
|
3
|
+
require 'net/http'
|
4
|
+
require 'logger'
|
5
|
+
require 'zlib'
|
6
|
+
require 'stringio'
|
7
|
+
|
8
|
+
# The NewRelic Agent collects performance data from ruby applications
|
9
|
+
# in realtime as the application runs, and periodically sends that
|
10
|
+
# data to the NewRelic server.
|
11
|
+
module NewRelic::Agent
|
12
|
+
|
13
|
+
# The Agent is a singleton that is instantiated when the plugin is
|
14
|
+
# activated.
|
15
|
+
class Agent
|
16
|
+
|
17
|
+
# Specifies the version of the agent's communication protocol with
|
18
|
+
# the NewRelic hosted site.
|
19
|
+
|
20
|
+
PROTOCOL_VERSION = 6
|
21
|
+
|
22
|
+
attr_reader :obfuscator
|
23
|
+
attr_reader :stats_engine
|
24
|
+
attr_reader :transaction_sampler
|
25
|
+
attr_reader :error_collector
|
26
|
+
attr_reader :task_loop
|
27
|
+
attr_reader :record_sql
|
28
|
+
attr_reader :histogram
|
29
|
+
|
30
|
+
# Should only be called by NewRelic::Control
|
31
|
+
# Returns the shared Agent, building it with +new+ on the first call and
# memoizing it in a class-level instance variable. No locking is done here.
def self.instance
  @instance ||= new
end
|
34
|
+
# This method is deprecated. Use NewRelic::Agent.manual_start
|
35
|
+
# Retired instance-level entry point. Both arguments are accepted only so
# that old call sites reach the error below instead of an ArgumentError.
#
# Raises RuntimeError unconditionally.
def manual_start(ignored=nil, also_ignored=nil)
  raise RuntimeError, "This method no longer supported. Instead use the class method NewRelic::Agent.manual_start"
end
|
38
|
+
|
39
|
+
# this method makes sure that the agent is running. it's important
|
40
|
+
# for passenger where processes are forked and the agent is
|
41
|
+
# dormant
|
42
|
+
#
|
43
|
+
# Restarts the worker loop when this process does not have one running.
#
# Does nothing when the agent is disabled, not in monitor mode, or the
# license was already found invalid. Returns nil in those cases and when
# the loop is already running; otherwise returns whatever
# @stats_engine.spawn_sampler_thread returns.
def ensure_worker_thread_started
  return unless control.agent_enabled? && control.monitor_mode? && !@invalid_license
  return if running?

  log.info "Detected that the worker loop is not running. Restarting."
  # Assume we've been forked, clear out stats that are left over from parent process
  @stats_engine.reset_stats
  launch_worker_thread
  @stats_engine.spawn_sampler_thread
end
|
53
|
+
|
54
|
+
# True if the worker thread has been started. Doesn't necessarily
|
55
|
+
# mean we are connected
|
56
|
+
# Truthy when the agent is enabled, in monitor mode, and @task_loop exists
# and reports the current process id. The pid comparison presumably detects
# a loop inherited from a parent process after a fork - confirm against
# WorkerLoop#pid.
def running?
  control.agent_enabled? && control.monitor_mode? && @task_loop && @task_loop.pid == $$
end
|
59
|
+
|
60
|
+
# True if we have initialized and completed 'start'
|
61
|
+
# Returns the raw @started flag: set to true by #start and reset to nil by
# #shutdown, so the result is truthy/falsy rather than strictly boolean.
def started?
  @started
end
|
64
|
+
|
65
|
+
# Attempt a graceful shutdown of the agent.
|
66
|
+
# Stops the worker loop and sends the best-effort disconnect message.
#
# No-op unless #started? is truthy. Errors raised by graceful_disconnect are
# logged and swallowed. @started is reset to nil whether or not a task loop
# existed.
def shutdown
  return unless started?

  if @task_loop
    @task_loop.stop

    log.debug "Starting Agent shutdown"

    # if litespeed, then ignore all future SIGUSR1 - it's
    # litespeed trying to shut us down
    if control.dispatcher == :litespeed
      Signal.trap("SIGUSR1", "IGNORE")
      Signal.trap("SIGTERM", "IGNORE")
    end

    begin
      graceful_disconnect
    rescue => e
      log.error e
      # backtrace is nil for an exception that was never raised; Array() keeps
      # the logging itself from failing inside the rescue.
      log.error Array(e.backtrace).join("\n")
    end
  end
  @started = nil
end
|
90
|
+
|
91
|
+
# Clears this thread's custom parameters, then delegates to
# @stats_engine.start_transaction and returns its result.
def start_transaction
  Thread.current[:custom_params] = nil
  @stats_engine.start_transaction
end
|
95
|
+
|
96
|
+
# Clears this thread's custom parameters, then delegates to
# @stats_engine.end_transaction and returns its result.
def end_transaction
  Thread.current[:custom_params] = nil
  @stats_engine.end_transaction
end
|
100
|
+
|
101
|
+
# Sets the thread-local :record_sql flag to +should_record+.
#
# Returns the previous value of the flag, or true when it had never been
# set on this thread.
def set_record_sql(should_record)
  previous = Thread.current[:record_sql]
  Thread.current[:record_sql] = should_record
  previous.nil? ? true : previous
end
|
106
|
+
|
107
|
+
# Sets the thread-local :record_tt flag to +should_record+.
#
# Returns the previous value of the flag, or true when it had never been
# set on this thread.
def set_record_tt(should_record)
  previous = Thread.current[:record_tt]
  Thread.current[:record_tt] = should_record
  previous.nil? ? true : previous
end
|
112
|
+
# Push flag indicating whether we should be tracing in this
|
113
|
+
# thread.
|
114
|
+
# Appends +should_trace+ to the thread-local :newrelic_untraced stack,
# creating the stack on first use. Returns the stack array.
def push_trace_execution_flag(should_trace=false)
  flags = (Thread.current[:newrelic_untraced] ||= [])
  flags << should_trace
end
|
117
|
+
|
118
|
+
# Pop the current trace execution status. Restore trace execution status
|
119
|
+
# to what it was before we pushed the current flag.
|
120
|
+
# Removes and returns the most recently pushed flag from the thread-local
# :newrelic_untraced stack. Returns nil when the stack was never created or
# is empty.
def pop_trace_execution_flag
  flags = Thread.current[:newrelic_untraced]
  flags.pop if flags
end
|
123
|
+
|
124
|
+
# Merges +params+ into this thread's :custom_params hash, creating the hash
# on first use. Returns the merged hash.
def add_custom_parameters(params)
  # ||= replaces a local that was named `p`, which shadowed Kernel#p.
  (Thread.current[:custom_params] ||= {}).merge!(params)
end
|
129
|
+
|
130
|
+
# Returns this thread's :custom_params hash, or a fresh empty hash when none
# has been set. The empty hash is not stored back on the thread.
def custom_params
  Thread.current[:custom_params] || {}
end
|
133
|
+
|
134
|
+
# Installs +block+ as part of the SQL obfuscator.
#
# type - :before  wraps block and the current obfuscator in a
#                 NewRelic::ChainedCall, block first;
#        :after   same, with the current obfuscator first;
#        :replace discards the current obfuscator and uses block alone.
#
# Returns the new obfuscator. Raises RuntimeError for any other type.
def set_sql_obfuscator(type, &block)
  @obfuscator =
    case type
    when :before  then NewRelic::ChainedCall.new(block, @obfuscator)
    when :after   then NewRelic::ChainedCall.new(@obfuscator, block)
    when :replace then block
    else raise "unknown sql_obfuscator type #{type}"
    end
end
|
145
|
+
|
146
|
+
# Shortcut for the logger exposed by NewRelic::Control.instance.
def log
  NewRelic::Control.instance.log
end
|
149
|
+
|
150
|
+
# Start up the agent. This verifies that the agent_enabled? is
|
151
|
+
# true and initializes the sampler based on the current
|
152
|
+
# configuration settings. Then it will fire up the background
|
153
|
+
# thread for sending data to the server if applicable.
|
154
|
+
# Reads the 'transaction_tracer' settings, records them in instance
# variables, and - in monitor mode with a 40-character license key -
# launches the worker thread and registers the at_exit shutdown hook.
#
# Returns early (nil) when already started or when the agent is disabled.
def start
  if started?
    control.log! "Agent Started Already!", :error
    return
  end
  return if !control.agent_enabled?

  @local_host = determine_host

  log.info "Web container: #{control.dispatcher.to_s}"

  @started = true

  sampler_config = control.fetch('transaction_tracer', {})
  @use_transaction_sampler = sampler_config.fetch('enabled', true)

  @record_sql = sampler_config.fetch('record_sql', :obfuscated).to_sym

  # use transaction_threshold: 4.0 to force the TT collection
  # threshold to 4 seconds
  # use transaction_threshold: apdex_f to use your apdex t value
  # multiplied by 4
  # undefined transaction_threshold defaults to 2.0
  apdex_f = 4 * NewRelic::Control.instance.apdex_t
  @slowest_transaction_threshold = sampler_config.fetch('transaction_threshold', 2.0)
  # The setting is numeric unless the user wrote "apdex_f". Matching on to_s
  # keeps the regexp away from a Float, which has no =~ on current Ruby.
  if @slowest_transaction_threshold.to_s =~ /apdex_f/i
    @slowest_transaction_threshold = apdex_f
  end
  @slowest_transaction_threshold = @slowest_transaction_threshold.to_f

  if @use_transaction_sampler
    log.info "Transaction tracing threshold is #{@slowest_transaction_threshold} seconds."
  else
    log.info "Transaction tracing not enabled."
  end
  @explain_threshold = sampler_config.fetch('explain_threshold', 0.5).to_f
  @explain_enabled = sampler_config.fetch('explain_enabled', true)
  @random_sample = sampler_config.fetch('random_sample', false)
  log.warn "Agent is configured to send raw SQL to RPM service" if @record_sql == :raw
  # Initialize transaction sampler
  @transaction_sampler.random_sampling = @random_sample

  if control.monitor_mode?
    if !control.license_key
      @invalid_license = true
      control.log! "No license key found. Please edit your newrelic.yml file and insert your license key.", :error
    elsif control.license_key.length != 40
      @invalid_license = true
      control.log! "Invalid license key: #{control.license_key}", :error
    else
      launch_worker_thread
      # When the VM shuts down, attempt to send a message to the
      # server that this agent run is stopping, assuming it has
      # successfully connected
      # This shutdown handler doesn't work if Sinatra is running
      # because it executes in the shutdown handler!
      at_exit { shutdown } unless [:sinatra, :unicorn].include? NewRelic::Control.instance.dispatcher
    end
  end
  control.log! "New Relic RPM Agent #{NewRelic::VERSION::STRING} Initialized: pid = #{$$}"
  control.log! "Agent Log found in #{NewRelic::Control.instance.log_file}" if NewRelic::Control.instance.log_file
end
|
216
|
+
|
217
|
+
private
|
218
|
+
# The server object requests are sent to: control.server on first use,
# memoized in @collector (which #connect may overwrite after a redirect).
def collector
  @collector ||= control.server
end
|
221
|
+
|
222
|
+
# Connect to the server, and run the worker loop forever.
|
223
|
+
# Will not return.
|
224
|
+
# Registers the periodic send tasks on @task_loop and then runs the loop.
#
# Every task uses @report_period. Timeslice data is always scheduled;
# transaction samples and errors only when the matching @should_send_* flag
# and local enable flag are both set. Any StandardError escaping the loop
# marks the agent disconnected and is re-raised.
def run_task_loop
  # determine the reporting period (server based)
  # note if the agent attempts to report more frequently than
  # the specified report data, then it will be ignored.
  control.log! "Reporting performance data every #{@report_period} seconds."
  @task_loop.add_task(@report_period, "Timeslice Data Send") do
    harvest_and_send_timeslice_data
  end

  if @should_send_samples && @use_transaction_sampler
    @task_loop.add_task(@report_period, "Transaction Sampler Send") do
      harvest_and_send_slowest_sample
    end
  elsif !control.developer_mode?
    # We still need the sampler for dev mode.
    @transaction_sampler.disable
  end

  if @should_send_errors && @error_collector.enabled
    @task_loop.add_task(@report_period, "Error Send") do
      harvest_and_send_errors
    end
  end
  log.debug("Running worker loop")
  @task_loop.run
rescue StandardError
  log.debug("Error in worker loop: #{$!}")
  @connected = false
  raise
end
|
255
|
+
|
256
|
+
# Creates a new WorkerLoop in @task_loop and starts the background thread
# that connects to the server and then runs the loop.
#
# Skipped entirely in a Passenger ApplicationSpawner process. Inside the
# thread, a ForceRestartException resets the harvest state, sleeps 30
# seconds and starts over; every other exception ends the thread after
# being logged.
def launch_worker_thread
  if (control.dispatcher == :passenger && $0 =~ /ApplicationSpawner/)
    log.debug "Process is passenger spawner - don't connect to RPM service"
    return
  end

  @task_loop = WorkerLoop.new(log)

  if control['check_bg_loading']
    log.warn "Agent background loading checking turned on"
    require 'new_relic/agent/patch_const_missing'
    ClassLoadingWatcher.enable_warning
  end
  log.debug "Creating RPM worker thread."
  @worker_thread = Thread.new do
    begin
      ClassLoadingWatcher.background_thread=Thread.current if control['check_bg_loading']
      NewRelic::Agent.disable_all_tracing do
        connect
        run_task_loop if @connected
      end
    rescue NewRelic::Agent::ForceRestartException => e
      log.info e.message
      # disconnect and start over.
      # clear the stats engine
      @metric_ids = {}
      @unsent_errors = []
      @traces = nil
      @unsent_timeslice_data = {}
      @last_harvest_time = Time.now
      @connected = false
      # Wait a short time before trying to reconnect
      sleep 30
      retry
    rescue IgnoreSilentlyException
      control.log! "Unable to establish connection with the server. Run with log level set to debug for more information."
    rescue Exception => e
      # Deliberately the broadest rescue: this is the top of the worker
      # thread, so anything not handled above is logged here before the
      # thread ends.
      @connected = false
      control.log! e, :error
      control.log! e.backtrace.join("\n "), :error
    end
  end
  @worker_thread['newrelic_label'] = 'Worker Loop'
end
|
300
|
+
|
301
|
+
# Shortcut for the NewRelic::Control singleton.
def control
  NewRelic::Control.instance
end
|
304
|
+
|
305
|
+
# Builds the agent's collaborators and initial state. Nothing is started
# and no connection is made here.
def initialize
  @connected = false
  @launch_time = Time.now

  @metric_ids = {}
  # Histogram is constructed with a tenth of the configured apdex_t.
  @histogram = NewRelic::Histogram.new(NewRelic::Control.instance.apdex_t / 10)
  @stats_engine = NewRelic::Agent::StatsEngine.new
  @transaction_sampler = NewRelic::Agent::TransactionSampler.new
  @stats_engine.transaction_sampler = @transaction_sampler
  @error_collector = NewRelic::Agent::ErrorCollector.new(self)

  # 'timeout' setting, defaulting to two minutes; used by send_request.
  @request_timeout = NewRelic::Control.instance.fetch('timeout', 2 * 60)

  @invalid_license = false

  @last_harvest_time = Time.now
  @obfuscator = method(:default_sql_obfuscator)
end
|
323
|
+
|
324
|
+
# Connect to the server and validate the license. If successful,
|
325
|
+
# @connected is true when finished. If not successful, you can
|
326
|
+
# keep calling this. Return false if we could not establish a
|
327
|
+
# connection with the server and we should not retry, such as if
|
328
|
+
# there's a bad license key.
|
329
|
+
# Registers this agent run with the server and fetches its per-run settings.
#
# Returns true once connected (@connected is set), or false when the server
# rejects the license (@invalid_license is set). Any other failure is logged
# and retried without limit, waiting 1 minute after the first two failures,
# 2 minutes after failures three to five, and 10 minutes after that.
def connect
  # wait a few seconds for the web server to boot, necessary in development
  connect_retry_period = 5
  connect_attempts = 0
  @agent_id = nil
  begin
    sleep connect_retry_period.to_i
    environment = control['send_environment_info'] != false ? control.local_env.snapshot : []
    @agent_id ||= invoke_remote :start, @local_host, {
      :pid => $$,
      :launch_time => @launch_time.to_f,
      :agent_version => NewRelic::VERSION::STRING,
      :environment => environment,
      :settings => control.settings,
      :validate_seed => ENV['NR_VALIDATE_SEED'],
      :validate_token => ENV['NR_VALIDATE_TOKEN'] }

    # The redirect lookup is optional: on failure we stay on the current
    # collector, but the reason is logged instead of being discarded.
    host = begin
      invoke_remote(:get_redirect_host)
    rescue StandardError => redirect_error
      log.debug "Could not get redirect host: #{redirect_error}"
      nil
    end

    @collector = control.server_from_host(host) if host

    @report_period = invoke_remote :get_data_report_period, @agent_id

    control.log! "Connected to NewRelic Service at #{@collector}"
    log.debug "Agent ID = #{@agent_id}."

    # Ask the server for permission to send transaction samples.
    # determined by subscription license.
    @should_send_samples = invoke_remote :should_collect_samples, @agent_id

    if @should_send_samples
      # sampling_rate stays nil unless random sampling is configured; the
      # sampler is handed that nil as-is.
      sampling_rate = invoke_remote :sampling_rate, @agent_id if @random_sample
      @transaction_sampler.sampling_rate = sampling_rate
      log.info "Transaction sample rate: #{@transaction_sampler.sampling_rate}" if sampling_rate
    end

    # Ask for permission to collect error data
    @should_send_errors = invoke_remote :should_collect_errors, @agent_id

    log.info "Transaction traces will be sent to the RPM service" if @use_transaction_sampler && @should_send_samples
    log.info "Errors will be sent to the RPM service" if @error_collector.enabled && @should_send_errors

    @connected = true

  rescue LicenseException => e
    control.log! e.message, :error
    control.log! "Visit NewRelic.com to obtain a valid license key, or to upgrade your account."
    @invalid_license = true
    return false

  rescue Timeout::Error, StandardError => e
    log.info "Unable to establish connection with New Relic RPM Service at #{control.server}"
    unless e.instance_of? IgnoreSilentlyException
      log.error e.message
      log.debug e.backtrace.join("\n")
    end
    # retry logic
    connect_attempts += 1
    case connect_attempts
    when 1..2
      connect_retry_period, period_msg = 60, "1 minute"
    when 3..5 then
      connect_retry_period, period_msg = 60 * 2, "2 minutes"
    else
      connect_retry_period, period_msg = 10*60, "10 minutes"
    end
    log.info "Will re-attempt in #{period_msg}"
    retry
  end
end
|
399
|
+
|
400
|
+
# The local machine's host name as reported by Socket.gethostname.
def determine_host
  Socket.gethostname
end
|
403
|
+
|
404
|
+
# Returns control.root unchanged.
def determine_home_directory
  control.root
end
|
407
|
+
|
408
|
+
# Harvests timeslice metrics from the stats engine and posts them.
#
# A Timeout::Error from the post is treated as a successful delivery. Any
# other exception propagates before the unsent cache is cleared, so that
# data is carried into the next harvest.
def harvest_and_send_timeslice_data
  NewRelic::Agent::Instrumentation::DispatcherInstrumentation::BusyCalculator.harvest_busy

  now = Time.now

  @unsent_timeslice_data ||= {}
  @unsent_timeslice_data = @stats_engine.harvest_timeslice_data(@unsent_timeslice_data, @metric_ids)

  begin
    metric_ids = invoke_remote(:metric_data, @agent_id,
                               @last_harvest_time.to_f,
                               now.to_f,
                               @unsent_timeslice_data.values)
  rescue Timeout::Error
    # assume that the data was received. chances are that it was
    metric_ids = nil
  end

  @metric_ids.merge! metric_ids if metric_ids

  log.debug "#{now}: sent #{@unsent_timeslice_data.length} timeslices (#{@agent_id}) in #{Time.now - now} seconds"

  # if we successfully invoked this web service, then clear the unsent message cache.
  @unsent_timeslice_data = {}
  @last_harvest_time = now

  # handle_messages

  # note - exceptions are logged in invoke_remote. If an exception is encountered here,
  # then the metric data is downsampled for another timeslice
end
|
441
|
+
|
442
|
+
# Harvests transaction traces from the sampler and posts them.
#
# When the post is rejected as too big, the first trace is dropped and the
# send is retried until it fits or no traces remain. Any other exception
# propagates before @traces is cleared, so those traces are offered to the
# next harvest.
def harvest_and_send_slowest_sample
  @traces = @transaction_sampler.harvest(@traces, @slowest_transaction_threshold)

  # nil is tolerated the same way harvest_and_send_errors tolerates it
  if @traces && !@traces.empty?
    now = Time.now
    log.debug "Sending (#{@traces.length}) transaction traces"
    begin
      # take the traces and prepare them for sending across the
      # wire. This includes gathering SQL explanations, stripping
      # out stack traces, and normalizing SQL. note that we
      # explain only the sql statements whose segments' execution
      # times exceed our threshold (to avoid unnecessary overhead
      # of running explains on fast queries.)
      traces = @traces.collect {|trace| trace.prepare_to_send(:explain_sql => @explain_threshold, :record_sql => @record_sql, :keep_backtraces => true, :explain_enabled => @explain_enabled)}
      invoke_remote :transaction_sample_data, @agent_id, traces
    rescue PostTooBigException
      # we tried to send too much data, drop the first trace and
      # try again - but never re-send once nothing is left
      @traces.shift
      retry unless @traces.empty?
    end

    log.debug "#{now}: sent slowest sample (#{@agent_id}) in #{Time.now - now} seconds"
  end

  # if we succeed sending this sample, then we don't need to keep
  # the slowest sample around - it has been sent already and we
  # can collect the next one
  @traces = nil

  # note - exceptions are logged in invoke_remote. If an
  # exception is encountered here, then the slowest sample is
  # determined over the entire period since the last reported
  # sample.
end
|
476
|
+
|
477
|
+
# Harvests errors from the error collector and posts them.
#
# When the post is rejected as too big, the first error is dropped and the
# send is retried until it fits or no errors remain. Any other exception
# propagates before @unsent_errors is cleared, so those errors are offered
# to the next harvest.
def harvest_and_send_errors
  @unsent_errors = @error_collector.harvest_errors(@unsent_errors)
  if @unsent_errors && @unsent_errors.length > 0
    log.debug "Sending #{@unsent_errors.length} errors"
    begin
      invoke_remote :error_data, @agent_id, @unsent_errors
    rescue PostTooBigException
      # Bounded: stop once the list is empty instead of retrying forever.
      @unsent_errors.shift
      retry unless @unsent_errors.empty?
    end
    # if the remote invocation fails, then we never clear
    # @unsent_errors, and therefore we can re-attempt to send on
    # the next heartbeat. Note the error collector maxes out at
    # 20 instances to prevent leakage
    @unsent_errors = []
  end
end
|
494
|
+
|
495
|
+
# Marshals +object+ and chooses an encoding for the post body.
#
# Returns [payload, encoding]: encoding is 'identity' (raw marshal dump) for
# dumps under 2,000 bytes and 'deflate' otherwise. Raises
# PostTooBigException via check_post_size when the dump exceeds the limit.
def compress_data(object)
  dump = Marshal.dump(object)

  # this checks to make sure mongrel won't choke on big uploads
  check_post_size(dump)

  dump_size = dump.size

  # small payloads don't need compression
  return [dump, 'identity'] if dump_size < 2000

  # medium payloads get fast compression, to save CPU
  # big payloads get all the compression possible, to stay under
  # the 2,000,000 byte post threshold
  level = dump_size < 2000000 ? Zlib::BEST_SPEED : Zlib::BEST_COMPRESSION

  [Zlib::Deflate.deflate(dump, level), 'deflate']
end
|
518
|
+
|
519
|
+
# Guards against posts at or over control.post_size_limit.
#
# Returns nil when +post_string+ is under the limit; otherwise logs a
# warning and raises PostTooBigException.
def check_post_size(post_string)
  # TODO: define this as a config option on the server side
  return if post_string.size < control.post_size_limit

  log.warn "Tried to send too much data: #{post_string.size} bytes"
  raise PostTooBigException
end
|
525
|
+
|
526
|
+
# Posts one request to the collector and returns the Net::HTTP response.
#
# opts - Hash with :uri (request path), :encoding (Content-Encoding value),
#        :collector (server object; its #name becomes the Host header) and
#        :data (request body).
#
# Raises Timeout::Error when the request exceeds @request_timeout or the
# server answers with a gateway timeout, and IgnoreSilentlyException for a
# 503 or any other non-success response.
def send_request(opts)
  request = Net::HTTP::Post.new(opts[:uri], 'CONTENT-ENCODING' => opts[:encoding], 'ACCEPT-ENCODING' => 'gzip', 'HOST' => opts[:collector].name)
  request.content_type = "application/octet-stream"
  request.body = opts[:data]

  log.debug "connect to #{opts[:collector]}#{opts[:uri]}"

  response = nil
  # Connect to the same collector the Host header and log line refer to.
  http = control.http_connection(opts[:collector])
  begin
    # Timeout.timeout instead of the bare Kernel#timeout, which current
    # Ruby no longer provides.
    Timeout.timeout(@request_timeout) do
      response = http.request(request)
    end
  rescue Timeout::Error
    log.warn "Timed out trying to post data to RPM (timeout = #{@request_timeout} seconds)" unless @request_timeout < 30
    raise
  end
  if response.is_a? Net::HTTPServiceUnavailable
    log.debug(response.body || response.message)
    raise IgnoreSilentlyException
  elsif response.is_a? Net::HTTPGatewayTimeOut
    log.debug("Timed out getting response: #{response.message}")
    raise Timeout::Error, response.message
  elsif !(response.is_a? Net::HTTPSuccess)
    log.debug "Unexpected response from server: #{response.code}: #{response.message}"
    raise IgnoreSilentlyException
  end
  response
end
|
555
|
+
|
556
|
+
# Returns the response body as a string, gunzipping it when the
# 'content-encoding' header is exactly 'gzip'.
def decompress_response(response)
  if response['content-encoding'] != 'gzip'
    log.debug "Uncompressed content returned"
    return response.body
  end
  log.debug "Decompressing return value"
  reader = Zlib::GzipReader.new(StringIO.new(response.body))
  begin
    reader.read
  ensure
    # GzipReader holds zlib resources until it is explicitly closed
    reader.close
  end
end
|
565
|
+
|
566
|
+
# Decodes the marshalled value carried by +response+.
#
# Returns the value, or raises it when the server sent back an Exception
# instance.
def check_for_exception(response)
  dump = decompress_response(response)
  # NOTE(review): Marshal.load on bytes received over the network can
  # instantiate arbitrary objects; this relies on the collector being
  # trusted. Flagged only - the decoding is unchanged.
  value = Marshal.load(dump)
  raise value if value.is_a? Exception
  value
end
|
572
|
+
|
573
|
+
# Builds the request path for a remote +method+:
# /agent_listener/<PROTOCOL_VERSION>/<license key>/<method>, with
# ?run_id=<@agent_id> appended once an agent id is known.
def remote_method_uri(method)
  path = "/agent_listener/#{PROTOCOL_VERSION}/#{control.license_key}/#{method}"
  @agent_id ? "#{path}?run_id=#{@agent_id}" : path
end
|
578
|
+
|
579
|
+
# send a message via post
|
580
|
+
# Calls remote +method+ on the collector with +args+ and returns the decoded
# reply.
#
# ForceRestartException is logged and re-raised. ForceDisconnectException
# marks the agent disconnected and ends the calling thread. System-call and
# socket errors are converted to IgnoreSilentlyException. Anything else
# propagates unchanged.
def invoke_remote(method, *args)
  # determines whether to zip the data or send plain
  post_data, encoding = compress_data(args)

  response = send_request(:uri => remote_method_uri(method),
                          :encoding => encoding,
                          :collector => collector,
                          :data => post_data)

  # raises the right exception if the remote server tells it to die
  check_for_exception(response)
rescue ForceRestartException => e
  log.info e.message
  raise
rescue ForceDisconnectException => e
  log.error "RPM forced this agent to disconnect (#{e.message})\n" \
    "Restart this process to resume monitoring via rpm.newrelic.com."
  # when a disconnect is requested, stop the current thread, which
  # is the worker thread that gathers data and talks to the
  # server.
  @connected = false
  Thread.exit
rescue SystemCallError, SocketError => e
  # These include Errno connection errors
  log.debug "Recoverable error connecting to the server: #{e}"
  raise IgnoreSilentlyException
end
|
604
|
+
|
605
|
+
# Best-effort notification to the server that this agent run is ending.
#
# Skipped when not connected, and for a server named "localhost" combined
# with dispatcher instance id '3000'. Lowers @request_timeout to 10 seconds
# for the call. Never raises: failures are logged at debug level only.
def graceful_disconnect
  if @connected && !(control.server.name == "localhost" && control.dispatcher_instance_id == '3000')
    begin
      log.debug "Sending graceful shutdown message to #{control.server}"

      @request_timeout = 10

      log.debug "Sending RPM service agent run shutdown message"
      invoke_remote :shutdown, @agent_id, Time.now.to_f

      log.debug "Graceful shutdown complete"

    rescue Timeout::Error, StandardError => e
      # Still swallowed - shutdown must not fail - but no longer invisible.
      log.debug "Graceful shutdown message failed: #{e}"
    end
  else
    log.debug "Bypassing graceful shutdown - agent not connected"
  end
end
|
623
|
+
# Default SQL obfuscator: returns a copy of +sql+ with quoted strings and
# every run of digits replaced by '?'. The argument itself is not modified.
def default_sql_obfuscator(sql)
  # This is hardly readable. Use the unit tests.
  sql.
    gsub(/'(.*?[^\\'])??'(?!')/, '?').  # remove single quoted strings
    gsub(/"(.*?[^\\"])??"(?!")/, '?').  # remove double quoted strings
    gsub(/\d+/, "?")                    # replace all number literals
end
|
634
|
+
end
|
635
|
+
|
636
|
+
end
|