ghazel-newrelic_rpm 3.1.0.1 → 3.4.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (175) hide show
  1. data/CHANGELOG +120 -35
  2. data/LICENSE +29 -2
  3. data/README.rdoc +2 -2
  4. data/bin/mongrel_rpm +0 -0
  5. data/bin/newrelic +0 -0
  6. data/bin/newrelic_cmd +0 -0
  7. data/lib/new_relic/agent.rb +50 -38
  8. data/lib/new_relic/agent/agent.rb +459 -337
  9. data/lib/new_relic/agent/beacon_configuration.rb +71 -11
  10. data/lib/new_relic/agent/browser_monitoring.rb +73 -14
  11. data/lib/new_relic/agent/busy_calculator.rb +11 -3
  12. data/lib/new_relic/agent/chained_call.rb +2 -2
  13. data/lib/new_relic/agent/database.rb +223 -0
  14. data/lib/new_relic/agent/error_collector.rb +231 -183
  15. data/lib/new_relic/agent/instrumentation.rb +2 -2
  16. data/lib/new_relic/agent/instrumentation/active_merchant.rb +10 -2
  17. data/lib/new_relic/agent/instrumentation/active_record.rb +138 -0
  18. data/lib/new_relic/agent/instrumentation/acts_as_solr.rb +7 -1
  19. data/lib/new_relic/agent/instrumentation/authlogic.rb +6 -0
  20. data/lib/new_relic/agent/instrumentation/controller_instrumentation.rb +46 -14
  21. data/lib/new_relic/agent/instrumentation/data_mapper.rb +8 -2
  22. data/lib/new_relic/agent/instrumentation/delayed_job_instrumentation.rb +11 -3
  23. data/lib/new_relic/agent/instrumentation/memcache.rb +49 -25
  24. data/lib/new_relic/agent/instrumentation/merb/controller.rb +7 -2
  25. data/lib/new_relic/agent/instrumentation/merb/errors.rb +7 -1
  26. data/lib/new_relic/agent/instrumentation/metric_frame.rb +31 -4
  27. data/lib/new_relic/agent/instrumentation/metric_frame/pop.rb +1 -5
  28. data/lib/new_relic/agent/instrumentation/net.rb +8 -2
  29. data/lib/new_relic/agent/instrumentation/passenger_instrumentation.rb +5 -2
  30. data/lib/new_relic/agent/instrumentation/queue_time.rb +1 -1
  31. data/lib/new_relic/agent/instrumentation/rails/action_controller.rb +66 -35
  32. data/lib/new_relic/agent/instrumentation/rails/action_web_service.rb +7 -1
  33. data/lib/new_relic/agent/instrumentation/rails/errors.rb +7 -1
  34. data/lib/new_relic/agent/instrumentation/rails3/action_controller.rb +121 -1
  35. data/lib/new_relic/agent/instrumentation/rails3/errors.rb +7 -1
  36. data/lib/new_relic/agent/instrumentation/rainbows_instrumentation.rb +21 -0
  37. data/lib/new_relic/agent/instrumentation/resque.rb +80 -0
  38. data/lib/new_relic/agent/instrumentation/sinatra.rb +46 -20
  39. data/lib/new_relic/agent/instrumentation/sunspot.rb +6 -0
  40. data/lib/new_relic/agent/instrumentation/unicorn_instrumentation.rb +7 -2
  41. data/lib/new_relic/agent/method_tracer.rb +205 -99
  42. data/lib/new_relic/agent/new_relic_service.rb +221 -0
  43. data/lib/new_relic/agent/pipe_channel_manager.rb +161 -0
  44. data/lib/new_relic/agent/pipe_service.rb +54 -0
  45. data/lib/new_relic/agent/samplers/delayed_job_sampler.rb +89 -0
  46. data/lib/new_relic/agent/samplers/memory_sampler.rb +6 -7
  47. data/lib/new_relic/agent/shim_agent.rb +5 -5
  48. data/lib/new_relic/agent/sql_sampler.rb +282 -0
  49. data/lib/new_relic/agent/stats_engine.rb +2 -0
  50. data/lib/new_relic/agent/stats_engine/gc_profiler.rb +123 -0
  51. data/lib/new_relic/agent/stats_engine/metric_stats.rb +35 -30
  52. data/lib/new_relic/agent/stats_engine/samplers.rb +10 -4
  53. data/lib/new_relic/agent/stats_engine/transactions.rb +28 -87
  54. data/lib/new_relic/agent/transaction_info.rb +74 -0
  55. data/lib/new_relic/agent/transaction_sample_builder.rb +18 -3
  56. data/lib/new_relic/agent/transaction_sampler.rb +108 -20
  57. data/lib/new_relic/agent/worker_loop.rb +14 -6
  58. data/lib/new_relic/collection_helper.rb +19 -11
  59. data/lib/new_relic/command.rb +1 -1
  60. data/lib/new_relic/commands/deployments.rb +2 -2
  61. data/lib/new_relic/commands/install.rb +2 -13
  62. data/lib/new_relic/control.rb +2 -3
  63. data/lib/new_relic/control/class_methods.rb +12 -6
  64. data/lib/new_relic/control/configuration.rb +57 -8
  65. data/lib/new_relic/control/frameworks.rb +10 -0
  66. data/lib/new_relic/control/frameworks/external.rb +4 -4
  67. data/lib/new_relic/control/frameworks/merb.rb +2 -1
  68. data/lib/new_relic/control/frameworks/rails.rb +35 -22
  69. data/lib/new_relic/control/frameworks/rails3.rb +12 -7
  70. data/lib/new_relic/control/frameworks/ruby.rb +5 -5
  71. data/lib/new_relic/control/frameworks/sinatra.rb +1 -4
  72. data/lib/new_relic/control/instance_methods.rb +38 -12
  73. data/lib/new_relic/control/instrumentation.rb +23 -4
  74. data/lib/new_relic/control/logging_methods.rb +70 -15
  75. data/lib/new_relic/control/server_methods.rb +22 -9
  76. data/lib/new_relic/delayed_job_injection.rb +16 -3
  77. data/lib/new_relic/helper.rb +21 -0
  78. data/lib/new_relic/language_support.rb +95 -0
  79. data/lib/new_relic/local_environment.rb +92 -48
  80. data/lib/new_relic/metric_data.rb +7 -2
  81. data/lib/new_relic/metric_spec.rb +12 -9
  82. data/lib/new_relic/noticed_error.rb +6 -1
  83. data/lib/new_relic/rack/browser_monitoring.rb +18 -19
  84. data/lib/new_relic/rack/developer_mode.rb +3 -2
  85. data/lib/new_relic/recipes.rb +8 -4
  86. data/lib/new_relic/stats.rb +17 -60
  87. data/lib/new_relic/transaction_analysis.rb +2 -1
  88. data/lib/new_relic/transaction_analysis/segment_summary.rb +4 -2
  89. data/lib/new_relic/transaction_sample.rb +60 -75
  90. data/lib/new_relic/transaction_sample/segment.rb +31 -79
  91. data/lib/new_relic/version.rb +2 -2
  92. data/lib/newrelic_rpm.rb +1 -1
  93. data/newrelic.yml +2 -2
  94. data/newrelic_rpm.gemspec +46 -54
  95. data/test/active_record_fixtures.rb +3 -3
  96. data/test/config/newrelic.yml +1 -1
  97. data/test/fixtures/proc_cpuinfo.txt +575 -0
  98. data/test/new_relic/agent/agent/connect_test.rb +128 -25
  99. data/test/new_relic/agent/agent/start_test.rb +9 -94
  100. data/test/new_relic/agent/agent/start_worker_thread_test.rb +2 -4
  101. data/test/new_relic/agent/agent_test.rb +51 -78
  102. data/test/new_relic/agent/agent_test_controller.rb +1 -1
  103. data/test/new_relic/agent/agent_test_controller_test.rb +49 -33
  104. data/test/new_relic/agent/beacon_configuration_test.rb +12 -5
  105. data/test/new_relic/agent/browser_monitoring_test.rb +99 -50
  106. data/test/new_relic/agent/database_test.rb +161 -0
  107. data/test/new_relic/agent/error_collector_test.rb +47 -23
  108. data/test/new_relic/agent/instrumentation/active_record_instrumentation_test.rb +96 -42
  109. data/test/new_relic/agent/instrumentation/controller_instrumentation_test.rb +0 -2
  110. data/test/new_relic/agent/instrumentation/instrumentation_test.rb +1 -1
  111. data/test/new_relic/agent/instrumentation/metric_frame/pop_test.rb +3 -11
  112. data/test/new_relic/agent/instrumentation/net_instrumentation_test.rb +9 -9
  113. data/test/new_relic/agent/instrumentation/queue_time_test.rb +6 -11
  114. data/test/new_relic/agent/memcache_instrumentation_test.rb +54 -18
  115. data/test/new_relic/agent/method_tracer/class_methods/add_method_tracer_test.rb +1 -1
  116. data/test/new_relic/agent/method_tracer/instance_methods/trace_execution_scoped_test.rb +1 -1
  117. data/test/new_relic/agent/method_tracer_test.rb +3 -2
  118. data/test/new_relic/agent/new_relic_service_test.rb +151 -0
  119. data/test/new_relic/agent/pipe_channel_manager_test.rb +114 -0
  120. data/test/new_relic/agent/pipe_service_test.rb +113 -0
  121. data/test/new_relic/agent/rpm_agent_test.rb +4 -31
  122. data/test/new_relic/agent/sql_sampler_test.rb +192 -0
  123. data/test/new_relic/agent/stats_engine/metric_stats_test.rb +19 -18
  124. data/test/new_relic/agent/stats_engine_test.rb +41 -6
  125. data/test/new_relic/agent/transaction_info_test.rb +13 -0
  126. data/test/new_relic/agent/transaction_sample_builder_test.rb +27 -4
  127. data/test/new_relic/agent/transaction_sampler_test.rb +68 -46
  128. data/test/new_relic/agent/worker_loop_test.rb +3 -3
  129. data/test/new_relic/agent_test.rb +242 -0
  130. data/test/new_relic/collection_helper_test.rb +50 -28
  131. data/test/new_relic/control/configuration_test.rb +77 -0
  132. data/test/new_relic/control/logging_methods_test.rb +49 -21
  133. data/test/new_relic/control_test.rb +115 -54
  134. data/test/new_relic/delayed_job_injection_test.rb +21 -0
  135. data/test/new_relic/fake_collector.rb +210 -0
  136. data/test/new_relic/fake_service.rb +44 -0
  137. data/test/new_relic/local_environment_test.rb +14 -1
  138. data/test/new_relic/metric_parser/metric_parser_test.rb +11 -0
  139. data/test/new_relic/rack/browser_monitoring_test.rb +84 -23
  140. data/test/new_relic/rack/developer_mode_helper_test.rb +141 -0
  141. data/test/new_relic/rack/developer_mode_test.rb +31 -0
  142. data/test/new_relic/stats_test.rb +3 -18
  143. data/test/new_relic/transaction_analysis/segment_summary_test.rb +14 -0
  144. data/test/new_relic/transaction_analysis_test.rb +3 -3
  145. data/test/new_relic/transaction_sample/segment_test.rb +15 -80
  146. data/test/new_relic/transaction_sample_test.rb +25 -18
  147. data/test/script/build_test_gem.sh +51 -0
  148. data/test/script/ci.sh +140 -0
  149. data/test/script/ci_agent-tests_runner.sh +82 -0
  150. data/test/script/ci_bench.sh +52 -0
  151. data/test/script/ci_multiverse_runner.sh +63 -0
  152. data/test/test_contexts.rb +1 -0
  153. data/test/test_helper.rb +18 -5
  154. data/ui/helpers/developer_mode_helper.rb +14 -8
  155. data/ui/helpers/google_pie_chart.rb +0 -1
  156. data/ui/views/newrelic/index.rhtml +2 -2
  157. data/vendor/gems/dependency_detection-0.0.1.build/LICENSE +4 -18
  158. data/vendor/gems/dependency_detection-0.0.1.build/lib/dependency_detection.rb +10 -0
  159. data/vendor/gems/metric_parser-0.1.0.pre1/lib/new_relic/metric_parser/mem_cache.rb +11 -11
  160. data/vendor/gems/metric_parser-0.1.0.pre1/lib/new_relic/metric_parser/metric_parser.rb +17 -4
  161. data/vendor/gems/metric_parser-0.1.0.pre1/lib/new_relic/metric_parser/view.rb +4 -0
  162. metadata +50 -36
  163. data/lib/new_relic/agent/instrumentation/rails/active_record_instrumentation.rb +0 -108
  164. data/lib/new_relic/agent/instrumentation/rails3/active_record_instrumentation.rb +0 -112
  165. data/lib/new_relic/agent/samplers/delayed_job_lock_sampler.rb +0 -40
  166. data/lib/new_relic/data_serialization.rb +0 -84
  167. data/lib/new_relic/histogram.rb +0 -91
  168. data/lib/new_relic/rack/metric_app.rb +0 -65
  169. data/lib/new_relic/rack/mongrel_rpm.ru +0 -28
  170. data/lib/new_relic/rack/newrelic.yml +0 -27
  171. data/lib/new_relic/rack_app.rb +0 -6
  172. data/test/new_relic/data_serialization_test.rb +0 -70
  173. data/vendor/gems/dependency_detection-0.0.1.build/README +0 -0
  174. data/vendor/gems/metric_parser-0.1.0.pre1/LICENSE +0 -0
  175. data/vendor/gems/metric_parser-0.1.0.pre1/README +0 -0
@@ -4,7 +4,8 @@ require 'net/http'
4
4
  require 'logger'
5
5
  require 'zlib'
6
6
  require 'stringio'
7
- require 'new_relic/data_serialization'
7
+ require 'new_relic/agent/new_relic_service'
8
+ require 'new_relic/agent/pipe_service'
8
9
 
9
10
  module NewRelic
10
11
  module Agent
@@ -14,70 +15,89 @@ module NewRelic
14
15
  # in realtime as the application runs, and periodically sends that
15
16
  # data to the NewRelic server.
16
17
  class Agent
17
-
18
- # Specifies the version of the agent's communication protocol with
19
- # the NewRelic hosted site.
20
-
21
- PROTOCOL_VERSION = 8
22
- # 14105: v8 (tag 2.10.3)
23
- # (no v7)
24
- # 10379: v6 (not tagged)
25
- # 4078: v5 (tag 2.5.4)
26
- # 2292: v4 (tag 2.3.6)
27
- # 1754: v3 (tag 2.3.0)
28
- # 534: v2 (shows up in 2.1.0, our first tag)
29
-
30
-
31
18
  def initialize
32
-
33
19
  @launch_time = Time.now
34
20
 
35
21
  @metric_ids = {}
36
- @histogram = NewRelic::Histogram.new(NewRelic::Control.instance.apdex_t / 10)
37
22
  @stats_engine = NewRelic::Agent::StatsEngine.new
38
23
  @transaction_sampler = NewRelic::Agent::TransactionSampler.new
24
+ @sql_sampler = NewRelic::Agent::SqlSampler.new
39
25
  @stats_engine.transaction_sampler = @transaction_sampler
40
26
  @error_collector = NewRelic::Agent::ErrorCollector.new
41
27
  @connect_attempts = 0
42
28
 
43
- @request_timeout = NewRelic::Control.instance.fetch('timeout', 2 * 60)
44
-
45
29
  @last_harvest_time = Time.now
46
- @obfuscator = method(:default_sql_obfuscator)
30
+ @obfuscator = lambda {|sql| NewRelic::Agent::Database.default_sql_obfuscator(sql) }
31
+ @forked = false
32
+
33
+ # FIXME: temporary work around for RUBY-839
34
+ if control.monitor_mode?
35
+ @service = NewRelic::Agent::NewRelicService.new(control.license_key, control.server)
36
+ end
47
37
  end
48
38
 
39
+ # contains all the class-level methods for NewRelic::Agent::Agent
49
40
  module ClassMethods
50
- # Should only be called by NewRelic::Control
41
+ # Should only be called by NewRelic::Control - returns a
42
+ # memoized singleton instance of the agent, creating one if needed
51
43
  def instance
52
44
  @instance ||= self.new
53
45
  end
54
46
  end
55
-
47
+
48
+ # Holds all the methods defined on NewRelic::Agent::Agent
49
+ # instances
56
50
  module InstanceMethods
57
-
51
+
52
+ # holds a proc that is used to obfuscate sql statements
58
53
  attr_reader :obfuscator
54
+ # the statistics engine that holds all the timeslice data
59
55
  attr_reader :stats_engine
56
+ # the transaction sampler that handles recording transactions
60
57
  attr_reader :transaction_sampler
58
+ attr_reader :sql_sampler
59
+ # error collector is a simple collection of recorded errors
61
60
  attr_reader :error_collector
61
+ # whether we should record raw, obfuscated, or no sql
62
62
  attr_reader :record_sql
63
- attr_reader :histogram
63
+ # a cached set of metric_ids to save the collector some time -
64
+ # it returns a metric id for every metric name we send it, and
65
+ # in the future we transmit using the metric id only
64
66
  attr_reader :metric_ids
67
+ # in theory a set of rules applied by the agent to the output
68
+ # of its metrics. Currently unimplemented
65
69
  attr_reader :url_rules
70
+ # a configuration for the Real User Monitoring system -
71
+ # handles things like static setup of the header for inclusion
72
+ # into pages
66
73
  attr_reader :beacon_configuration
67
-
74
+ attr_accessor :service
75
+
76
+ # Returns the length of the unsent errors array, if it exists,
77
+ # otherwise nil
68
78
  def unsent_errors_size
69
79
  @unsent_errors.length if @unsent_errors
70
80
  end
71
-
81
+
82
+ # Returns the length of the traces array, if it exists,
83
+ # otherwise nil
72
84
  def unsent_traces_size
73
85
  @traces.length if @traces
74
86
  end
75
-
87
+
88
+ # Initializes the unsent timeslice data hash, if needed, and
89
+ # returns the number of keys it contains
76
90
  def unsent_timeslice_data
77
91
  @unsent_timeslice_data ||= {}
78
92
  @unsent_timeslice_data.keys.length
79
93
  end
80
94
 
95
+ # fakes out a transaction that did not happen in this process
96
+ # by creating apdex, summary metrics, and recording statistics
97
+ # for the transaction
98
+ #
99
+ # This method is *deprecated* - it may be removed in future
100
+ # versions of the agent
81
101
  def record_transaction(duration_seconds, options={})
82
102
  is_error = options['is_error'] || options['error_message'] || options['exception']
83
103
  metric = options['metric']
@@ -87,7 +107,6 @@ module NewRelic
87
107
 
88
108
  if metric_info.is_web_transaction?
89
109
  NewRelic::Agent::Instrumentation::MetricFrame.record_apdex(metric_info, duration_seconds, duration_seconds, is_error)
90
- histogram.process(duration_seconds)
91
110
  end
92
111
  metrics = metric_info.summary_metrics
93
112
 
@@ -101,9 +120,9 @@ module NewRelic
101
120
  if options['exception']
102
121
  e = options['exception']
103
122
  elsif options['error_message']
104
- e = Exception.new options['error_message']
123
+ e = StandardError.new options['error_message']
105
124
  else
106
- e = Exception.new 'Unknown Error'
125
+ e = StandardError.new 'Unknown Error'
107
126
  end
108
127
  error_collector.notice_error e, :uri => options['uri'], :metric => metric
109
128
  end
@@ -130,11 +149,17 @@ module NewRelic
130
149
  # connection, this tells me to only try it once so this method returns
131
150
  # quickly if there is some kind of latency with the server.
132
151
  def after_fork(options={})
133
-
152
+ @forked = true
134
153
  # @connected gets false after we fail to connect or have an error
135
154
  # connecting. @connected has nil if we haven't finished trying to connect.
136
155
  # or we didn't attempt a connection because this is the master process
137
-
156
+
157
+ if channel_id = options[:report_to_channel]
158
+ @service = NewRelic::Agent::PipeService.new(channel_id)
159
+ @connected_pid = $$
160
+ @metric_ids = {}
161
+ end
162
+
138
163
  # log.debug "Agent received after_fork notice in #$$: [#{control.agent_enabled?}; monitor=#{control.monitor_mode?}; connected: #{@connected.inspect}; thread=#{@worker_thread.inspect}]"
139
164
  return if !control.agent_enabled? or
140
165
  !control.monitor_mode? or
@@ -151,7 +176,11 @@ module NewRelic
151
176
  start_worker_thread(options)
152
177
  @stats_engine.start_sampler_thread
153
178
  end
154
-
179
+
180
+ def forked?
181
+ @forked
182
+ end
183
+
155
184
  # True if we have initialized and completed 'start'
156
185
  def started?
157
186
  @started
@@ -163,57 +192,73 @@ module NewRelic
163
192
  @connected
164
193
  end
165
194
 
166
- # Attempt a graceful shutdown of the agent.
195
+ # Attempt a graceful shutdown of the agent, running the worker
196
+ # loop if it exists and is running.
197
+ #
198
+ # Options:
199
+ # :force_send => (true/false) # force the agent to send data
200
+ # before shutting down
167
201
  def shutdown(options={})
168
202
  run_loop_before_exit = options.fetch(:force_send, false)
169
203
  return if not started?
170
204
  if @worker_loop
171
205
  @worker_loop.run_task if run_loop_before_exit
172
206
  @worker_loop.stop
207
+ end
173
208
 
174
- log.debug "Starting Agent shutdown"
209
+ log.debug "Starting Agent shutdown"
175
210
 
176
- # if litespeed, then ignore all future SIGUSR1 - it's
177
- # litespeed trying to shut us down
211
+ # if litespeed, then ignore all future SIGUSR1 - it's
212
+ # litespeed trying to shut us down
178
213
 
179
- if control.dispatcher == :litespeed
180
- Signal.trap("SIGUSR1", "IGNORE")
181
- Signal.trap("SIGTERM", "IGNORE")
182
- end
214
+ if control.dispatcher == :litespeed
215
+ Signal.trap("SIGUSR1", "IGNORE")
216
+ Signal.trap("SIGTERM", "IGNORE")
217
+ end
183
218
 
184
- begin
185
- NewRelic::Agent.disable_all_tracing do
186
- graceful_disconnect
187
- end
188
- rescue => e
189
- log.error e
190
- log.error e.backtrace.join("\n")
219
+ begin
220
+ NewRelic::Agent.disable_all_tracing do
221
+ graceful_disconnect
191
222
  end
223
+ rescue => e
224
+ log.error e
225
+ log.error e.backtrace.join("\n")
192
226
  end
193
227
  @started = nil
194
228
  end
195
229
 
230
+ # Tells the statistics engine we are starting a new transaction
196
231
  def start_transaction
197
232
  @stats_engine.start_transaction
198
233
  end
199
234
 
235
+ # Tells the statistics engine we are ending a transaction
200
236
  def end_transaction
201
237
  @stats_engine.end_transaction
202
238
  end
203
239
 
240
+ # Sets a thread local variable as to whether we should or
241
+ # should not record sql in the current thread. Returns the
242
+ # previous value, if there is one
204
243
  def set_record_sql(should_record)
205
244
  prev = Thread::current[:record_sql]
206
245
  Thread::current[:record_sql] = should_record
207
246
  prev.nil? || prev
208
247
  end
209
248
 
249
+ # Sets a thread local variable as to whether we should or
250
+ # should not record transaction traces in the current
251
+ # thread. Returns the previous value, if there is one
210
252
  def set_record_tt(should_record)
211
253
  prev = Thread::current[:record_tt]
212
254
  Thread::current[:record_tt] = should_record
213
255
  prev.nil? || prev
214
256
  end
257
+
215
258
  # Push flag indicating whether we should be tracing in this
216
- # thread.
259
+ # thread. This uses a stack which allows us to disable tracing
260
+ # children of a transaction without affecting the tracing of
261
+ # the whole transaction
217
262
  def push_trace_execution_flag(should_trace=false)
218
263
  value = Thread.current[:newrelic_untraced]
219
264
  if (value.nil?)
@@ -229,18 +274,7 @@ module NewRelic
229
274
  Thread.current[:newrelic_untraced].pop if Thread.current[:newrelic_untraced]
230
275
  end
231
276
 
232
- def set_sql_obfuscator(type, &block)
233
- if type == :before
234
- @obfuscator = NewRelic::ChainedCall.new(block, @obfuscator)
235
- elsif type == :after
236
- @obfuscator = NewRelic::ChainedCall.new(@obfuscator, block)
237
- elsif type == :replace
238
- @obfuscator = block
239
- else
240
- fail "unknown sql_obfuscator type #{type}"
241
- end
242
- end
243
-
277
+ # Shorthand to the NewRelic::Agent.logger method
244
278
  def log
245
279
  NewRelic::Agent.logger
246
280
  end
@@ -248,6 +282,7 @@ module NewRelic
248
282
  # Herein lies the corpse of the former 'start' method. May
249
283
  # its unmatched flog score rest in pieces.
250
284
  module Start
285
+ # Check whether we have already started, which is an error condition
251
286
  def already_started?
252
287
  if started?
253
288
  control.log!("Agent Started Already!", :error)
@@ -255,82 +290,38 @@ module NewRelic
255
290
  end
256
291
  end
257
292
 
293
+ # The agent is disabled when it is not force enabled by the
294
+ # 'agent_enabled' option (e.g. in a manual start), or
295
+ # enabled normally through the configuration file
258
296
  def disabled?
259
297
  !control.agent_enabled?
260
298
  end
261
299
 
300
+ # Logs the dispatcher to the log file to assist with
301
+ # debugging. When no dispatcher is present, logs this fact to
302
+ # assist with proper dispatcher detection
262
303
  def log_dispatcher
263
304
  dispatcher_name = control.dispatcher.to_s
264
305
  return if log_if(dispatcher_name.empty?, :info, "No dispatcher detected.")
265
306
  log.info "Dispatcher: #{dispatcher_name}"
266
307
  end
267
308
 
309
+ # Logs the configured application names
268
310
  def log_app_names
269
311
  log.info "Application: #{control.app_names.join(", ")}"
270
312
  end
271
-
272
- def apdex_f
273
- (4 * NewRelic::Control.instance.apdex_t).to_f
274
- end
275
-
276
- def apdex_f_threshold?
277
- sampler_config.fetch('transaction_threshold', '') =~ /apdex_f/i
278
- end
279
-
280
- def set_sql_recording!
281
- record_sql_config = sampler_config.fetch('record_sql', :obfuscated)
282
- case record_sql_config.to_s
283
- when 'off'
284
- @record_sql = :off
285
- when 'none'
286
- @record_sql = :off
287
- when 'false'
288
- @record_sql = :off
289
- when 'raw'
290
- @record_sql = :raw
291
- else
292
- @record_sql = :obfuscated
293
- end
294
-
295
- log_sql_transmission_warning?
296
- end
297
-
298
- def log_sql_transmission_warning?
299
- log_if((@record_sql == :raw), :warn, "Agent is configured to send raw SQL to the service")
300
- end
301
-
302
- def sampler_config
303
- control.fetch('transaction_tracer', {})
304
- end
305
-
306
- # this entire method should be done on the transaction
307
- # sampler object, rather than here. We should pass in the
308
- # sampler config.
309
- def config_transaction_tracer
310
- @should_send_samples = @config_should_send_samples = sampler_config.fetch('enabled', true)
311
- @should_send_random_samples = sampler_config.fetch('random_sample', false)
312
- @explain_threshold = sampler_config.fetch('explain_threshold', 0.5).to_f
313
- @explain_enabled = sampler_config.fetch('explain_enabled', true)
314
- set_sql_recording!
315
-
316
- # default to 2.0, string 'apdex_f' will turn into your
317
- # apdex * 4
318
- @slowest_transaction_threshold = sampler_config.fetch('transaction_threshold', 2.0).to_f
319
- @slowest_transaction_threshold = apdex_f if apdex_f_threshold?
320
- end
321
-
313
+
314
+ # Connecting in the foreground blocks further startup of the
315
+ # agent until we have a connection - useful in cases where
316
+ # you're trying to log a very-short-running process and want
317
+ # to get statistics from before a server connection
318
+ # (typically 20 seconds) exists
322
319
  def connect_in_foreground
323
320
  NewRelic::Agent.disable_all_tracing { connect(:keep_retrying => false) }
324
321
  end
325
322
 
326
- def using_rubinius?
327
- RUBY_VERSION =~ /rubinius/i
328
- end
329
-
330
- def using_jruby?
331
- defined?(JRuby)
332
- end
333
-
323
+ # If we're using sinatra, old versions run in an at_exit
324
+ # block so we should probably know that
334
325
  def using_sinatra?
335
326
  defined?(Sinatra::Application)
336
327
  end
@@ -338,9 +329,14 @@ module NewRelic
338
329
  # we should not set an at_exit block if people are using
339
330
  # these as they don't do standard at_exit behavior per MRI/YARV
340
331
  def weird_ruby?
341
- using_rubinius? || using_jruby? || using_sinatra?
332
+ NewRelic::LanguageSupport.using_engine?('rbx') ||
333
+ NewRelic::LanguageSupport.using_engine?('jruby') ||
334
+ using_sinatra?
342
335
  end
343
336
 
337
+ # Installs our exit handler, which exploits the weird
338
+ # behavior of at_exit blocks to make sure it runs last, by
339
+ # doing an at_exit within an at_exit block.
344
340
  def install_exit_handler
345
341
  if control.send_data_on_exit && !weird_ruby?
346
342
  # Our shutdown handler needs to run after other shutdown handlers
@@ -348,46 +344,73 @@ module NewRelic
348
344
  end
349
345
  end
350
346
 
347
+ # Tells us in the log file where the log file is
348
+ # located. This seems redundant, but can come in handy when
349
+ # we have some log file path set by the user which parses
350
+ # incorrectly, sending the log file to who-knows-where
351
351
  def notify_log_file_location
352
352
  log_file = NewRelic::Control.instance.log_file
353
- log_if(log_file, :info, "Agent Log found in #{log_file}")
353
+ log_if(File.exists?(log_file.to_s), :info,
354
+ "Agent Log at #{log_file}")
354
355
  end
355
356
 
357
+ # Classy logging of the agent version and the current pid,
358
+ # so we can disambiguate processes in the log file and make
359
+ # sure they're running a reasonable version
356
360
  def log_version_and_pid
357
361
  log.info "New Relic Ruby Agent #{NewRelic::VERSION::STRING} Initialized: pid = #{$$}"
358
362
  end
359
363
 
364
+ # A helper method that logs a condition if that condition is
365
+ # true. Mentally cleaner than having every method set a
366
+ # local and log if it is true
360
367
  def log_if(boolean, level, message)
361
368
  self.log.send(level, message) if boolean
362
369
  boolean
363
370
  end
364
371
 
372
+ # A helper method that logs a condition unless that
373
+ # condition is true. Mentally cleaner than having every
374
+ # method set a local and log unless it is true
365
375
  def log_unless(boolean, level, message)
366
376
  self.log.send(level, message) unless boolean
367
377
  boolean
368
378
  end
369
379
 
380
+ # Warn the user if they have configured their agent not to
381
+ # send data, that way we can see this clearly in the log file
370
382
  def monitoring?
371
383
  log_unless(control.monitor_mode?, :warn, "Agent configured not to send data in this environment - edit newrelic.yml to change this")
372
384
  end
373
385
 
386
+ # Tell the user when the license key is missing so they can
387
+ # fix it by adding it to the file
374
388
  def has_license_key?
375
389
  log_unless(control.license_key, :error, "No license key found. Please edit your newrelic.yml file and insert your license key.")
376
390
  end
377
391
 
392
+ # A correct license key exists and is of the proper length
378
393
  def has_correct_license_key?
379
394
  has_license_key? && correct_license_length
380
395
  end
381
396
 
397
+ # A license key is an arbitrary 40 character string,
398
+ # usually looks something like a SHA1 hash
382
399
  def correct_license_length
383
400
  key = control.license_key
384
401
  log_unless((key.length == 40), :error, "Invalid license key: #{key}")
385
402
  end
386
403
 
404
+ # If we're using a dispatcher that forks before serving
405
+ # requests, we need to wait until the children are forked
406
+ # before connecting, otherwise the parent process sends odd data
387
407
  def using_forking_dispatcher?
388
- log_if([:passenger, :unicorn].include?(control.dispatcher), :info, "Connecting workers after forking.")
408
+ log_if([:passenger, :unicorn, :rainbows].include?(control.dispatcher), :info, "Connecting workers after forking.")
389
409
  end
390
410
 
411
+ # Sanity-check the agent configuration and start the agent,
412
+ # setting up the worker thread and the exit handler to shut
413
+ # down the agent
391
414
  def check_config_and_start_agent
392
415
  return unless monitoring? && has_correct_license_key?
393
416
  return if using_forking_dispatcher?
@@ -399,6 +422,7 @@ module NewRelic
399
422
 
400
423
  include Start
401
424
 
425
+ # Logs a bunch of data and starts the agent, if needed
402
426
  def start
403
427
  return if already_started? || disabled?
404
428
  @started = true
@@ -411,7 +435,8 @@ module NewRelic
411
435
  notify_log_file_location
412
436
  end
413
437
 
414
- # Clear out the metric data, errors, and transaction traces. Reset the histogram data.
438
+ # Clear out the metric data, errors, and transaction traces,
439
+ # making sure the agent is in a fresh state
415
440
  def reset_stats
416
441
  @stats_engine.reset_stats
417
442
  @unsent_errors = []
@@ -419,71 +444,89 @@ module NewRelic
419
444
  @unsent_timeslice_data = {}
420
445
  @last_harvest_time = Time.now
421
446
  @launch_time = Time.now
422
- @histogram = NewRelic::Histogram.new(NewRelic::Control.instance.apdex_t / 10)
423
447
  end
424
448
 
425
449
  private
426
- def collector
427
- @collector ||= control.server
428
- end
429
-
450
+
451
+ # All of this module used to be contained in the
452
+ # start_worker_thread method - this is an artifact of
453
+ # refactoring and can be moved, renamed, etc at will
430
454
  module StartWorkerThread
431
455
 
456
+ # disable transaction sampling if disabled by the server
457
+ # and we're not in dev mode
432
458
  def check_transaction_sampler_status
433
- # disable transaction sampling if disabled by the server
434
- # and we're not in dev mode
435
459
  if control.developer_mode? || @should_send_samples
436
460
  @transaction_sampler.enable
437
461
  else
438
462
  @transaction_sampler.disable
439
463
  end
440
464
  end
465
+
466
+ def check_sql_sampler_status
467
+ # disable sql sampling if disabled by the server
468
+ # and we're not in dev mode
469
+ if @sql_sampler.config.fetch('enabled', true) && ['raw', 'obfuscated'].include?(@sql_sampler.config.fetch('record_sql', 'obfuscated').to_s) && @transaction_sampler.config.fetch('enabled', true)
470
+ @sql_sampler.enable
471
+ else
472
+ @sql_sampler.disable
473
+ end
474
+ end
441
475
 
476
+ # logs info about the worker loop so users can see when the
477
+ # agent actually begins running in the background
442
478
  def log_worker_loop_start
443
479
  log.info "Reporting performance data every #{@report_period} seconds."
444
480
  log.debug "Running worker loop"
445
481
  end
446
482
 
483
+ # Creates the worker loop and loads it with the instructions
484
+ # it should run every @report_period seconds
447
485
  def create_and_run_worker_loop
448
486
  @worker_loop = WorkerLoop.new
449
487
  @worker_loop.run(@report_period) do
450
- NewRelic::Agent.load_data
451
- harvest_and_send_errors
452
- harvest_and_send_slowest_sample
453
- harvest_and_send_timeslice_data
488
+ transmit_data
454
489
  end
455
490
  end
456
491
 
492
+ # Handles the case where the server tells us to restart -
493
+ # this clears the data, clears connection attempts, and
494
+ # waits a while to reconnect.
457
495
  def handle_force_restart(error)
458
496
  log.info error.message
459
- # disconnect and start over.
460
- # clear the stats engine
461
497
  reset_stats
462
498
  @metric_ids = {}
463
499
  @connected = nil
464
- # Wait a short time before trying to reconnect
465
500
  sleep 30
466
501
  end
467
502
 
503
+ # when a disconnect is requested, stop the current thread, which
504
+ # is the worker thread that gathers data and talks to the
505
+ # server.
468
506
  def handle_force_disconnect(error)
469
- # when a disconnect is requested, stop the current thread, which
470
- # is the worker thread that gathers data and talks to the
471
- # server.
472
507
  log.error "New Relic forced this agent to disconnect (#{error.message})"
473
508
  disconnect
474
509
  end
475
510
 
511
+ # there is a problem with connecting to the server, so we
512
+ # stop trying to connect and shut down the agent
476
513
  def handle_server_connection_problem(error)
477
514
  log.error "Unable to establish connection with the server. Run with log level set to debug for more information."
478
515
  log.debug("#{error.class.name}: #{error.message}\n#{error.backtrace.first}")
479
516
  disconnect
480
517
  end
481
518
 
519
+ # Handles an unknown error in the worker thread by logging
520
+ # it and disconnecting the agent, since we are now in an
521
+ # unknown state
482
522
  def handle_other_error(error)
483
523
  log.error "Terminating worker loop: #{error.class.name}: #{error.message}\n #{error.backtrace.join("\n ")}"
484
524
  disconnect
485
525
  end
486
526
 
527
+ # a wrapper method to handle all the errors that can happen
528
+ # in the connection and worker thread system. This
529
+ # guarantees a no-throw from the background thread.
487
530
  def catch_errors
488
531
  yield
489
532
  rescue NewRelic::Agent::ForceRestartException => e
@@ -493,10 +536,18 @@ module NewRelic
493
536
  handle_force_disconnect(e)
494
537
  rescue NewRelic::Agent::ServerConnectionException => e
495
538
  handle_server_connection_problem(e)
496
- rescue Exception => e
539
+ rescue => e
497
540
  handle_other_error(e)
498
541
  end
499
542
 
543
+ # This is the method that is run in a new thread in order to
544
+ # background the harvesting and sending of data during the
545
+ # normal operation of the agent.
546
+ #
547
+ # Takes connection options that determine how we should
548
+ # connect to the server, and loops endlessly - typically we
549
+ # never return from this method unless we're shutting down
550
+ # the agent
500
551
  def deferred_work!(connection_options)
501
552
  catch_errors do
502
553
  NewRelic::Agent.disable_all_tracing do
@@ -507,8 +558,11 @@ module NewRelic
507
558
  connect(connection_options)
508
559
  if @connected
509
560
  check_transaction_sampler_status
561
+ check_sql_sampler_status
510
562
  log_worker_loop_start
511
563
  create_and_run_worker_loop
564
+ # never reaches here unless there is a problem or
565
+ # the agent is exiting
512
566
  else
513
567
  log.debug "No connection. Worker thread ending."
514
568
  end
@@ -529,36 +583,59 @@ module NewRelic
529
583
  @worker_thread['newrelic_label'] = 'Worker Loop'
530
584
  end
531
585
 
586
+ # A shorthand for NewRelic::Control.instance
532
587
  def control
533
588
  NewRelic::Control.instance
534
589
  end
535
-
590
+
591
+ # This module is an artifact of a refactoring of the connect
592
+ # method - all of its methods are used in that context, so it
593
+ # can be refactored at will. It should be fully tested
536
594
  module Connect
595
+ # the frequency with which we should try to connect to the
596
+ # server at the moment.
537
597
  attr_accessor :connect_retry_period
598
+ # number of attempts we've made to contact the server
538
599
  attr_accessor :connect_attempts
539
600
 
601
+ # Disconnect just sets connected to false, which prevents
602
+ # the agent from trying to connect again
540
603
  def disconnect
541
604
  @connected = false
542
605
  true
543
606
  end
544
607
 
608
+ # We've tried to connect if @connected is not nil, or if we
609
+ # are forcing reconnection (i.e. in the case of an
610
+ # after_fork with long running processes)
545
611
  def tried_to_connect?(options)
546
612
  !(@connected.nil? || options[:force_reconnect])
547
613
  end
548
614
 
615
+ # We keep trying by default, but you can disable it with the
616
+ # :keep_retrying option set to false
549
617
  def should_keep_retrying?(options)
550
618
  @keep_retrying = (options[:keep_retrying].nil? || options[:keep_retrying])
551
619
  end
552
620
 
621
+ # Retry period is a minute for each failed attempt that
622
+ # we've made. This should probably do some sort of sane TCP
623
+ # backoff to prevent hammering the server, but a minute for
624
+ # each attempt seems to work reasonably well.
553
625
  def get_retry_period
554
626
  return 600 if self.connect_attempts > 6
555
627
  connect_attempts * 60
556
628
  end
557
629
 
558
- def increment_retry_period!
630
+ def increment_retry_period! #:nodoc:
559
631
  self.connect_retry_period=(get_retry_period)
560
632
  end
561
633
 
634
+ # We should only retry when there has not been a more
635
+ # serious condition that would prevent it. We increment the
636
+ # connect attempts and the retry period, to prevent constant
637
+ # connection attempts, and tell the user what we're doing by
638
+ # logging.
562
639
  def should_retry?
563
640
  if @keep_retrying
564
641
  self.connect_attempts=(connect_attempts + 1)
@@ -571,27 +648,46 @@ module NewRelic
571
648
  end
572
649
  end
573
650
 
651
+ # When we have a problem connecting to the server, we need
652
+ # to tell the user what happened, since this is not an error
653
+ # we can handle gracefully.
574
654
  def log_error(error)
575
655
  log.error "Error establishing connection with New Relic Service at #{control.server}: #{error.message}"
576
656
  log.debug error.backtrace.join("\n")
577
657
  end
578
658
 
659
+ # When the server sends us an error with the license key, we
660
+ # want to tell the user that something went wrong, and let
661
+ # them know where to go to get a valid license key
662
+ #
663
+ # After this runs, it disconnects the agent so that it will
664
+ # no longer try to connect to the server, saving the
665
+ # application and the server load
579
666
  def handle_license_error(error)
580
667
  log.error error.message
581
668
  log.info "Visit NewRelic.com to obtain a valid license key, or to upgrade your account."
582
669
  disconnect
583
670
  end
584
671
 
672
+ # If we are using a seed and token to validate the agent, we
673
+ # should debug log that fact so that debug logs include a
674
+ # clue that token authentication is what will be used
585
675
  def log_seed_token
586
676
  if control.validate_seed
587
677
  log.debug "Connecting with validation seed/token: #{control.validate_seed}/#{control.validate_token}"
588
678
  end
589
679
  end
590
680
 
681
+ # Checks whether we should send environment info, and if so,
682
+ # returns the snapshot from the local environment
591
683
  def environment_for_connect
592
684
  control['send_environment_info'] != false ? control.local_env.snapshot : []
593
685
  end
594
686
 
687
+ # These validation settings are used for cases where a
688
+ # dynamic server is spun up for clients - partners can
689
+ # include a seed and token to indicate that the host is
690
+ # allowed to connect, rather than setting a unique hostname
595
691
  def validate_settings
596
692
  {
597
693
  :seed => control.validate_seed,
@@ -599,6 +695,8 @@ module NewRelic
599
695
  }
600
696
  end
601
697
 
698
+ # Initializes the hash of settings that we send to the
699
+ # server. Returns a literal hash containing the options
602
700
  def connect_settings
603
701
  {
604
702
  :pid => $$,
@@ -611,12 +709,21 @@ module NewRelic
611
709
  :validate => validate_settings
612
710
  }
613
711
  end
712
+
713
+ # Does some simple logging to make sure that our seed and
714
+ # token for verification are correct, then returns the
715
+ # connect data passed back from the server
614
716
  def connect_to_server
615
717
  log_seed_token
616
- connect_data = invoke_remote(:connect, connect_settings)
718
+ @service.connect(connect_settings)
617
719
  end
618
720
 
721
+ # Configures the error collector if the server says that we
722
+ # are allowed to send errors. Pretty simple, and logs at
723
+ # debug whether errors will or will not be sent.
619
724
  def configure_error_collector!(server_enabled)
725
+ # Reinitialize the error collector
726
+ @error_collector = NewRelic::Agent::ErrorCollector.new
620
727
  # Ask for permission to collect error data
621
728
  enabled = if error_collector.config_enabled && server_enabled
622
729
  error_collector.enabled = true
@@ -626,80 +733,188 @@ module NewRelic
626
733
  log.debug "Errors will #{enabled ? '' : 'not '}be sent to the New Relic service."
627
734
  end
628
735
 
736
+ # Random sampling is enabled based on a sample rate, which
737
+ # is the n in "one in every n transactions is added regardless of
738
+ # its length".
739
+ #
740
+ # uses a sane default for sampling rate if the sampling rate
741
+ # is zero, since the collector currently sends '0' as a
742
+ # sampling rate for all accounts, which is probably for
743
+ # legacy reasons
629
744
  def enable_random_samples!(sample_rate)
630
- sample_rate = 10 unless sample_rate.to_i > 0# a sane default for random sampling
745
+ sample_rate = 10 unless sample_rate.to_i > 0
631
746
  @transaction_sampler.random_sampling = true
632
747
  @transaction_sampler.sampling_rate = sample_rate
633
748
  log.info "Transaction sampling enabled, rate = #{@transaction_sampler.sampling_rate}"
634
749
  end
635
750
 
751
+ # this entire method should be done on the transaction
752
+ # sampler object, rather than here. We should pass in the
753
+ # sampler config.
754
+ def config_transaction_tracer
755
+ # Reconfigure the transaction tracer
756
+ @transaction_sampler.configure!
757
+ @sql_sampler.configure!
758
+ @should_send_samples = @config_should_send_samples = @transaction_sampler.config.fetch('enabled', true)
759
+ @should_send_random_samples = @transaction_sampler.config.fetch('random_sample', false)
760
+ set_sql_recording!
636
761
 
762
+ # default to 2.0, string 'apdex_f' will turn into your
763
+ # apdex_t * 4
764
+ @slowest_transaction_threshold = @transaction_sampler.config.fetch('transaction_threshold', 2.0).to_f
765
+ @slowest_transaction_threshold = apdex_f if apdex_f_threshold?
766
+ end
767
+
768
+ # Enables or disables the transaction tracer and sets its
769
+ # options based on the options provided to the
770
+ # method.
637
771
  def configure_transaction_tracer!(server_enabled, sample_rate)
638
772
  # Ask the server for permission to send transaction samples.
639
773
  # determined by subscription license.
774
+ @transaction_sampler.config['enabled'] = server_enabled
775
+ @sql_sampler.configure!
640
776
  @should_send_samples = @config_should_send_samples && server_enabled
641
-
777
+
642
778
  if @should_send_samples
643
779
  # I don't think this is ever true, but...
644
780
  enable_random_samples!(sample_rate) if @should_send_random_samples
781
+
782
+ @transaction_sampler.slow_capture_threshold = @slowest_transaction_threshold
783
+
645
784
  log.debug "Transaction tracing threshold is #{@slowest_transaction_threshold} seconds."
646
785
  else
647
786
  log.debug "Transaction traces will not be sent to the New Relic service."
648
787
  end
649
788
  end
650
789
 
651
- def set_collector_host!
652
- host = invoke_remote(:get_redirect_host)
653
- if host
654
- @collector = control.server_from_host(host)
790
+ # apdex_f is always 4 times the apdex_t
791
+ def apdex_f
792
+ (4 * NewRelic::Control.instance.apdex_t).to_f
793
+ end
794
+
795
+ # If the transaction threshold is set to the string
796
+ # 'apdex_f', we use 4 times the apdex_t value to record
797
+ # transactions. This pairs well with using apdex since you
798
+ # will attempt to send any transactions that register as 'failing'
799
+ def apdex_f_threshold?
800
+ @transaction_sampler.config.fetch('transaction_threshold', '') =~ /apdex_f/i
801
+ end
802
+
803
+ # Sets the sql recording configuration by trying to detect
804
+ # any attempt to disable the sql collection - 'off',
805
+ # 'false', 'none', and friends. Otherwise, we accept 'raw',
806
+ # and unrecognized values default to 'obfuscated'
807
+ def set_sql_recording!
808
+ record_sql_config = @transaction_sampler.config.fetch('record_sql', :obfuscated)
809
+ case record_sql_config.to_s
810
+ when 'off'
811
+ @record_sql = :off
812
+ when 'none'
813
+ @record_sql = :off
814
+ when 'false'
815
+ @record_sql = :off
816
+ when 'raw'
817
+ @record_sql = :raw
818
+ else
819
+ @record_sql = :obfuscated
655
820
  end
821
+
822
+ log_sql_transmission_warning?
656
823
  end
657
824
 
825
+ # Warn the user when we are sending raw sql across the wire
826
+ # - they should probably be using ssl when this is true
827
+ def log_sql_transmission_warning?
828
+ log.warn("Agent is configured to send raw SQL to the service") if @record_sql == :raw
829
+ end
830
+
831
+ # Asks the collector to tell us which sub-collector we
832
+ # should be reporting to, and then does the name resolution
833
+ # on that host so we don't block on DNS during the normal
834
+ # course of agent processing
835
+ # def set_collector_host!
836
+ # host = invoke_remote(:get_redirect_host)
837
+ # if host
838
+ # @collector = control.server_from_host(host)
839
+ # end
840
+ # end
841
+
842
+ # Connects to the server to fetch its configuration, then
843
+ # invokes the final configuration with the returned data
658
844
  def query_server_for_configuration
659
- set_collector_host!
660
-
661
845
  finish_setup(connect_to_server)
662
846
  end
847
+
848
+ # Takes a hash of configuration data returned from the
849
+ # server and uses it to set local variables and to
850
+ # initialize various parts of the agent that are configured
851
+ # separately.
852
+ #
853
+ # Can accommodate most arbitrary data - anything extra is
854
+ # ignored unless we say to do something with it here.
663
855
  def finish_setup(config_data)
664
- @agent_id = config_data['agent_run_id']
856
+ return if config_data == nil
857
+ @service.agent_id = config_data['agent_run_id']
665
858
  @report_period = config_data['data_report_period']
666
859
  @url_rules = config_data['url_rules']
667
860
  @beacon_configuration = BeaconConfiguration.new(config_data)
861
+ @server_side_config_enabled = config_data['listen_to_server_config']
668
862
 
863
+ if @server_side_config_enabled
864
+ log.info "Using config from server"
865
+ log.debug "Server provided config: #{config_data.inspect}"
866
+ end
867
+
868
+ control.merge_server_side_config(config_data) if @server_side_config_enabled
869
+ config_transaction_tracer
669
870
  log_connection!(config_data)
670
871
  configure_transaction_tracer!(config_data['collect_traces'], config_data['sample_rate'])
671
872
  configure_error_collector!(config_data['collect_errors'])
672
873
  end
673
-
874
+
875
+ # Logs when we connect to the server, for debugging purposes
876
+ # - makes sure we know if an agent has not connected
674
877
  def log_connection!(config_data)
675
- control.log! "Connected to NewRelic Service at #{@collector}"
676
- log.debug "Agent Run = #{@agent_id}."
878
+ control.log! "Connected to NewRelic Service at #{@service.collector.name}"
879
+ log.debug "Agent Run = #{@service.agent_id}."
677
880
  log.debug "Connection data = #{config_data.inspect}"
678
881
  end
679
882
  end
680
883
  include Connect
681
884
 
885
+
886
+ # Serialize all the important data that the agent might want
887
+ # to send to the server. We could be sending this to file (
888
+ # common in short-running background transactions ) or
889
+ # alternately we could serialize via a pipe or socket to a
890
+ # local aggregation device
682
891
  def serialize
683
892
  accumulator = []
684
893
  accumulator[1] = harvest_transaction_traces if @transaction_sampler
685
894
  accumulator[2] = harvest_errors if @error_collector
686
895
  accumulator[0] = harvest_timeslice_data
896
+ reset_stats
897
+ @metric_ids = {}
687
898
  accumulator
688
899
  end
689
-
690
900
  public :serialize
691
901
 
902
+ # Accepts data as provided by the serialize method and merges
903
+ # it into our current collection of data to send. Can be
904
+ # dangerous if we re-merge the same data more than once - it
905
+ # will be sent multiple times.
692
906
  def merge_data_from(data)
693
907
  metrics, transaction_traces, errors = data
694
908
  @stats_engine.merge_data(metrics) if metrics
695
- if transaction_traces
909
+ if transaction_traces && transaction_traces.respond_to?(:any?) &&
910
+ transaction_traces.any?
696
911
  if @traces
697
- @traces = @traces + transaction_traces
912
+ @traces += transaction_traces
698
913
  else
699
914
  @traces = transaction_traces
700
915
  end
701
916
  end
702
- if errors
917
+ if errors && errors.respond_to?(:any?) && errors.any?
703
918
  if @unsent_errors
704
919
  @unsent_errors = @unsent_errors + errors
705
920
  else
@@ -751,18 +966,26 @@ module NewRelic
751
966
  end
752
967
  end
753
968
 
969
+ # Who am I? Well, this method can tell you your hostname.
754
970
  def determine_host
755
971
  Socket.gethostname
756
972
  end
757
973
 
974
+ # Delegates to the control class to determine the root
975
+ # directory of this project
758
976
  def determine_home_directory
759
977
  control.root
760
978
  end
761
979
 
980
+ # Checks whether this process is a Passenger or Unicorn or Rainbows
981
+ # spawning server - if so, we probably don't intend to report
982
+ # statistics from this process
762
983
  def is_application_spawner?
763
- $0 =~ /ApplicationSpawner|^unicorn\S* master/
984
+ $0 =~ /ApplicationSpawner|^unicorn\S* master|^rainbows master/
764
985
  end
765
986
 
987
+ # calls the busy harvester and collects timeslice data to
988
+ # send later
766
989
  def harvest_timeslice_data(time=Time.now)
767
990
  # this creates timeslices that are harvested below
768
991
  NewRelic::Agent::BusyCalculator.harvest_busy
@@ -772,96 +995,111 @@ module NewRelic
772
995
  @unsent_timeslice_data
773
996
  end
774
997
 
998
+ # takes an array of arrays of spec and id, adds it into the
999
+ # metric cache so we can save the collector some work by
1000
+ # sending integers instead of strings
775
1001
  def fill_metric_id_cache(pairs_of_specs_and_ids)
776
1002
  Array(pairs_of_specs_and_ids).each do |metric_spec, metric_id|
777
1003
  @metric_ids[metric_spec] = metric_id
778
1004
  end
779
1005
  end
780
1006
 
1007
+ # note - exceptions are logged in invoke_remote. If an exception is encountered here,
1008
+ # then the metric data is downsampled for another
1009
+ # transmission later
781
1010
  def harvest_and_send_timeslice_data
782
1011
  now = Time.now
783
1012
  NewRelic::Agent.instance.stats_engine.get_stats_no_scope('Supportability/invoke_remote').record_data_point(0.0)
784
1013
  NewRelic::Agent.instance.stats_engine.get_stats_no_scope('Supportability/invoke_remote/metric_data').record_data_point(0.0)
785
1014
  harvest_timeslice_data(now)
786
- begin
787
- # In this version of the protocol, we get back an assoc array of spec to id.
788
- metric_specs_and_ids = invoke_remote(:metric_data, @agent_id,
789
- @last_harvest_time.to_f,
790
- now.to_f,
791
- @unsent_timeslice_data.values)
792
-
793
- rescue Timeout::Error
794
- # assume that the data was received. chances are that it was
795
- metric_specs_and_ids = []
796
- end
797
-
1015
+ # In this version of the protocol, we get back an assoc array of spec to id.
1016
+ metric_specs_and_ids = @service.metric_data(@last_harvest_time.to_f,
1017
+ now.to_f,
1018
+ @unsent_timeslice_data.values)
1019
+ metric_specs_and_ids ||= []
798
1020
  fill_metric_id_cache(metric_specs_and_ids)
799
1021
 
800
- log.debug "#{now}: sent #{@unsent_timeslice_data.length} timeslices (#{@agent_id}) in #{Time.now - now} seconds"
1022
+ log.debug "#{now}: sent #{@unsent_timeslice_data.length} timeslices (#{@service.agent_id}) in #{Time.now - now} seconds"
801
1023
 
802
1024
  # if we successfully invoked this web service, then clear the unsent message cache.
803
1025
  @unsent_timeslice_data = {}
804
1026
  @last_harvest_time = now
805
-
806
- # handle_messages
807
-
808
- # note - exceptions are logged in invoke_remote. If an exception is encountered here,
809
- # then the metric data is downsampled for another timeslices
810
1027
  end
811
1028
 
1029
+ # Fills the traces array with the harvested transactions from
1030
+ # the transaction sampler, subject to the setting for slowest
1031
+ # transaction threshold
812
1032
  def harvest_transaction_traces
813
1033
  @traces = @transaction_sampler.harvest(@traces, @slowest_transaction_threshold)
814
1034
  @traces
815
1035
  end
816
1036
 
1037
+ def harvest_and_send_slowest_sql
1038
+ # FIXME add the code to try to resend if our connection is down
1039
+ sql_traces = @sql_sampler.harvest
1040
+ unless sql_traces.empty?
1041
+ log.debug "Sending (#{sql_traces.size}) sql traces"
1042
+ begin
1043
+ @service.sql_trace_data(sql_traces)
1044
+ rescue
1045
+ @sql_sampler.merge sql_traces
1046
+ end
1047
+ end
1048
+ end
1049
+
1050
+ # This handles getting the transaction traces and then sending
1051
+ # them across the wire. This includes gathering SQL
1052
+ # explanations, stripping out stack traces, and normalizing
1053
+ # SQL. note that we explain only the sql statements whose
1054
+ # segments' execution times exceed our threshold (to avoid
1055
+ # unnecessary overhead of running explains on fast queries.)
817
1056
  def harvest_and_send_slowest_sample
818
1057
  harvest_transaction_traces
819
1058
  unless @traces.empty?
820
1059
  now = Time.now
821
1060
  log.debug "Sending (#{@traces.length}) transaction traces"
1061
+
822
1062
  begin
823
- # take the traces and prepare them for sending across the
824
- # wire. This includes gathering SQL explanations, stripping
825
- # out stack traces, and normalizing SQL. note that we
826
- # explain only the sql statements whose segments' execution
827
- # times exceed our threshold (to avoid unnecessary overhead
828
- # of running explains on fast queries.)
829
1063
  options = { :keep_backtraces => true }
830
1064
  options[:record_sql] = @record_sql unless @record_sql == :off
831
- options[:explain_sql] = @explain_threshold if @explain_enabled
1065
+ if @transaction_sampler.explain_enabled
1066
+ options[:explain_sql] = @transaction_sampler.explain_threshold
1067
+ end
832
1068
  traces = @traces.collect {|trace| trace.prepare_to_send(options)}
833
- invoke_remote :transaction_sample_data, @agent_id, traces
1069
+ @service.transaction_sample_data(traces)
834
1070
  rescue PostTooBigException
835
1071
  # we tried to send too much data, drop the first trace and
836
1072
  # try again
837
1073
  retry if @traces.shift
838
1074
  end
839
1075
 
840
- log.debug "Sent slowest sample (#{@agent_id}) in #{Time.now - now} seconds"
1076
+ log.debug "Sent slowest sample (#{@service.agent_id}) in #{Time.now - now} seconds"
841
1077
  end
842
1078
 
843
1079
  # if we succeed sending this sample, then we don't need to keep
844
1080
  # the slowest sample around - it has been sent already and we
845
- # can collect the next one
1081
+ # can clear the collection and move on
846
1082
  @traces = nil
847
-
848
- # note - exceptions are logged in invoke_remote. If an
849
- # exception is encountered here, then the slowest sample of is
850
- # determined of the entire period since the last reported
851
- # sample.
852
1083
  end
853
1084
 
1085
+ # Gets the collection of unsent errors from the error
1086
+ # collector. We pass back in an existing array of errors that
1087
+ # may be left over from a previous send
854
1088
  def harvest_errors
855
1089
  @unsent_errors = @error_collector.harvest_errors(@unsent_errors)
856
1090
  @unsent_errors
857
1091
  end
858
1092
 
1093
+ # Handles getting the errors from the error collector and
1094
+ # sending them to the server, and any error cases like trying
1095
+ # to send very large errors - we drop the oldest error on the
1096
+ # floor and try again
859
1097
  def harvest_and_send_errors
860
1098
  harvest_errors
861
1099
  if @unsent_errors && @unsent_errors.length > 0
862
1100
  log.debug "Sending #{@unsent_errors.length} errors"
863
1101
  begin
864
- invoke_remote :error_data, @agent_id, @unsent_errors
1102
+ @service.error_data(@unsent_errors)
865
1103
  rescue PostTooBigException
866
1104
  @unsent_errors.shift
867
1105
  retry
@@ -873,142 +1111,37 @@ module NewRelic
873
1111
  @unsent_errors = []
874
1112
  end
875
1113
  end
876
-
877
- def compress_data(object)
878
- dump = Marshal.dump(object)
879
-
880
- # this checks to make sure mongrel won't choke on big uploads
881
- check_post_size(dump)
882
-
883
- # we currently optimize for CPU here since we get roughly a 10x
884
- # reduction in message size with this, and CPU overhead is at a
885
- # premium. For extra-large posts, we use the higher compression
886
- # since otherwise it actually errors out.
887
-
888
- dump_size = dump.size
889
-
890
- # Compress if content is smaller than 64kb. There are problems
891
- # with bugs in Ruby in some versions that expose us to a risk of
892
- # segfaults if we compress aggressively.
893
- return [dump, 'identity'] if dump_size < (64*1024)
894
-
895
- # medium payloads get fast compression, to save CPU
896
- # big payloads get all the compression possible, to stay under
897
- # the 2,000,000 byte post threshold
898
- compression = dump_size < 2000000 ? Zlib::BEST_SPEED : Zlib::BEST_COMPRESSION
899
-
900
- [Zlib::Deflate.deflate(dump, compression), 'deflate']
901
- end
902
-
903
- def check_post_size(post_string)
904
- # TODO: define this as a config option on the server side
905
- return if post_string.size < control.post_size_limit
906
- log.warn "Tried to send too much data: #{post_string.size} bytes"
907
- raise PostTooBigException
908
- end
909
-
910
- def send_request(opts)
911
- request = Net::HTTP::Post.new(opts[:uri], 'CONTENT-ENCODING' => opts[:encoding], 'HOST' => opts[:collector].name)
912
- request['user-agent'] = user_agent
913
- request.content_type = "application/octet-stream"
914
- request.body = opts[:data]
915
-
916
- log.debug "Connect to #{opts[:collector]}#{opts[:uri]}"
917
-
918
- response = nil
919
- http = control.http_connection(collector)
920
- http.read_timeout = nil
921
- begin
922
- NewRelic::TimerLib.timeout(@request_timeout) do
923
- response = http.request(request)
924
- end
925
- rescue Timeout::Error
926
- log.warn "Timed out trying to post data to New Relic (timeout = #{@request_timeout} seconds)" unless @request_timeout < 30
927
- raise
928
- end
929
- if response.is_a? Net::HTTPServiceUnavailable
930
- raise NewRelic::Agent::ServerConnectionException, "Service unavailable (#{response.code}): #{response.message}"
931
- elsif response.is_a? Net::HTTPGatewayTimeOut
932
- log.debug("Timed out getting response: #{response.message}")
933
- raise Timeout::Error, response.message
934
- elsif response.is_a? Net::HTTPRequestEntityTooLarge
935
- raise PostTooBigException
936
- elsif !(response.is_a? Net::HTTPSuccess)
937
- raise NewRelic::Agent::ServerConnectionException, "Unexpected response from server (#{response.code}): #{response.message}"
938
- end
939
- response
940
- end
941
-
942
- def decompress_response(response)
943
- if response['content-encoding'] != 'gzip'
944
- log.debug "Uncompressed content returned"
945
- return response.body
946
- end
947
- log.debug "Decompressing return value"
948
- i = Zlib::GzipReader.new(StringIO.new(response.body))
949
- i.read
950
- end
951
-
952
- def check_for_exception(response)
953
- dump = decompress_response(response)
954
- value = Marshal.load(dump)
955
- raise value if value.is_a? Exception
956
- value
957
- end
958
-
959
- def remote_method_uri(method)
960
- uri = "/agent_listener/#{PROTOCOL_VERSION}/#{control.license_key}/#{method}"
961
- uri << "?run_id=#{@agent_id}" if @agent_id
962
- uri
963
- end
964
-
965
- def user_agent
966
- ruby_description = ''
967
- # note the trailing space!
968
- ruby_description << "(ruby #{::RUBY_VERSION} #{::RUBY_PLATFORM}) " if defined?(::RUBY_VERSION) && defined?(::RUBY_PLATFORM)
969
- zlib_version = ''
970
- zlib_version << "zlib/#{Zlib.zlib_version}" if defined?(::Zlib) && Zlib.respond_to?(:zlib_version)
971
- "NewRelic-RubyAgent/#{NewRelic::VERSION::STRING} #{ruby_description}#{zlib_version}"
972
- end
973
-
974
- # send a message via post
975
- def invoke_remote(method, *args)
976
- now = Time.now
977
- #determines whether to zip the data or send plain
978
- post_data, encoding = compress_data(args)
979
-
980
- response = send_request({:uri => remote_method_uri(method), :encoding => encoding, :collector => collector, :data => post_data})
981
-
982
- # raises the right exception if the remote server tells it to die
983
- return check_for_exception(response)
984
- rescue NewRelic::Agent::ForceRestartException => e
985
- log.info e.message
986
- raise
987
- rescue SystemCallError, SocketError => e
988
- # These include Errno connection errors
989
- raise NewRelic::Agent::ServerConnectionException, "Recoverable error connecting to the server: #{e}"
1114
+
1115
+ def transmit_data
1116
+ log.debug "Sending data to New Relic Service"
1117
+ harvest_and_send_errors
1118
+ harvest_and_send_slowest_sample
1119
+ harvest_and_send_slowest_sql
1120
+ harvest_and_send_timeslice_data
1121
+ rescue => e
1122
+ retry_count ||= 0
1123
+ retry_count += 1
1124
+ retry unless retry_count > 1
1125
+ raise e
990
1126
  ensure
991
- NewRelic::Agent.instance.stats_engine.get_stats_no_scope('Supportability/invoke_remote').record_data_point((Time.now - now).to_f)
992
- NewRelic::Agent.instance.stats_engine.get_stats_no_scope('Supportability/invoke_remote/' + method.to_s).record_data_point((Time.now - now).to_f)
1127
+ NewRelic::Agent::Database.close_connections unless forked?
993
1128
  end
994
1129
 
1130
+ # This method contacts the server to send remaining data and
1131
+ # let the server know that the agent is shutting down - this
1132
+ # allows us to do things like accurately set the end of the
1133
+ # lifetime of the process
1134
+ #
1135
+ # If this process comes from a parent process, it will not
1136
+ # disconnect, so that the parent process can continue to send data
995
1137
  def graceful_disconnect
996
1138
  if @connected
997
1139
  begin
998
- @request_timeout = 10
999
- if NewRelic::DataSerialization.should_send_data?
1000
- log.debug "Sending data to New Relic Service"
1001
- NewRelic::Agent.load_data
1002
- harvest_and_send_errors
1003
- harvest_and_send_slowest_sample
1004
- harvest_and_send_timeslice_data
1005
- else
1006
- log.debug "Serializing agent data to disk"
1007
- NewRelic::Agent.save_data
1008
- end
1009
- if @connected_pid == $$
1140
+ @service.request_timeout = 10
1141
+ transmit_data
1142
+ if @connected_pid == $$ && !@service.kind_of?(NewRelic::Agent::NewRelicService)
1010
1143
  log.debug "Sending New Relic service agent run shutdown message"
1011
- invoke_remote :shutdown, @agent_id, Time.now.to_f
1144
+ @service.shutdown(Time.now.to_f)
1012
1145
  else
1013
1146
  log.debug "This agent connected from parent process #{@connected_pid}--not sending shutdown"
1014
1147
  end
@@ -1019,19 +1152,8 @@ module NewRelic
1019
1152
  log.debug "Bypassing graceful disconnect - agent not connected"
1020
1153
  end
1021
1154
  end
1022
- def default_sql_obfuscator(sql)
1023
- sql = sql.dup
1024
- # This is hardly readable. Use the unit tests.
1025
- # remove single quoted strings:
1026
- sql.gsub!(/'(.*?[^\\'])??'(?!')/, '?')
1027
- # remove double quoted strings:
1028
- sql.gsub!(/"(.*?[^\\"])??"(?!")/, '?')
1029
- # replace all number literals
1030
- sql.gsub!(/\d+/, "?")
1031
- sql
1032
- end
1033
1155
  end
1034
-
1156
+
1035
1157
  extend ClassMethods
1036
1158
  include InstanceMethods
1037
1159
  include BrowserMonitoring