RubyGems - scout_apm - Versions diffs - 2.0.0.pre → 2.0.0.pre2 - Mend

scout_apm 2.0.0.pre → 2.0.0.pre2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

checksums.yaml +4 -4
data/.gitignore +1 -0
data/CHANGELOG.markdown +22 -5
data/Rakefile +5 -0
data/lib/scout_apm.rb +4 -0
data/lib/scout_apm/agent.rb +22 -8
data/lib/scout_apm/agent/reporting.rb +8 -3
data/lib/scout_apm/attribute_arranger.rb +4 -0
data/lib/scout_apm/bucket_name_splitter.rb +3 -3
data/lib/scout_apm/config.rb +5 -2
data/lib/scout_apm/histogram.rb +20 -0
data/lib/scout_apm/instant_reporting.rb +40 -0
data/lib/scout_apm/instruments/action_controller_rails_3_rails4.rb +11 -1
data/lib/scout_apm/instruments/percentile_sampler.rb +38 -0
data/lib/scout_apm/layaway.rb +1 -4
data/lib/scout_apm/layaway_file.rb +26 -2
data/lib/scout_apm/layer.rb +1 -1
data/lib/scout_apm/layer_converters/converter_base.rb +6 -4
data/lib/scout_apm/layer_converters/slow_job_converter.rb +21 -13
data/lib/scout_apm/layer_converters/slow_request_converter.rb +37 -24
data/lib/scout_apm/metric_meta.rb +5 -1
data/lib/scout_apm/metric_set.rb +15 -6
data/lib/scout_apm/reporter.rb +9 -3
data/lib/scout_apm/request_histograms.rb +46 -0
data/lib/scout_apm/scored_item_set.rb +79 -0
data/lib/scout_apm/serializers/payload_serializer_to_json.rb +2 -0
data/lib/scout_apm/serializers/slow_jobs_serializer_to_json.rb +2 -0
data/lib/scout_apm/slow_job_policy.rb +89 -19
data/lib/scout_apm/slow_job_record.rb +18 -1
data/lib/scout_apm/slow_request_policy.rb +80 -12
data/lib/scout_apm/slow_transaction.rb +22 -3
data/lib/scout_apm/store.rb +35 -13
data/lib/scout_apm/tracked_request.rb +63 -11
data/lib/scout_apm/utils/backtrace_parser.rb +4 -4
data/lib/scout_apm/utils/sql_sanitizer.rb +1 -1
data/lib/scout_apm/utils/sql_sanitizer_regex.rb +2 -2
data/lib/scout_apm/utils/sql_sanitizer_regex_1_8_7.rb +2 -2
data/lib/scout_apm/version.rb +1 -1
data/scout_apm.gemspec +1 -0
data/test/test_helper.rb +4 -3
data/test/unit/layaway_test.rb +5 -8
data/test/unit/metric_set_test.rb +101 -0
data/test/unit/scored_item_set_test.rb +65 -0
data/test/unit/serializers/payload_serializer_test.rb +2 -1
data/test/unit/slow_item_set_test.rb +2 -1
data/test/unit/slow_request_policy_test.rb +42 -0
data/test/unit/sql_sanitizer_test.rb +6 -0
metadata +28 -3

data/lib/scout_apm/slow_job_record.rb CHANGED

@@ -19,8 +19,9 @@ module ScoutApm
     attr_reader :allocations
     attr_reader :hostname
     attr_reader :seconds_since_startup
+    attr_reader :score
-    def initialize(queue_name, job_name, time, total_time, exclusive_time, context, metrics, allocation_metrics, mem_delta, allocations)
+    def initialize(queue_name, job_name, time, total_time, exclusive_time, context, metrics, allocation_metrics, mem_delta, allocations, score)
       @queue_name = queue_name
       @job_name = job_name
       @time = time
@@ -33,6 +34,7 @@ module ScoutApm
       @allocations = allocations
       @seconds_since_startup = (Time.now - ScoutApm::Agent.instance.process_start_time)
       @hostname = ScoutApm::Environment.instance.hostname
+      @score = score
       ScoutApm::Agent.instance.logger.debug { "Slow Job [#{metric_name}] - Call Time: #{total_call_time} Mem Delta: #{mem_delta}"}
     end
@@ -40,5 +42,20 @@ module ScoutApm
       "Job/#{queue_name}/#{job_name}"
     end
+    ########################
+    # Scorable interface
+    #
+    # Needed so we can merge ScoredItemSet instances
+    def call
+      self
+    end
+    def name
+      metric_name
+    end
+    def score
+      @score
+    end
   end
 end

data/lib/scout_apm/slow_request_policy.rb CHANGED

@@ -1,8 +1,5 @@
 # Long running class that determines if, and in how much detail, a potentially
 # slow transaction should be recorded
-#
-# Rules:
-#   - Runtime must be slower than a threshold
 module ScoutApm
   class SlowRequestPolicy
@@ -11,21 +8,92 @@ module ScoutApm
       CAPTURE_NONE    = "capture_none",
     ]
-    # It's not slow unless it's at least this slow
-    SLOW_REQUEST_TIME_THRESHOLD = 2.0 # seconds
+    # Multiplier applied to speed points. See #speed_points
+    POINT_MULTIPLIER_SPEED = 0.25
-    def capture_type(time)
-      if !slow_enough?(time)
-        CAPTURE_NONE
-      else
-        CAPTURE_DETAIL
+    # For each minute we haven't seen an endpoint
+    POINT_MULTIPLIER_AGE = 0.25
+    # Outliers are worth up to "1000ms" of weight
+    POINT_MULTIPLIER_PERCENTILE = 1.0
+    # A hash of Endpoint Name to the last time we stored a slow transaction for it.
+    #
+    # Defaults to a start time that is pretty close to application boot time.
+    # So the "age" of an endpoint we've never seen is the time the application
+    # has been running.
+    attr_reader :last_seen
+    def initialize
+      zero_time = Time.now
+      @last_seen = Hash.new { |h, k| h[k] = zero_time }
+    end
+    def stored!(request)
+      last_seen[unique_name_for(request)] = Time.now
+    end
+    # Determine if this request trace should be fully analyzed by scoring it
+    # across several metrics, and then determining if that's good enough to
+    # make it into this minute's payload.
+    #
+    # Due to the combining nature of the agent & layaway file, there's no
+    # guarantee that a high scoring local champion will still be a winner when
+    # they go up to "regionals" and are compared against the other processes
+    # running on a node.
+    def score(request)
+      unique_name = request.unique_name
+      if unique_name == :unknown
+        return -1 # A negative score, should never be good enough to store.
       end
+      total_time = request.root_layer.total_call_time
+      # How long has it been since we've seen this?
+      age = Time.now - last_seen[unique_name]
+      # What approximate percentile was this request?
+      percentile = ScoutApm::Agent.instance.request_histograms.approximate_quantile_of_value(unique_name, total_time)
+      return speed_points(total_time) + percentile_points(percentile) + age_points(age)
     end
     private
-    def slow_enough?(time)
-      time > SLOW_REQUEST_TIME_THRESHOLD
+    def unique_name_for(request)
+      scope_layer = LayerConverters::ConverterBase.new(request).scope_layer
+      if scope_layer
+        scope_layer.legacy_metric_name
+      else
+        :unknown
+      end
+    end
+    # Time in seconds
+    # Logarithm keeps huge times from swamping the other metrics.
+    # 1+ is necessary to keep the log function in positive territory.
+    def speed_points(time)
+      Math.log(1 + time) * POINT_MULTIPLIER_SPEED
+    end
+    def percentile_points(percentile)
+      if percentile < 40
+        0.4 # Don't put much emphasis on capturing low percentiles.
+      elsif percentile < 60
+        1.4 # Highest here to get mean traces
+      elsif percentile < 90
+        0.7 # Between 60 & 90% is fine.
+      elsif percentile >= 90
+        1.4 # Highest here to get 90+%ile traces
+      else
+        # impossible.
+        percentile
+      end
+    end
+    def age_points(age)
+      age / 60.0 * POINT_MULTIPLIER_AGE
     end
   end
 end

data/lib/scout_apm/slow_transaction.rb CHANGED

@@ -17,7 +17,7 @@ module ScoutApm
     attr_accessor :hostname # hack - we need to reset these server side.
     attr_accessor :seconds_since_startup # hack - we need to reset these server side.
-    def initialize(uri, metric_name, total_call_time, metrics, allocation_metrics, context, time, raw_stackprof, mem_delta, allocations)
+    def initialize(uri, metric_name, total_call_time, metrics, allocation_metrics, context, time, raw_stackprof, mem_delta, allocations, score)
       @uri = uri
       @metric_name = metric_name
       @total_call_time = total_call_time
@@ -27,11 +27,14 @@ module ScoutApm
       @time = time
       @prof = ScoutApm::StackprofTreeCollapser.new(raw_stackprof).call
       @raw_prof = raw_stackprof # Send whole data up to server
       @mem_delta = mem_delta
       @allocations = allocations
       @seconds_since_startup = (Time.now - ScoutApm::Agent.instance.process_start_time)
       @hostname = ScoutApm::Environment.instance.hostname
-      ScoutApm::Agent.instance.logger.debug { "Slow Request [#{uri}] - Call Time: #{total_call_time} Mem Delta: #{mem_delta}"}
+      @score = score
+      ScoutApm::Agent.instance.logger.debug { "Slow Request [#{uri}] - Call Time: #{total_call_time} Mem Delta: #{mem_delta} Score: #{score}"}
     end
     # Used to remove metrics when the payload will be too large.
@@ -45,12 +48,28 @@ module ScoutApm
     end
     def as_json
-      json_attributes = [:key, :time, :total_call_time, :uri, [:context, :context_hash], :prof, :mem_delta, :allocations, :seconds_since_startup, :hostname]
+      json_attributes = [:key, :time, :total_call_time, :uri, [:context, :context_hash], :score, :prof, :mem_delta, :allocations, :seconds_since_startup, :hostname]
       ScoutApm::AttributeArranger.call(self, json_attributes)
     end
     def context_hash
       context.to_hash
     end
+    ########################
+    # Scorable interface
+    #
+    # Needed so we can merge ScoredItemSet instances
+    def call
+      self
+    end
+    def name
+      metric_name
+    end
+    def score
+      @score
+    end
   end
 end

data/lib/scout_apm/store.rb CHANGED

@@ -22,7 +22,7 @@ module ScoutApm
     # Save newly collected metrics
     def track!(metrics, options={})
       @mutex.synchronize {
-        current_period.merge_metrics!(metrics)
+        current_period.absorb_metrics!(metrics)
       }
     end
@@ -107,11 +107,12 @@ module ScoutApm
   # One period of Storage. Typically 1 minute
   class StoreReportingPeriod
-    # A SlowItemSet to store slow transactions in
-    attr_reader :slow_transactions
-    # A SlowItemSet to store slow jobs in
-    attr_reader :slow_jobs
+    # A ScoredItemSet holding the "best" request traces for the period
+    attr_reader :request_traces
+    # A ScoredItemSet holding the "best" job traces for the period
+    attr_reader :job_traces
     # A StoreReportingPeriodTimestamp representing the time that this
     # collection of metrics is for
@@ -122,31 +123,50 @@ module ScoutApm
     def initialize(timestamp)
       @timestamp = timestamp
-      @slow_transactions = SlowItemSet.new
-      @slow_jobs = SlowItemSet.new
+      @request_traces = ScoredItemSet.new
+      @job_traces = ScoredItemSet.new
       @metric_set = MetricSet.new
       @jobs = Hash.new
     end
+    # Merges another StoreReportingPeriod into this one
+    def merge(other)
+      self.
+        merge_metrics!(other.metric_set).
+        merge_slow_transactions!(other.slow_transactions_payload).
+        merge_jobs!(other.jobs).
+        merge_slow_jobs!(other.slow_jobs_payload)
+      self
+    end
     #################################
     # Add metrics as they are recorded
     #################################
-    def merge_metrics!(metrics)
+    # For absorbing an array of metric {Meta => Stat} records
+    def absorb_metrics!(metrics)
       metric_set.absorb_all(metrics)
       self
     end
+    # For merging when you have another metric_set object
+    # Makes sure that you don't duplicate error count records
+    def merge_metrics!(other_metric_set)
+      metric_set.combine!(other_metric_set)
+      self
+    end
     def merge_slow_transactions!(new_transactions)
       Array(new_transactions).each do |one_transaction|
-        slow_transactions << one_transaction
+        request_traces << one_transaction
       end
       self
     end
     def merge_jobs!(jobs)
-      jobs.each do |job|
+      Array(jobs).each do |job|
         if @jobs.has_key?(job)
           @jobs[job].combine!(job)
         else
@@ -159,8 +179,10 @@ module ScoutApm
     def merge_slow_jobs!(new_jobs)
       Array(new_jobs).each do |job|
-        slow_jobs << job
+        job_traces << job
       end
+      self
     end
     #################################
@@ -171,7 +193,7 @@ module ScoutApm
     end
     def slow_transactions_payload
-      slow_transactions.to_a
+      request_traces.to_a
     end
     def jobs
@@ -179,7 +201,7 @@ module ScoutApm
     end
     def slow_jobs_payload
-      slow_jobs.to_a
+      job_traces.to_a
     end
     #################################

data/lib/scout_apm/tracked_request.rb CHANGED

@@ -39,6 +39,10 @@ module ScoutApm
     # with same names across multiple types.
     attr_accessor :call_counts
+    # If there's an instant_key, pass the transaction trace on for immediate reporting (in addition to the usual background aggregation).
+    # This is set in the controller instrumentation (ActionControllerRails3Rails4).
+    attr_accessor :instant_key
     BACKTRACE_THRESHOLD = 0.5 # the minimum threshold in seconds to record the backtrace for a metric.
     def initialize
@@ -50,6 +54,7 @@ module ScoutApm
       @root_layer = nil
       @stackprof = nil
       @error = false
+      @instant_key = nil
       @mem_start = mem_usage
     end
@@ -67,6 +72,17 @@ module ScoutApm
       return if ignoring_children?
       layer = @layers.pop
+      # Safeguard against a mismatch in the layer tracking in an instrument.
+      # This class works under the assumption that start & stop layers are
+      # lined up correctly. If stop_layer gets called twice when it should
+      # only have been called once, you'll end up with this error.
+      if layer.nil?
+        ScoutApm::Agent.instance.logger.warn("Error stopping layer, was nil. Root Layer: #{@root_layer.inspect}")
+        stop_request
+        return
+      end
       layer.record_stop_time!
       layer.record_allocations!
@@ -87,7 +103,7 @@ module ScoutApm
     # instrumentation early, and gradually learn more about the request that
     # actually happened as we go (for instance, the # of records found, or the
     # actual SQL generated).
-    #
+    #
     # Returns nil in the case there is no current layer. That would be normal
     # for a completed TrackedRequest
     def current_layer
@@ -202,6 +218,10 @@ module ScoutApm
       request_type == "web"
     end
+    def instant?
+      instant_key
+    end
     ###################################
     # Persist the Request
     ###################################
@@ -211,27 +231,59 @@ module ScoutApm
     def record!
       @recorded = true
+      # Update immediate and long-term histograms for both job and web requests
+      if unique_name != :unknown
+        ScoutApm::Agent.instance.request_histograms.add(unique_name, root_layer.total_call_time)
+        ScoutApm::Agent.instance.request_histograms_resettable.add(unique_name, root_layer.total_call_time)
+      end
       metrics = LayerConverters::MetricConverter.new(self).call
       ScoutApm::Agent.instance.store.track!(metrics)
-      slow, slow_metrics = LayerConverters::SlowRequestConverter.new(self).call
-      ScoutApm::Agent.instance.store.track_slow_transaction!(slow)
-      ScoutApm::Agent.instance.store.track!(slow_metrics)
       error_metrics = LayerConverters::ErrorConverter.new(self).call
       ScoutApm::Agent.instance.store.track!(error_metrics)
-      queue_time_metrics = LayerConverters::RequestQueueTimeConverter.new(self).call
-      ScoutApm::Agent.instance.store.track!(queue_time_metrics)
+      allocation_metrics = LayerConverters::AllocationMetricConverter.new(self).call
+      ScoutApm::Agent.instance.store.track!(allocation_metrics)
+      if web?
+        # Don't #call this - that's the job of the ScoredItemSet later.
+        slow_converter = LayerConverters::SlowRequestConverter.new(self)
+        ScoutApm::Agent.instance.store.track_slow_transaction!(slow_converter)
+        queue_time_metrics = LayerConverters::RequestQueueTimeConverter.new(self).call
+        ScoutApm::Agent.instance.store.track!(queue_time_metrics)
-      job = LayerConverters::JobConverter.new(self).call
-      ScoutApm::Agent.instance.store.track_job!(job)
+        # If there's an instant_key, it means we need to report this right away
+        if instant?
+          trace = slow_converter.call
+          ScoutApm::InstantReporting.new(trace, instant_key).call()
+        end
+      end
-      slow_job = LayerConverters::SlowJobConverter.new(self).call
-      ScoutApm::Agent.instance.store.track_slow_job!(slow_job)
+      if job?
+        job_metrics = LayerConverters::JobConverter.new(self).call
+        ScoutApm::Agent.instance.store.track_job!(job_metrics)
+        job_converter = LayerConverters::SlowJobConverter.new(self)
+        ScoutApm::Agent.instance.store.track_slow_job!(job_converter)
+      end
       allocation_metrics = LayerConverters::AllocationMetricConverter.new(self).call
       ScoutApm::Agent.instance.store.track!(allocation_metrics)
+    end
+    # Only call this after the request is complete
+    def unique_name
+      @unique_name ||= begin
+                         scope_layer = LayerConverters::ConverterBase.new(self).scope_layer
+                         if scope_layer
+                           scope_layer.legacy_metric_name
+                         else
+                           :unknown
+                         end
+                       end
     end
     # Have we already persisted this request?

data/lib/scout_apm/utils/backtrace_parser.rb CHANGED

@@ -6,21 +6,21 @@ module ScoutApm
   module Utils
     class BacktraceParser
+      APP_FRAMES = 3 # will return up to 3 frames from the app stack.
       def initialize(call_stack)
         @call_stack = call_stack
         # We can't use a constant as it'd be too early to fetch environment info
         @@app_dir_regex ||= /\A(#{ScoutApm::Environment.instance.root.to_s.gsub('/','\/')}\/)(app\/(.+))/.freeze
       end
-      # Given a call stack Array, grabs the first call within the application root directory.
+      # Given a call stack Array, grabs the first +APP_FRAMES+ callers within the application root directory.
       def call
-        # We used to return an array of up to 5 elements...this will return a single element-array for backwards compatibility.
-        # Only the first element is used in Github code display.
         stack = []
         @call_stack.each_with_index do |c,i|
           if m = c.match(@@app_dir_regex)
             stack << m[2]
-            break
+            break if stack.size == APP_FRAMES
           end
         end
         stack