scout_apm 1.5.5 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.markdown +8 -0
  3. data/lib/scout_apm.rb +3 -0
  4. data/lib/scout_apm/agent.rb +23 -25
  5. data/lib/scout_apm/agent/reporting.rb +8 -3
  6. data/lib/scout_apm/attribute_arranger.rb +4 -0
  7. data/lib/scout_apm/bucket_name_splitter.rb +3 -3
  8. data/lib/scout_apm/config.rb +4 -2
  9. data/lib/scout_apm/histogram.rb +20 -0
  10. data/lib/scout_apm/instruments/percentile_sampler.rb +37 -0
  11. data/lib/scout_apm/instruments/process/process_cpu.rb +12 -0
  12. data/lib/scout_apm/instruments/process/process_memory.rb +12 -0
  13. data/lib/scout_apm/layer_converters/converter_base.rb +6 -4
  14. data/lib/scout_apm/layer_converters/slow_job_converter.rb +21 -13
  15. data/lib/scout_apm/layer_converters/slow_request_converter.rb +28 -22
  16. data/lib/scout_apm/metric_meta.rb +5 -1
  17. data/lib/scout_apm/metric_set.rb +1 -1
  18. data/lib/scout_apm/reporter.rb +3 -1
  19. data/lib/scout_apm/request_histograms.rb +46 -0
  20. data/lib/scout_apm/scored_item_set.rb +79 -0
  21. data/lib/scout_apm/serializers/slow_jobs_serializer_to_json.rb +2 -0
  22. data/lib/scout_apm/slow_job_policy.rb +89 -19
  23. data/lib/scout_apm/slow_job_record.rb +20 -1
  24. data/lib/scout_apm/slow_request_policy.rb +80 -12
  25. data/lib/scout_apm/slow_transaction.rb +19 -2
  26. data/lib/scout_apm/store.rb +45 -15
  27. data/lib/scout_apm/tracked_request.rb +33 -10
  28. data/lib/scout_apm/version.rb +1 -1
  29. data/test/test_helper.rb +4 -3
  30. data/test/unit/layaway_test.rb +5 -8
  31. data/test/unit/scored_item_set_test.rb +65 -0
  32. data/test/unit/serializers/payload_serializer_test.rb +2 -1
  33. data/test/unit/slow_item_set_test.rb +2 -1
  34. data/test/unit/slow_request_policy_test.rb +42 -0
  35. metadata +9 -2
@@ -17,7 +17,11 @@ class MetricMeta
17
17
 
18
18
  # Unsure if type or bucket is a better name.
19
19
  def type
20
- bucket
20
+ bucket_type
21
+ end
22
+
23
+ def name
24
+ bucket_name
21
25
  end
22
26
 
23
27
  # A key metric is the "core" of a request - either the Rails controller reached, or the background Job executed
@@ -2,7 +2,7 @@ module ScoutApm
2
2
  class MetricSet
3
3
  # We can't aggregate CPU, Memory, Capacity, or Controller, so pass through these metrics directly
4
4
  # TODO: Figure out a way to not have this duplicate what's in Samplers, and also on server's ingest
5
- PASSTHROUGH_METRICS = ["CPU", "Memory", "Instance", "Controller", "SlowTransaction"]
5
+ PASSTHROUGH_METRICS = ["CPU", "Memory", "Instance", "Controller", "SlowTransaction", "Percentile", "Job"]
6
6
 
7
7
  attr_reader :metrics
8
8
 
@@ -17,8 +17,10 @@ module ScoutApm
17
17
 
18
18
  # TODO: Parse & return a real response object, not the HTTP Response object
19
19
  def report(payload, headers = {})
20
- Array(config.value('host')).each do |host|
20
+ # Some posts (typically ones under development) bypass the ingestion pipeline and go directly to the webserver. They use direct_host instead of host
21
+ hosts = [:deploy_hook, :instant_trace].include?(type) ? config.value('direct_host') : config.value('host')
21
22
 
23
+ Array(hosts).each do |host|
22
24
  full_uri = uri(host)
23
25
  response = post(full_uri, payload, headers)
24
26
  unless response && response.is_a?(Net::HTTPSuccess)
@@ -0,0 +1,46 @@
1
+ module ScoutApm
2
+ class RequestHistograms
3
+ DEFAULT_HISTOGRAM_SIZE = 50
4
+
5
+ # Private Accessor:
6
+ # A hash of Endpoint Name to an approximate histogram
7
+ #
8
+ # Each time a new request is requested to see if it's slow or not, we
9
+ # should insert it into the histogram, and get the approximate percentile
10
+ # of that time
11
+ attr_reader :histograms
12
+ private :histograms
13
+
14
+ attr_reader :histogram_size
15
+
16
+ def initialize(histogram_size = DEFAULT_HISTOGRAM_SIZE)
17
+ @histogram_size = histogram_size
18
+ initialize_histograms_hash
19
+ end
20
+
21
+ def each_name
22
+ @histograms.keys.each { |n| yield n }
23
+ end
24
+
25
+ def add(item, value)
26
+ @histograms[item].add(value)
27
+ end
28
+
29
+ def approximate_quantile_of_value(item, value)
30
+ @histograms[item].approximate_quantile_of_value(value)
31
+ end
32
+
33
+ def quantile(item, q)
34
+ @histograms[item].quantile(q)
35
+ end
36
+
37
+ # Wipes all histograms, setting them back to empty
38
+ def reset_all!
39
+ initialize_histograms_hash
40
+ end
41
+
42
+ def initialize_histograms_hash
43
+ @histograms = Hash.new { |h, k| h[k] = NumericHistogram.new(histogram_size) }
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,79 @@
1
+ # Attempts to keep the highest score.
2
+ #
3
+ # Each item must respond to:
4
+ # #call to get the storable item
5
+ # #name to get a unique identifier of the storable
6
+ # #score to get a numeric score, where higher is better
7
+ module ScoutApm
8
+ class ScoredItemSet
9
+ include Enumerable
10
+
11
+ # A number larger than any score we will actually get.
12
+ ARBITRARILY_LARGE = 100000000
13
+
14
+ # Without otherwise saying, default the size to this
15
+ DEFAULT_MAX_SIZE = 10
16
+
17
+ attr_reader :max_size
18
+ attr_reader :items
19
+
20
+ def initialize(max_size = DEFAULT_MAX_SIZE)
21
+ @items = {}
22
+ @max_size = max_size
23
+ end
24
+
25
+ def each
26
+ items.each do |(_, (_, item))|
27
+ yield item
28
+ end
29
+ end
30
+
31
+ # This function is a large if statement, with a few branches. See inline comments for each branch.
32
+ def <<(new_item)
33
+ return if new_item.name == :unknown
34
+
35
+ # If we have this item in the hash already, compare the new & old ones, and store
36
+ # the new one only if it's higher score.
37
+ if items.has_key?(new_item.name)
38
+ if new_item.score > items[new_item.name].first
39
+ store!(new_item)
40
+ end
41
+
42
+
43
+ # If the set is full, then we have to see if we evict anything to store
44
+ # this one
45
+ elsif full?
46
+ smallest_name, smallest_score = items.inject([nil, ARBITRARILY_LARGE]) do |(memo_name, memo_score), (name, (stored_score, _))|
47
+ if stored_score < memo_score
48
+ [name, stored_score]
49
+ else
50
+ [memo_name, memo_score]
51
+ end
52
+ end
53
+
54
+ if smallest_score < new_item.score
55
+ items.delete(smallest_name)
56
+ store!(new_item)
57
+ end
58
+
59
+
60
+ # Set isn't full, and we've not seen this new_item, so go ahead and store it.
61
+ else
62
+ store!(new_item)
63
+ end
64
+ end
65
+
66
+
67
+ private
68
+
69
+ def full?
70
+ items.size >= max_size
71
+ end
72
+
73
+ def store!(new_item)
74
+ if !new_item.name.nil? # Never store a nil name.
75
+ items[new_item.name] = [new_item.score, new_item.call]
76
+ end
77
+ end
78
+ end
79
+ end
@@ -20,6 +20,8 @@ module ScoutApm
20
20
 
21
21
  "metrics" => MetricsToJsonSerializer.new(job.metrics).as_json, # New style of metrics
22
22
  "context" => job.context.to_hash,
23
+
24
+ "score" => job.score,
23
25
  }
24
26
  end
25
27
  end
@@ -1,29 +1,99 @@
1
- # Create one of these at startup time, and ask it if a certain worker's
2
- # processing time is slow enough for us to collect a slow trace.
3
- #
4
- # Keeps track of a histogram of times for each worker class (separately), and
5
- # uses a percentile of normal to mark individual runs as "slow".
6
- #
7
- # This assumes that all worker calls will be requested once to `slow?`, so that
8
- # the data can be stored
1
+ # Long running class that determines if, and in how much detail a potentially
2
+ # slow job should be recorded
3
+
9
4
  module ScoutApm
10
5
  class SlowJobPolicy
11
- DEFAULT_HISTOGRAM_SIZE = 50
6
+ CAPTURE_TYPES = [
7
+ CAPTURE_DETAIL = "capture_detail",
8
+ CAPTURE_NONE = "capture_none",
9
+ ]
10
+
11
+ # Adjust speed points. See the speed_points function below.
12
+ POINT_MULTIPLIER_SPEED = 0.25
13
+
14
+ # For each minute we haven't seen an endpoint
15
+ POINT_MULTIPLIER_AGE = 0.25
16
+
17
+ # Outliers are worth up to "1000ms" of weight
18
+ POINT_MULTIPLIER_PERCENTILE = 1.0
19
+
20
+ # A hash of Job Names to the last time we stored a slow trace for it.
21
+ #
22
+ # Defaults to a start time that is pretty close to application boot time.
23
+ # So the "age" of an endpoint we've never seen is the time the application
24
+ # has been running.
25
+ attr_reader :last_seen
12
26
 
13
- QUANTILE = 95
14
27
 
15
- def initialize(histogram_size = DEFAULT_HISTOGRAM_SIZE)
16
- @histograms = Hash.new { |h, k| h[k] = NumericHistogram.new(histogram_size) }
28
+ def initialize
29
+ zero_time = Time.now
30
+ @last_seen = Hash.new { |h, k| h[k] = zero_time }
17
31
  end
18
32
 
19
- # worker: just the worker class name. "PasswordResetJob" or similar
20
- # total_time: runtime of the job in seconds
21
- # returns true if this request should be stored in higher trace detail, false otherwise
22
- def slow?(worker, total_time)
23
- @histograms[worker].add(total_time)
24
- return false if @histograms[worker].total == 1 # First call is never slow
33
+ def stored!(request)
34
+ last_seen[unique_name_for(request)] = Time.now
35
+ end
36
+
37
+ # Determine if this job trace should be fully analyzed by scoring it
38
+ # across several metrics, and then determining if that's good enough to
39
+ # make it into this minute's payload.
40
+ #
41
+ # Due to the combining nature of the agent & layaway file, there's no
42
+ # guarantee that a high scoring local champion will still be a winner when
43
+ # they go up to "regionals" and are compared against the other processes
44
+ # running on a node.
45
+ def score(request)
46
+ unique_name = request.unique_name
47
+ if unique_name == :unknown
48
+ return -1 # A negative score, should never be good enough to store.
49
+ end
50
+
51
+ total_time = request.root_layer.total_call_time
52
+
53
+ # How long has it been since we've seen this?
54
+ age = Time.now - last_seen[unique_name]
55
+
56
+ # What approximate percentile was this request?
57
+ percentile = ScoutApm::Agent.instance.request_histograms.approximate_quantile_of_value(unique_name, total_time)
58
+
59
+ return speed_points(total_time) + percentile_points(percentile) + age_points(age)
60
+ end
61
+
62
+ private
63
+
64
+ def unique_name_for(request)
65
+ scope_layer = LayerConverters::ConverterBase.new(request).scope_layer
66
+ if scope_layer
67
+ scope_layer.legacy_metric_name
68
+ else
69
+ :unknown
70
+ end
71
+ end
72
+
73
+ # Time in seconds
74
+ # Logarithm keeps huge times from swamping the other metrics.
75
+ # 1+ is necessary to keep the log function in positive territory.
76
+ def speed_points(time)
77
+ Math.log(1 + time) * POINT_MULTIPLIER_SPEED
78
+ end
79
+
80
+ def percentile_points(percentile)
81
+ if percentile < 40
82
+ 0.4 # Don't put much emphasis on capturing low percentiles.
83
+ elsif percentile < 60
84
+ 1.4 # Highest here to get mean traces
85
+ elsif percentile < 90
86
+ 0.7 # Between 60 & 90% is fine.
87
+ elsif percentile >= 90
88
+ 1.4 # Highest here to get 90+%ile traces
89
+ else
90
+ # impossible.
91
+ percentile
92
+ end
93
+ end
25
94
 
26
- total_time >= @histograms[worker].quantile(QUANTILE)
95
+ def age_points(age)
96
+ age / 60.0 * POINT_MULTIPLIER_AGE
27
97
  end
28
98
  end
29
99
  end
@@ -15,7 +15,9 @@ module ScoutApm
15
15
 
16
16
  attr_reader :metrics
17
17
 
18
- def initialize(queue_name, job_name, time, total_time, exclusive_time, context, metrics)
18
+ attr_reader :score
19
+
20
+ def initialize(queue_name, job_name, time, total_time, exclusive_time, context, metrics, score)
19
21
  @queue_name = queue_name
20
22
  @job_name = job_name
21
23
  @time = time
@@ -23,11 +25,28 @@ module ScoutApm
23
25
  @exclusive_time = exclusive_time
24
26
  @context = context
25
27
  @metrics = metrics
28
+ @score = score
26
29
  end
27
30
 
28
31
  def metric_name
29
32
  "Job/#{queue_name}/#{job_name}"
30
33
  end
31
34
 
35
+ ########################
36
+ # Scorable interface
37
+ #
38
+ # Needed so we can merge ScoredItemSet instances
39
+ def call
40
+ self
41
+ end
42
+
43
+ def name
44
+ metric_name
45
+ end
46
+
47
+ def score
48
+ @score
49
+ end
50
+
32
51
  end
33
52
  end
@@ -1,8 +1,5 @@
1
1
  # Long running class that determines if, and in how much detail a potentially
2
2
  # slow transaction should be recorded
3
- #
4
- # Rules:
5
- # - Runtime must be slower than a threshold
6
3
 
7
4
  module ScoutApm
8
5
  class SlowRequestPolicy
@@ -11,21 +8,92 @@ module ScoutApm
11
8
  CAPTURE_NONE = "capture_none",
12
9
  ]
13
10
 
14
- # It's not slow unless it's at least this slow
15
- SLOW_REQUEST_TIME_THRESHOLD = 2.0 # seconds
11
+ # Adjust speed points. See the function
12
+ POINT_MULTIPLIER_SPEED = 0.25
16
13
 
17
- def capture_type(time)
18
- if !slow_enough?(time)
19
- CAPTURE_NONE
20
- else
21
- CAPTURE_DETAIL
14
+ # For each minute we haven't seen an endpoint
15
+ POINT_MULTIPLIER_AGE = 0.25
16
+
17
+ # Outliers are worth up to "1000ms" of weight
18
+ POINT_MULTIPLIER_PERCENTILE = 1.0
19
+
20
+ # A hash of Endpoint Name to the last time we stored a slow transaction for it.
21
+ #
22
+ # Defaults to a start time that is pretty close to application boot time.
23
+ # So the "age" of an endpoint we've never seen is the time the application
24
+ # has been running.
25
+ attr_reader :last_seen
26
+
27
+
28
+ def initialize
29
+ zero_time = Time.now
30
+ @last_seen = Hash.new { |h, k| h[k] = zero_time }
31
+ end
32
+
33
+ def stored!(request)
34
+ last_seen[unique_name_for(request)] = Time.now
35
+ end
36
+
37
+ # Determine if this request trace should be fully analyzed by scoring it
38
+ # across several metrics, and then determining if that's good enough to
39
+ # make it into this minute's payload.
40
+ #
41
+ # Due to the combining nature of the agent & layaway file, there's no
42
+ # guarantee that a high scoring local champion will still be a winner when
43
+ # they go up to "regionals" and are compared against the other processes
44
+ # running on a node.
45
+ def score(request)
46
+ unique_name = request.unique_name
47
+ if unique_name == :unknown
48
+ return -1 # A negative score, should never be good enough to store.
22
49
  end
50
+
51
+ total_time = request.root_layer.total_call_time
52
+
53
+ # How long has it been since we've seen this?
54
+ age = Time.now - last_seen[unique_name]
55
+
56
+ # What approximate percentile was this request?
57
+ percentile = ScoutApm::Agent.instance.request_histograms.approximate_quantile_of_value(unique_name, total_time)
58
+
59
+ return speed_points(total_time) + percentile_points(percentile) + age_points(age)
23
60
  end
24
61
 
25
62
  private
26
63
 
27
- def slow_enough?(time)
28
- time > SLOW_REQUEST_TIME_THRESHOLD
64
+ def unique_name_for(request)
65
+ scope_layer = LayerConverters::ConverterBase.new(request).scope_layer
66
+ if scope_layer
67
+ scope_layer.legacy_metric_name
68
+ else
69
+ :unknown
70
+ end
71
+ end
72
+
73
+ # Time in seconds
74
+ # Logarithm keeps huge times from swamping the other metrics.
75
+ # 1+ is necessary to keep the log function in positive territory.
76
+ def speed_points(time)
77
+ Math.log(1 + time) * POINT_MULTIPLIER_SPEED
78
+ end
79
+
80
+ def percentile_points(percentile)
81
+ if percentile < 40
82
+ 0.4 # Don't put much emphasis on capturing low percentiles.
83
+ elsif percentile < 60
84
+ 1.4 # Highest here to get mean traces
85
+ elsif percentile < 90
86
+ 0.7 # Between 60 & 90% is fine.
87
+ elsif percentile >= 90
88
+ 1.4 # Highest here to get 90+%ile traces
89
+ else
90
+ # impossible.
91
+ percentile
92
+ end
93
+ end
94
+
95
+ def age_points(age)
96
+ age / 60.0 * POINT_MULTIPLIER_AGE
29
97
  end
30
98
  end
31
99
  end
@@ -12,7 +12,7 @@ module ScoutApm
12
12
  attr_reader :prof
13
13
  attr_reader :raw_prof
14
14
 
15
- def initialize(uri, metric_name, total_call_time, metrics, context, time, raw_stackprof)
15
+ def initialize(uri, metric_name, total_call_time, metrics, context, time, raw_stackprof, score)
16
16
  @uri = uri
17
17
  @metric_name = metric_name
18
18
  @total_call_time = total_call_time
@@ -21,6 +21,7 @@ module ScoutApm
21
21
  @time = time
22
22
  @prof = ScoutApm::StackprofTreeCollapser.new(raw_stackprof).call
23
23
  @raw_prof = raw_stackprof # Send whole data up to server
24
+ @score = score
24
25
  end
25
26
 
26
27
  # Used to remove metrics when the payload will be too large.
@@ -34,12 +35,28 @@ module ScoutApm
34
35
  end
35
36
 
36
37
  def as_json
37
- json_attributes = [:key, :time, :total_call_time, :uri, [:context, :context_hash], :prof]
38
+ json_attributes = [:key, :time, :total_call_time, :uri, [:context, :context_hash], :prof, :score]
38
39
  ScoutApm::AttributeArranger.call(self, json_attributes)
39
40
  end
40
41
 
41
42
  def context_hash
42
43
  context.to_hash
43
44
  end
45
+
46
+ ########################
47
+ # Scorable interface
48
+ #
49
+ # Needed so we can merge ScoredItemSet instances
50
+ def call
51
+ self
52
+ end
53
+
54
+ def name
55
+ metric_name
56
+ end
57
+
58
+ def score
59
+ @score
60
+ end
44
61
  end
45
62
  end