scout_apm 2.0.0.pre → 2.0.0.pre2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/CHANGELOG.markdown +22 -5
  4. data/Rakefile +5 -0
  5. data/lib/scout_apm.rb +4 -0
  6. data/lib/scout_apm/agent.rb +22 -8
  7. data/lib/scout_apm/agent/reporting.rb +8 -3
  8. data/lib/scout_apm/attribute_arranger.rb +4 -0
  9. data/lib/scout_apm/bucket_name_splitter.rb +3 -3
  10. data/lib/scout_apm/config.rb +5 -2
  11. data/lib/scout_apm/histogram.rb +20 -0
  12. data/lib/scout_apm/instant_reporting.rb +40 -0
  13. data/lib/scout_apm/instruments/action_controller_rails_3_rails4.rb +11 -1
  14. data/lib/scout_apm/instruments/percentile_sampler.rb +38 -0
  15. data/lib/scout_apm/layaway.rb +1 -4
  16. data/lib/scout_apm/layaway_file.rb +26 -2
  17. data/lib/scout_apm/layer.rb +1 -1
  18. data/lib/scout_apm/layer_converters/converter_base.rb +6 -4
  19. data/lib/scout_apm/layer_converters/slow_job_converter.rb +21 -13
  20. data/lib/scout_apm/layer_converters/slow_request_converter.rb +37 -24
  21. data/lib/scout_apm/metric_meta.rb +5 -1
  22. data/lib/scout_apm/metric_set.rb +15 -6
  23. data/lib/scout_apm/reporter.rb +9 -3
  24. data/lib/scout_apm/request_histograms.rb +46 -0
  25. data/lib/scout_apm/scored_item_set.rb +79 -0
  26. data/lib/scout_apm/serializers/payload_serializer_to_json.rb +2 -0
  27. data/lib/scout_apm/serializers/slow_jobs_serializer_to_json.rb +2 -0
  28. data/lib/scout_apm/slow_job_policy.rb +89 -19
  29. data/lib/scout_apm/slow_job_record.rb +18 -1
  30. data/lib/scout_apm/slow_request_policy.rb +80 -12
  31. data/lib/scout_apm/slow_transaction.rb +22 -3
  32. data/lib/scout_apm/store.rb +35 -13
  33. data/lib/scout_apm/tracked_request.rb +63 -11
  34. data/lib/scout_apm/utils/backtrace_parser.rb +4 -4
  35. data/lib/scout_apm/utils/sql_sanitizer.rb +1 -1
  36. data/lib/scout_apm/utils/sql_sanitizer_regex.rb +2 -2
  37. data/lib/scout_apm/utils/sql_sanitizer_regex_1_8_7.rb +2 -2
  38. data/lib/scout_apm/version.rb +1 -1
  39. data/scout_apm.gemspec +1 -0
  40. data/test/test_helper.rb +4 -3
  41. data/test/unit/layaway_test.rb +5 -8
  42. data/test/unit/metric_set_test.rb +101 -0
  43. data/test/unit/scored_item_set_test.rb +65 -0
  44. data/test/unit/serializers/payload_serializer_test.rb +2 -1
  45. data/test/unit/slow_item_set_test.rb +2 -1
  46. data/test/unit/slow_request_policy_test.rb +42 -0
  47. data/test/unit/sql_sanitizer_test.rb +6 -0
  48. metadata +28 -3
@@ -19,10 +19,12 @@ module ScoutApm
19
19
  # render :update
20
20
  # end
21
21
  def scope_layer
22
- @scope_layer ||= walker.walk do |layer|
23
- if layer.type == "Controller"
24
- break layer
25
- end
22
+ @scope_layer ||= find_first_layer_of_type("Controller") || find_first_layer_of_type("Job")
23
+ end
24
+
25
+ def find_first_layer_of_type(layer_type)
26
+ walker.walk do |layer|
27
+ return layer if layer.type == layer_type
26
28
  end
27
29
  end
28
30
  end
@@ -4,15 +4,29 @@ module ScoutApm
4
4
  def initialize(*)
5
5
  @backtraces = []
6
6
  super
7
+
8
+ # After call to super, so @request is populated
9
+ @points = if request.job?
10
+ ScoutApm::Agent.instance.slow_job_policy.score(request)
11
+ else
12
+ -1
13
+ end
7
14
  end
8
15
 
9
- def call
10
- return unless request.job?
16
+ def name
17
+ request.unique_name
18
+ end
19
+
20
+ def score
21
+ @points
22
+ end
11
23
 
12
- job_name = [queue_layer.name, job_layer.name]
24
+ def call
25
+ return nil unless request.job?
26
+ return nil unless queue_layer
27
+ return nil unless job_layer
13
28
 
14
- slow_enough = ScoutApm::Agent.instance.slow_job_policy.slow?(job_name, root_layer.total_call_time)
15
- return unless slow_enough
29
+ ScoutApm::Agent.instance.slow_job_policy.stored!(request)
16
30
 
17
31
  # record the change in memory usage
18
32
  mem_delta = ScoutApm::Instruments::Process::ProcessMemory.rss_to_mb(request.capture_mem_delta!)
@@ -32,8 +46,8 @@ module ScoutApm
32
46
  timing_metrics,
33
47
  allocation_metrics,
34
48
  mem_delta,
35
- job_layer.total_allocations
36
- )
49
+ job_layer.total_allocations,
50
+ score)
37
51
  end
38
52
 
39
53
  def queue_layer
@@ -44,12 +58,6 @@ module ScoutApm
44
58
  @job_layer ||= find_first_layer_of_type("Job")
45
59
  end
46
60
 
47
- def find_first_layer_of_type(layer_type)
48
- walker.walk do |layer|
49
- return layer if layer.type == layer_type
50
- end
51
- end
52
-
53
61
  def create_metrics
54
62
  metric_hash = Hash.new
55
63
  allocation_metric_hash = Hash.new
@@ -4,25 +4,34 @@ module ScoutApm
4
4
  def initialize(*)
5
5
  @backtraces = [] # An Array of MetricMetas that have a backtrace
6
6
  super
7
+
8
+ # After call to super, so @request is populated
9
+ @points = if request.web?
10
+ ScoutApm::Agent.instance.slow_request_policy.score(request)
11
+ else
12
+ -1
13
+ end
14
+ end
15
+
16
+ def name
17
+ request.unique_name
18
+ end
19
+
20
+ def score
21
+ @points
7
22
  end
8
23
 
24
+ # Unconditionally attempts to convert this into a SlowTransaction object.
25
+ # Can return nil if the request didn't have any scope_layer.
9
26
  def call
10
27
  scope = scope_layer
11
- return [nil, {}] unless scope
28
+ return nil unless scope
12
29
 
13
- policy = ScoutApm::Agent.instance.slow_request_policy.capture_type(root_layer.total_call_time)
14
- if policy == ScoutApm::SlowRequestPolicy::CAPTURE_NONE
15
- return [nil, {}]
16
- end
30
+ ScoutApm::Agent.instance.slow_request_policy.stored!(request)
17
31
 
18
32
  # record the change in memory usage
19
33
  mem_delta = ScoutApm::Instruments::Process::ProcessMemory.rss_to_mb(@request.capture_mem_delta!)
20
34
 
21
- # increment the slow transaction count if this is a slow transaction.
22
- meta = MetricMeta.new("SlowTransaction/#{scope.legacy_metric_name}")
23
- stat = MetricStats.new
24
- stat.update!(1)
25
-
26
35
  uri = request.annotations[:uri] || ""
27
36
 
28
37
  timing_metrics, allocation_metrics = create_metrics
@@ -30,23 +39,27 @@ module ScoutApm
30
39
  allocation_metrics = {}
31
40
  end
32
41
 
42
+ ScoutApm::Agent.instance.config.value("ignore_traces").each do |pattern|
43
+ if /#{pattern}/ =~ uri
44
+ ScoutApm::Agent.instance.logger.debug("Skipped recording a trace for #{uri} due to `ignore_traces` pattern: #{pattern}")
45
+ return nil
46
+ end
47
+ end
48
+
33
49
  # Disable stackprof output for now
34
50
  stackprof = [] # request.stackprof
35
51
 
36
- [
37
- SlowTransaction.new(uri,
38
- scope.legacy_metric_name,
39
- root_layer.total_call_time,
40
- timing_metrics,
41
- allocation_metrics,
42
- request.context,
43
- root_layer.stop_time,
44
- stackprof,
45
- mem_delta,
46
- root_layer.total_allocations
47
- ),
48
- { meta => stat }
49
- ]
52
+ SlowTransaction.new(uri,
53
+ scope.legacy_metric_name,
54
+ root_layer.total_call_time,
55
+ timing_metrics,
56
+ allocation_metrics,
57
+ request.context,
58
+ root_layer.stop_time,
59
+ stackprof,
60
+ mem_delta,
61
+ root_layer.total_allocations,
62
+ @points)
50
63
  end
51
64
 
52
65
  # Iterates over the TrackedRequest's MetricMetas that have backtraces and attaches each to correct MetricMeta in the Metric Hash.
@@ -17,7 +17,11 @@ class MetricMeta
17
17
 
18
18
  # Unsure if type or bucket is a better name.
19
19
  def type
20
- bucket
20
+ bucket_type
21
+ end
22
+
23
+ def name
24
+ bucket_name
21
25
  end
22
26
 
23
27
  # A key metric is the "core" of a request - either the Rails controller reached, or the background Job executed
@@ -2,7 +2,7 @@ module ScoutApm
2
2
  class MetricSet
3
3
  # We can't aggregate CPU, Memory, Capacity, or Controller, so pass through these metrics directly
4
4
  # TODO: Figure out a way to not have this duplicate what's in Samplers, and also on server's ingest
5
- PASSTHROUGH_METRICS = ["CPU", "Memory", "Instance", "Controller", "SlowTransaction"]
5
+ PASSTHROUGH_METRICS = ["CPU", "Memory", "Instance", "Controller", "SlowTransaction", "Percentile", "Job"]
6
6
 
7
7
  attr_reader :metrics
8
8
 
@@ -23,11 +23,15 @@ module ScoutApm
23
23
  @metrics[meta].combine!(stat)
24
24
 
25
25
  elsif meta.type == "Errors" # Sadly special cased, we want both raw and aggregate values
26
- @metrics[meta] ||= MetricStats.new
27
- @metrics[meta].combine!(stat)
28
- agg_meta = MetricMeta.new("Errors/Request", :scope => meta.scope)
29
- @metrics[agg_meta] ||= MetricStats.new
30
- @metrics[agg_meta].combine!(stat)
26
+ # When combining MetricSets between different sources, avoid double-counting the aggregate Errors/Request metric (see combine! below)
27
+ @metrics[meta] ||= MetricStats.new
28
+ @metrics[meta].combine!(stat)
29
+
30
+ if !@combine_in_progress
31
+ agg_meta = MetricMeta.new("Errors/Request", :scope => meta.scope)
32
+ @metrics[agg_meta] ||= MetricStats.new
33
+ @metrics[agg_meta].combine!(stat)
34
+ end
31
35
 
32
36
  else # Combine down to a single /all key
33
37
  agg_meta = MetricMeta.new("#{meta.type}/all", :scope => meta.scope)
@@ -36,8 +40,13 @@ module ScoutApm
36
40
  end
37
41
  end
38
42
 
43
+ # Sets a combine_in_progress flag to prevent double-counting Error metrics.
44
+ # Without it, the Errors/Request number would be increasingly off as
45
+ # metric_sets get merged in.
39
46
  def combine!(other)
47
+ @combine_in_progress = true
40
48
  absorb_all(other.metrics)
49
+ @combine_in_progress = false
41
50
  self
42
51
  end
43
52
  end
@@ -8,17 +8,21 @@ module ScoutApm
8
8
  attr_reader :config
9
9
  attr_reader :logger
10
10
  attr_reader :type
11
+ attr_reader :instant_key
11
12
 
12
- def initialize(type = :checkin, config=Agent.instance.config, logger=Agent.instance.logger)
13
+ def initialize(type = :checkin, config=Agent.instance.config, logger=Agent.instance.logger, instant_key=nil)
13
14
  @config = config
14
15
  @logger = logger
15
16
  @type = type
17
+ @instant_key = instant_key
16
18
  end
17
19
 
18
20
  # TODO: Parse & return a real response object, not the HTTP Response object
19
21
  def report(payload, headers = {})
20
- Array(config.value('host')).each do |host|
22
+ # Some posts (typically ones under development) bypass the ingestion pipeline and go directly to the webserver. They use direct_host instead of host.
23
+ hosts = [:deploy_hook, :instant_trace].include?(type) ? config.value('direct_host') : config.value('host')
21
24
 
25
+ Array(hosts).each do |host|
22
26
  full_uri = uri(host)
23
27
  response = post(full_uri, payload, headers)
24
28
  unless response && response.is_a?(Net::HTTPSuccess)
@@ -34,7 +38,9 @@ module ScoutApm
34
38
  when :app_server_load
35
39
  URI.parse("#{host}/apps/app_server_load.scout?key=#{config.value('key')}&name=#{CGI.escape(Environment.instance.application_name)}")
36
40
  when :deploy_hook
37
- URI.parse("https://apm.scoutapp.com/apps/deploy.scout?key=#{config.value('key')}&name=#{CGI.escape(config.value('name'))}")
41
+ URI.parse("#{host}/apps/deploy.scout?key=#{config.value('key')}&name=#{CGI.escape(config.value('name'))}")
42
+ when :instant_trace
43
+ URI.parse("#{host}/apps/instant_trace.scout?key=#{config.value('key')}&name=#{CGI.escape(config.value('name'))}&instant_key=#{instant_key}")
38
44
  end.tap{|u| logger.debug("Posting to #{u.to_s}")}
39
45
  end
40
46
 
@@ -0,0 +1,46 @@
1
+ module ScoutApm
2
+ class RequestHistograms
3
+ DEFAULT_HISTOGRAM_SIZE = 50
4
+
5
+ # Private Accessor:
6
+ # A hash of Endpoint Name to an approximate histogram
7
+ #
8
+ # Each time a new request is requested to see if it's slow or not, we
9
+ # should insert it into the histogram, and get the approximate percentile
10
+ # of that time
11
+ attr_reader :histograms
12
+ private :histograms
13
+
14
+ attr_reader :histogram_size
15
+
16
+ def initialize(histogram_size = DEFAULT_HISTOGRAM_SIZE)
17
+ @histogram_size = histogram_size
18
+ initialize_histograms_hash
19
+ end
20
+
21
+ def each_name
22
+ @histograms.keys.each { |n| yield n }
23
+ end
24
+
25
+ def add(item, value)
26
+ @histograms[item].add(value)
27
+ end
28
+
29
+ def approximate_quantile_of_value(item, value)
30
+ @histograms[item].approximate_quantile_of_value(value)
31
+ end
32
+
33
+ def quantile(item, q)
34
+ @histograms[item].quantile(q)
35
+ end
36
+
37
+ # Wipes all histograms, setting them back to empty
38
+ def reset_all!
39
+ initialize_histograms_hash
40
+ end
41
+
42
+ def initialize_histograms_hash
43
+ @histograms = Hash.new { |h, k| h[k] = NumericHistogram.new(histogram_size) }
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,79 @@
1
+ # Attempts to keep the highest score.
2
+ #
3
+ # Each item must respond to:
4
+ # #call to get the storable item
5
+ # #name to get a unique identifier of the storable
6
+ # #score to get a numeric score, where higher is better
7
+ module ScoutApm
8
+ class ScoredItemSet
9
+ include Enumerable
10
+
11
+ # A number larger than any score we will actually get.
12
+ ARBITRARILY_LARGE = 100000000
13
+
14
+ # Without otherwise saying, default the size to this
15
+ DEFAULT_MAX_SIZE = 10
16
+
17
+ attr_reader :max_size
18
+ attr_reader :items
19
+
20
+ def initialize(max_size = DEFAULT_MAX_SIZE)
21
+ @items = {}
22
+ @max_size = max_size
23
+ end
24
+
25
+ def each
26
+ items.each do |(_, (_, item))|
27
+ yield item
28
+ end
29
+ end
30
+
31
+ # This function is a large if statement, with a few branches. See inline comments for each branch.
32
+ def <<(new_item)
33
+ return if new_item.name == :unknown
34
+
35
+ # If we have this item in the hash already, compare the new & old ones, and store
36
+ # the new one only if it's higher score.
37
+ if items.has_key?(new_item.name)
38
+ if new_item.score > items[new_item.name].first
39
+ store!(new_item)
40
+ end
41
+
42
+
43
+ # If the set is full, then we have to see if we evict anything to store
44
+ # this one
45
+ elsif full?
46
+ smallest_name, smallest_score = items.inject([nil, ARBITRARILY_LARGE]) do |(memo_name, memo_score), (name, (stored_score, _))|
47
+ if stored_score < memo_score
48
+ [name, stored_score]
49
+ else
50
+ [memo_name, memo_score]
51
+ end
52
+ end
53
+
54
+ if smallest_score < new_item.score
55
+ items.delete(smallest_name)
56
+ store!(new_item)
57
+ end
58
+
59
+
60
+ # Set isn't full, and we've not seen this new_item, so go ahead and store it.
61
+ else
62
+ store!(new_item)
63
+ end
64
+ end
65
+
66
+
67
+ private
68
+
69
+ def full?
70
+ items.size >= max_size
71
+ end
72
+
73
+ def store!(new_item)
74
+ if !new_item.name.nil? # Never store a nil name.
75
+ items[new_item.name] = [new_item.score, new_item.call]
76
+ end
77
+ end
78
+ end
79
+ end
@@ -59,6 +59,8 @@ module ScoutApm
59
59
  "[#{all_the_elements.join(",")}]"
60
60
  when Numeric
61
61
  formatee
62
+ when Time
63
+ %Q["#{formatee.iso8601}"]
62
64
  when nil
63
65
  "null"
64
66
  else # strings and everything
@@ -24,6 +24,8 @@ module ScoutApm
24
24
  "metrics" => MetricsToJsonSerializer.new(job.metrics).as_json, # New style of metrics
25
25
  "allocation_metrics" => MetricsToJsonSerializer.new(job.allocation_metrics).as_json, # New style of metrics
26
26
  "context" => job.context.to_hash,
27
+
28
+ "score" => job.score,
27
29
  }
28
30
  end
29
31
  end
@@ -1,29 +1,99 @@
1
- # Create one of these at startup time, and ask it if a certain worker's
2
- # processing time is slow enough for us to collect a slow trace.
3
- #
4
- # Keeps track of a histogram of times for each worker class (spearately), and
5
- # uses a percentile of normal to mark individual runs as "slow".
6
- #
7
- # This assumes that all worker calls will be requested once to `slow?`, so that
8
- # the data can be stored
1
+ # Long running class that determines if, and in how much detail a potentially
2
+ # slow job should be recorded
3
+
9
4
  module ScoutApm
10
5
  class SlowJobPolicy
11
- DEFAULT_HISTOGRAM_SIZE = 50
6
+ CAPTURE_TYPES = [
7
+ CAPTURE_DETAIL = "capture_detail",
8
+ CAPTURE_NONE = "capture_none",
9
+ ]
10
+
11
+ # Adjust speed points. See the speed_points method below for how this multiplier is applied.
12
+ POINT_MULTIPLIER_SPEED = 0.25
13
+
14
+ # For each minute we haven't seen an endpoint
15
+ POINT_MULTIPLIER_AGE = 0.25
16
+
17
+ # Outliers are worth up to "1000ms" of weight
18
+ POINT_MULTIPLIER_PERCENTILE = 1.0
19
+
20
+ # A hash of Job Names to the last time we stored a slow trace for it.
21
+ #
22
+ # Defaults to a start time that is pretty close to application boot time.
23
+ # So the "age" of an endpoint we've never seen is the time the application
24
+ # has been running.
25
+ attr_reader :last_seen
12
26
 
13
- QUANTILE = 95
14
27
 
15
- def initialize(histogram_size = DEFAULT_HISTOGRAM_SIZE)
16
- @histograms = Hash.new { |h, k| h[k] = NumericHistogram.new(histogram_size) }
28
+ def initialize
29
+ zero_time = Time.now
30
+ @last_seen = Hash.new { |h, k| h[k] = zero_time }
17
31
  end
18
32
 
19
- # worker: just the worker class name. "PasswordResetJob" or similar
20
- # total_time: runtime of the job in seconds
21
- # returns true if this request should be stored in higher trace detail, false otherwise
22
- def slow?(worker, total_time)
23
- @histograms[worker].add(total_time)
24
- return false if @histograms[worker].total == 1 # First call is never slow
33
+ def stored!(request)
34
+ last_seen[unique_name_for(request)] = Time.now
35
+ end
36
+
37
+ # Determine if this job trace should be fully analyzed by scoring it
38
+ # across several metrics, and then determining if that's good enough to
39
+ # make it into this minute's payload.
40
+ #
41
+ # Due to the combining nature of the agent & layaway file, there's no
42
+ # guarantee that a high scoring local champion will still be a winner when
43
+ # they go up to "regionals" and are compared against the other processes
44
+ # running on a node.
45
+ def score(request)
46
+ unique_name = request.unique_name
47
+ if unique_name == :unknown
48
+ return -1 # A negative score, should never be good enough to store.
49
+ end
50
+
51
+ total_time = request.root_layer.total_call_time
52
+
53
+ # How long has it been since we've seen this?
54
+ age = Time.now - last_seen[unique_name]
55
+
56
+ # What approximate percentile was this request?
57
+ percentile = ScoutApm::Agent.instance.request_histograms.approximate_quantile_of_value(unique_name, total_time)
58
+
59
+ return speed_points(total_time) + percentile_points(percentile) + age_points(age)
60
+ end
61
+
62
+ private
63
+
64
+ def unique_name_for(request)
65
+ scope_layer = LayerConverters::ConverterBase.new(request).scope_layer
66
+ if scope_layer
67
+ scope_layer.legacy_metric_name
68
+ else
69
+ :unknown
70
+ end
71
+ end
72
+
73
+ # Time in seconds
74
+ # Logarithm keeps huge times from swamping the other metrics.
75
+ # 1+ is necessary to keep the log function in positive territory.
76
+ def speed_points(time)
77
+ Math.log(1 + time) * POINT_MULTIPLIER_SPEED
78
+ end
79
+
80
+ def percentile_points(percentile)
81
+ if percentile < 40
82
+ 0.4 # Don't put much emphasis on capturing low percentiles.
83
+ elsif percentile < 60
84
+ 1.4 # Highest here to get mean traces
85
+ elsif percentile < 90
86
+ 0.7 # Between 60 & 90% is fine.
87
+ elsif percentile >= 90
88
+ 1.4 # Highest here to get 90+%ile traces
89
+ else
90
+ # impossible.
91
+ percentile
92
+ end
93
+ end
25
94
 
26
- total_time >= @histograms[worker].quantile(QUANTILE)
95
+ def age_points(age)
96
+ age / 60.0 * POINT_MULTIPLIER_AGE
27
97
  end
28
98
  end
29
99
  end