RubyGems - puma-plugin-telemetry_too - Versions diffs - 0.0.1.alpha1 - Mend

puma-plugin-telemetry_too 0.0.1.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

checksums.yaml +7 -0
data/.rspec +3 -0
data/.rubocop.yml +13 -0
data/.tool-versions +1 -0
data/CHANGELOG.md +115 -0
data/CODE_OF_CONDUCT.md +84 -0
data/LICENSE.txt +21 -0
data/README.md +190 -0
data/Rakefile +10 -0
data/docs/example-datadog_backlog_size.png +0 -0
data/docs/example-datadog_queue_time.png +0 -0
data/docs/examples.md +163 -0
data/lib/puma/plugin/telemetry_too/config.rb +132 -0
data/lib/puma/plugin/telemetry_too/data.rb +287 -0
data/lib/puma/plugin/telemetry_too/formatters/json_formatter.rb +18 -0
data/lib/puma/plugin/telemetry_too/formatters/logfmt_formatter.rb +16 -0
data/lib/puma/plugin/telemetry_too/formatters/passthrough_formatter.rb +16 -0
data/lib/puma/plugin/telemetry_too/targets/base_formatting_target.rb +46 -0
data/lib/puma/plugin/telemetry_too/targets/datadog_statsd_target.rb +51 -0
data/lib/puma/plugin/telemetry_too/targets/io_target.rb +27 -0
data/lib/puma/plugin/telemetry_too/targets/log_target.rb +29 -0
data/lib/puma/plugin/telemetry_too/transforms/cloud_watch_transform.rb +23 -0
data/lib/puma/plugin/telemetry_too/transforms/l2met_transform.rb +46 -0
data/lib/puma/plugin/telemetry_too/transforms/passthrough_transform.rb +16 -0
data/lib/puma/plugin/telemetry_too/version.rb +9 -0
data/lib/puma/plugin/telemetry_too.rb +118 -0
data/lib/rack/request_queue_time_middleware.rb +60 -0
metadata +97 -0

data/lib/puma/plugin/telemetry_too/config.rb ADDED Viewed

@@ -0,0 +1,132 @@
+# frozen_string_literal: true
+module Puma
+  class Plugin
+    module TelemetryToo
+      # Configuration object for plugin
+      class Config
+        DEFAULT_PUMA_TELEMETRY = [
+          # Total booted workers.
+          'workers.booted',
+          # Total number of workers configured.
+          'workers.total',
+          # Current number of threads spawned.
+          'workers.spawned_threads',
+          # Maximum number of threads that can run.
+          'workers.max_threads',
+          # Number of requests performed so far.
+          'workers.requests_count',
+          # Number of requests waiting to be processed.
+          'queue.backlog',
+          # Maximum number of requests that have been fully buffered by the
+          # Reactor and placed in a ready queue, but have not yet been picked
+          # up by a server thread. This stat is reset on every call, so it's
+          # the maximum value observed since the last stat call.
+          'queue.backlog_max',
+          # Maximum number of requests held in Puma's Reactor, which is used for
+          # asynchronously buffering request bodies. This stat is reset on every
+          # call, so it's the maximum value observed since the last stat call.
+          'queue.reactor_max',
+          # Free capacity that could be utilized, e.g. if the backlog
+          # is growing and we still have capacity available, it
+          # could mean that load balancing is not performing well.
+          'queue.capacity'
+        ].freeze
+        TARGETS = {
+          dogstatsd: TelemetryToo::Targets::DatadogStatsdTarget,
+          io: TelemetryToo::Targets::IOTarget,
+          log: TelemetryToo::Targets::LogTarget
+        }.freeze
+        # Whether telemetry should run with Puma
+        # - default: false
+        attr_accessor :enabled
+        # Number of seconds to delay first telemetry
+        # - default: 5
+        attr_accessor :initial_delay
+        # Seconds between publishing telemetry
+        # - default: 5
+        attr_accessor :frequency
+        # List of targets which are meant to publish telemetry.
+        # Target should implement `#call` method accepting
+        # a single argument - so it can be even a simple proc.
+        # - default: []
+        attr_accessor :targets
+        # Which metrics to publish from puma stats. You can select
+        # a subset from default ones that interest you the most.
+        # - default: DEFAULT_PUMA_TELEMETRY
+        attr_accessor :puma_telemetry
+        # Whether to publish socket telemetry.
+        # - default: false
+        attr_accessor :socket_telemetry
+        # Symbol representing method to parse the `Socket::Option`, or
+        # the whole implementation as a lambda. Available options:
+        # - `:inspect`, based on the `Socket::Option#inspect` method,
+        #   it's the safest and slowest way to extract the info. `inspect`
+        #   output might not be available, e.g. on AWS Fargate
+        # - `:unpack`, parse binary data given by `Socket::Option`. Fastest
+        #   way (12x compared to `inspect`) but depends on kernel headers
+        #   and fields ordering within the struct. It should almost always
+        #   match though. DEFAULT
+        # - proc/lambda, `Socket::Option` will be given as an argument, it
+        #   should return the value of `unacked` field as an integer.
+        #
+        attr_accessor :socket_parser
+        def initialize
+          @enabled = false
+          @initial_delay = 5
+          @frequency = 5
+          @targets = []
+          @puma_telemetry = DEFAULT_PUMA_TELEMETRY
+          @socket_telemetry = false
+          @socket_parser = :unpack
+        end
+        def enabled?
+          !!@enabled
+        end
+        def socket_telemetry!
+          # These structs are platform specific, and not available on macOS,
+          # for example. If they're undefined, then we cannot capture socket
+          # telemetry. We'll warn in that case.
+          if defined?(Socket::SOL_TCP) && defined?(Socket::TCP_INFO)
+            @socket_telemetry = true
+          else
+            warn("Cannot capture socket telemetry on this platform (#{RUBY_PLATFORM}); socket_telemetry is disabled.")
+          end
+        end
+        def socket_telemetry?
+          @socket_telemetry
+        end
+        def add_target(name_or_target, **args)
+          return @targets.push(name_or_target) unless name_or_target.is_a?(Symbol)
+          target = TARGETS.fetch(name_or_target) do
+            raise TelemetryToo::Error, "Unknown Target: #{name_or_target.inspect}, #{args.inspect}"
+          end
+          @targets.push(target.new(**args))
+        end
+      end
+    end
+  end
+end

data/lib/puma/plugin/telemetry_too/data.rb ADDED Viewed

@@ -0,0 +1,287 @@
+# frozen_string_literal: true
+module Puma
+  class Plugin
+    module TelemetryToo
+      # Helper for working with Puma stats
+      module CommonData
+        TELEMETRY_TO_METHODS = {
+          'workers.booted' => :workers_booted,
+          'workers.total' => :workers_total,
+          'workers.spawned_threads' => :workers_spawned_threads,
+          'workers.max_threads' => :workers_max_threads,
+          'workers.requests_count' => :workers_requests_count,
+          'queue.backlog' => :queue_backlog,
+          'queue.backlog_max' => :queue_backlog_max,
+          'queue.reactor_max' => :queue_reactor_max,
+          'queue.capacity' => :queue_capacity
+        }.freeze
+        def initialize(stats)
+          @stats = stats
+        end
+        def workers_booted
+          @stats.fetch(:booted_workers, 1)
+        end
+        def workers_total
+          @stats.fetch(:workers, 1)
+        end
+        def metrics(selected)
+          selected.each_with_object({}) do |metric, obj|
+            next unless TELEMETRY_TO_METHODS.key?(metric)
+            obj[metric] = public_send(TELEMETRY_TO_METHODS[metric])
+          end
+        end
+      end
+      # Handles the case of non-clustered mode, where `workers` isn't configured
+      class WorkerData
+        include CommonData
+        def workers_max_threads
+          @stats.fetch(:max_threads, 0)
+        end
+        def workers_requests_count
+          @stats.fetch(:requests_count, 0)
+        end
+        def workers_spawned_threads
+          @stats.fetch(:running, 0)
+        end
+        def queue_backlog
+          @stats.fetch(:backlog, 0)
+        end
+        def queue_capacity
+          @stats.fetch(:pool_capacity, 0)
+        end
+        def queue_reactor_max
+          @stats.fetch(:reactor_max, 0)
+        end
+        def queue_backlog_max
+          @stats.fetch(:backlog_max, 0)
+        end
+      end
+      # Handles the case of clustered mode, where we have statistics
+      # for all the workers. This class takes care of summing all
+      # relevant data.
+      class ClusteredData
+        include CommonData
+        def workers_max_threads
+          sum_stat(:max_threads)
+        end
+        def workers_requests_count
+          sum_stat(:requests_count)
+        end
+        def workers_spawned_threads
+          sum_stat(:running)
+        end
+        def queue_backlog
+          sum_stat(:backlog)
+        end
+        def queue_capacity
+          sum_stat(:pool_capacity)
+        end
+        def queue_reactor_max
+          sum_stat(:reactor_max)
+        end
+        def queue_backlog_max
+          sum_stat(:backlog_max)
+        end
+        private
+        def sum_stat(stat)
+          @stats[:worker_status].reduce(0) do |sum, data|
+            (data.dig(:last_status, stat) || 0) + sum
+          end
+        end
+      end
+      # Pulls TCP INFO data from socket
+      class SocketData
+        UNACKED_REGEXP = /\ unacked=(?<unacked>\d+)\ /
+        def initialize(ios, parser)
+          @sockets = ios.select { |io| io.respond_to?(:getsockopt) && io.is_a?(TCPSocket) }
+          @parser =
+            case parser
+            when :inspect then method(:parse_with_inspect)
+            when :unpack then method(:parse_with_unpack)
+            when Proc then parser
+            end
+        end
+        # Number of unacknowledged connections in the sockets, which
+        # we know as socket backlog.
+        #
+        def unacked
+          @sockets.sum do |socket|
+            @parser.call(socket.getsockopt(Socket::SOL_TCP,
+                                           Socket::TCP_INFO))
+          end
+        end
+        def metrics
+          {
+            'sockets.backlog' => unacked
+          }
+        end
+        private
+        # The Socket::Option returned by `getsockopt` doesn't provide
+        # any kind of accessors for data inside. It decodes it on demand
+        # for `inspect` as strings in C implementation. It looks like
+        #
+        #     #<Socket::Option: INET TCP INFO state=LISTEN
+        #                                     ca_state=Open
+        #                                     retransmits=0
+        #                                     probes=0
+        #                                     backoff=0
+        #                                     options=0
+        #                                     rto=0.000000s
+        #                                     ato=0.000000s
+        #                                     snd_mss=0
+        #                                     rcv_mss=0
+        #                                     unacked=0
+        #                                     sacked=5
+        #                                     lost=0
+        #                                     retrans=0
+        #                                     fackets=0
+        #                                     last_data_sent=0.000s
+        #                                     last_ack_sent=0.000s
+        #                                     last_data_recv=0.000s
+        #                                     last_ack_recv=0.000s
+        #                                     pmtu=0
+        #                                     rcv_ssthresh=0
+        #                                     rtt=0.000000s
+        #                                     rttvar=0.000000s
+        #                                     snd_ssthresh=0
+        #                                     snd_cwnd=10
+        #                                     advmss=0
+        #                                     reordering=3
+        #                                     rcv_rtt=0.000000s
+        #                                     rcv_space=0
+        #                                     total_retrans=0
+        #                                     (128 bytes too long)>
+        #
+        # That's why pulling the `unacked` field by parsing
+        # `inspect` output is one of the ways to retrieve it.
+        #
+        def parse_with_inspect(tcp_info)
+          tcp_match = tcp_info.inspect.match(UNACKED_REGEXP)
+          return 0 if tcp_match.nil?
+          tcp_match[:unacked].to_i
+        end
+        # The above inspect data might not be available everywhere (looking at you
+        # AWS Fargate Host running on kernel 4.14!), but we might still recover it
+        # by manually unpacking the binary data based on linux headers. For example
+        # below is tcp info struct from `linux/tcp.h` header file, from problematic
+        # host rocking kernel 4.14.
+        #
+        #     struct tcp_info {
+        #         __u8    tcpi_state;
+        #         __u8    tcpi_ca_state;
+        #         __u8    tcpi_retransmits;
+        #         __u8    tcpi_probes;
+        #         __u8    tcpi_backoff;
+        #         __u8    tcpi_options;
+        #         __u8    tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4;
+        #         __u8    tcpi_delivery_rate_app_limited:1;
+        #
+        #         __u32   tcpi_rto;
+        #         __u32   tcpi_ato;
+        #         __u32   tcpi_snd_mss;
+        #         __u32   tcpi_rcv_mss;
+        #
+        #         __u32   tcpi_unacked;
+        #         __u32   tcpi_sacked;
+        #         __u32   tcpi_lost;
+        #         __u32   tcpi_retrans;
+        #         __u32   tcpi_fackets;
+        #
+        #         /* Times. */
+        #         __u32   tcpi_last_data_sent;
+        #         __u32   tcpi_last_ack_sent;     /* Not remembered, sorry. */
+        #         __u32   tcpi_last_data_recv;
+        #         __u32   tcpi_last_ack_recv;
+        #
+        #         /* Metrics. */
+        #         __u32   tcpi_pmtu;
+        #         __u32   tcpi_rcv_ssthresh;
+        #         __u32   tcpi_rtt;
+        #         __u32   tcpi_rttvar;
+        #         __u32   tcpi_snd_ssthresh;
+        #         __u32   tcpi_snd_cwnd;
+        #         __u32   tcpi_advmss;
+        #         __u32   tcpi_reordering;
+        #
+        #         __u32   tcpi_rcv_rtt;
+        #         __u32   tcpi_rcv_space;
+        #
+        #         __u32   tcpi_total_retrans;
+        #
+        #         __u64   tcpi_pacing_rate;
+        #         __u64   tcpi_max_pacing_rate;
+        #         __u64   tcpi_bytes_acked;    /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
+        #         __u64   tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */
+        #         __u32   tcpi_segs_out;       /* RFC4898 tcpEStatsPerfSegsOut */
+        #         __u32   tcpi_segs_in;        /* RFC4898 tcpEStatsPerfSegsIn */
+        #
+        #         __u32   tcpi_notsent_bytes;
+        #         __u32   tcpi_min_rtt;
+        #         __u32   tcpi_data_segs_in;      /* RFC4898 tcpEStatsDataSegsIn */
+        #         __u32   tcpi_data_segs_out;     /* RFC4898 tcpEStatsDataSegsOut */
+        #
+        #         __u64   tcpi_delivery_rate;
+        #
+        #         __u64   tcpi_busy_time;      /* Time (usec) busy sending data */
+        #         __u64   tcpi_rwnd_limited;   /* Time (usec) limited by receive window */
+        #         __u64   tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */
+        #     };
+        #
+        # Now, knowing the types and order of the fields, we can easily parse
+        # the binary data by using
+        # - `C` flag for `__u8` type - 8-bit unsigned (unsigned char)
+        # - `L` flag for `__u32` type - 32-bit unsigned, native endian (uint32_t)
+        # - `Q` flag for `__u64` type - 64-bit unsigned, native endian (uint64_t)
+        #
+        # Complete `unpack` would look like `C8 L24 Q4 L6 Q4`, but we are only
+        # interested in `unacked` field at the moment, that's why we only parse
+        # till this field by unpacking with `C8 L5`.
+        #
+        # If you find that it's not giving correct results, then please fall back
+        # to inspect, or update this code to accept unpack sequence. But in the
+        # end unpack is preferable, as it's 12x faster than inspect.
+        #
+        # Tested against:
+        # - Amazon Linux 2 with kernel 4.14 & 5.10
+        # - Ubuntu 20.04 with kernel 5.13
+        #
+        def parse_with_unpack(tcp_info)
+          tcp_info.unpack('C8L5').last
+        end
+      end
+    end
+  end
+end

data/lib/puma/plugin/telemetry_too/formatters/json_formatter.rb ADDED Viewed

@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+require 'json'
+module Puma
+  class Plugin
+    module TelemetryToo
+      module Formatters
+        # JSON formatter, expects `call` method accepting telemetry hash
+        class JSONFormatter
+          def self.call(telemetry)
+            ::JSON.dump(telemetry)
+          end
+        end
+      end
+    end
+  end
+end

data/lib/puma/plugin/telemetry_too/formatters/logfmt_formatter.rb ADDED Viewed

@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+module Puma
+  class Plugin
+    module TelemetryToo
+      module Formatters
+        # Logfmt formatter, expects `call` method accepting telemetry hash
+        class LogfmtFormatter
+          def self.call(telemetry)
+            telemetry.map { |k, v| "#{String(k)}=#{v.inspect}" }.join(' ')
+          end
+        end
+      end
+    end
+  end
+end

data/lib/puma/plugin/telemetry_too/formatters/passthrough_formatter.rb ADDED Viewed

@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+module Puma
+  class Plugin
+    module TelemetryToo
+      module Formatters
+        # A pass-through formatter - it returns the telemetry Hash it was given
+        class PassthroughFormatter
+          def self.call(telemetry)
+            telemetry
+          end
+        end
+      end
+    end
+  end
+end

data/lib/puma/plugin/telemetry_too/targets/base_formatting_target.rb ADDED Viewed

@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+require_relative '../formatters/json_formatter'
+require_relative '../formatters/logfmt_formatter'
+require_relative '../formatters/passthrough_formatter'
+require_relative '../transforms/cloud_watch_transform'
+require_relative '../transforms/l2met_transform'
+require_relative '../transforms/passthrough_transform'
+module Puma
+  class Plugin
+    module TelemetryToo
+      module Targets
+        # A base class for other Targets concerned with formatting telemetry
+        class BaseFormattingTarget
+          def initialize(formatter: :json, transform: :cloud_watch)
+            @formatter = FORMATTERS.fetch(formatter) { formatter }
+            @transform = TRANSFORMS.fetch(transform) { transform }
+          end
+          def call(_telemetry)
+            raise "#{__method__} must be implemented by #{self.class.name}"
+          end
+          private
+          attr_reader :formatter, :transform
+          FORMATTERS = {
+            json: Formatters::JSONFormatter,
+            logfmt: Formatters::LogfmtFormatter,
+            passthrough: Formatters::PassthroughFormatter
+          }.freeze
+          private_constant :FORMATTERS
+          TRANSFORMS = {
+            cloud_watch: Transforms::CloudWatchTranform,
+            l2met: Transforms::L2metTransform,
+            passthrough: Transforms::PassthroughTransform
+          }.freeze
+          private_constant :TRANSFORMS
+        end
+      end
+    end
+  end
+end

data/lib/puma/plugin/telemetry_too/targets/datadog_statsd_target.rb ADDED Viewed

@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+module Puma
+  class Plugin
+    module TelemetryToo
+      module Targets
+        # Target wrapping Datadog Statsd client. You can configure
+        # all details like _metrics prefix_ and _tags_ in the client
+        # itself.
+        #
+        # ## Example
+        #
+        #     require "datadog/statsd"
+        #
+        #     client = Datadog::Statsd.new(namespace: "ruby.puma",
+        #                                  tags: {
+        #                                    service: "my-webapp",
+        #                                    env: ENV["RAILS_ENV"],
+        #                                    version: ENV["CODE_VERSION"]
+        #                                  })
+        #
+        #     DatadogStatsdTarget.new(client: client)
+        #
+        class DatadogStatsdTarget
+          def initialize(client:)
+            @client = client
+          end
+          # We are using the `gauge` metric type, which means that only the last
+          # value will get sent to Datadog. The DD Statsd client has been using an
+          # extra thread since v5 for aggregating metrics before it sends them.
+          #
+          # This means that we could publish metrics from here several times
+          # before they get flushed from the aggregation thread, and when they
+          # do, only the last values will get sent.
+          #
+          # That's why we are explicitly calling flush here, in order to persist
+          # all metrics, and not only the most recent ones.
+          #
+          def call(telemetry)
+            telemetry.each do |metric, value|
+              @client.gauge(metric, value)
+            end
+            @client.flush(sync: true)
+          end
+        end
+      end
+    end
+  end
+end

data/lib/puma/plugin/telemetry_too/targets/io_target.rb ADDED Viewed

@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+require_relative 'base_formatting_target'
+module Puma
+  class Plugin
+    module TelemetryToo
+      module Targets
+        # Simple IO Target, publishing metrics to STDOUT or logs
+        class IOTarget < BaseFormattingTarget
+          def initialize(io: $stdout, formatter: :json, transform: :cloud_watch)
+            super(formatter: formatter, transform: transform)
+            @io = io
+          end
+          def call(telemetry)
+            io.puts(formatter.call(transform.call(telemetry)))
+          end
+          private
+          attr_reader :io
+        end
+      end
+    end
+  end
+end

data/lib/puma/plugin/telemetry_too/targets/log_target.rb ADDED Viewed

@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+require 'logger'
+require_relative 'base_formatting_target'
+module Puma
+  class Plugin
+    module TelemetryToo
+      module Targets
+        # Simple Log Target, publishing metrics to a Ruby ::Logger at stdout
+        # at the INFO log level
+        class LogTarget < BaseFormattingTarget
+          def initialize(logger: ::Logger.new($stdout), formatter: :logfmt, transform: :passthrough)
+            super(formatter: formatter, transform: transform)
+            @logger = logger
+          end
+          def call(telemetry)
+            logger.info(formatter.call(transform.call(telemetry)))
+          end
+          private
+          attr_reader :logger
+        end
+      end
+    end
+  end
+end

data/lib/puma/plugin/telemetry_too/transforms/cloud_watch_transform.rb ADDED Viewed

@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+require 'json'
+module Puma
+  class Plugin
+    module TelemetryToo
+      module Transforms
+        # Replace dots with dashes for better support of AWS CloudWatch Log
+        # Metric filters, as they don't support dots in key names.
+        # Expects `call` method accepting telemetry Hash
+        class CloudWatchTranform
+          def self.call(telemetry)
+            telemetry.transform_keys { |k| String(k).tr('.', '-') }.tap do |data|
+              data['name'] = 'Puma::Plugin::TelemetryToo'
+              data['message'] = 'Publish telemetry'
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/puma/plugin/telemetry_too/transforms/l2met_transform.rb ADDED Viewed

@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+require 'English'
+require 'pathname'
+module Puma
+  class Plugin
+    module TelemetryToo
+      module Transforms
+        # L2Met (Logs to Metrics) transform that makes all keys a `sample#` in the L2Met format.
+        class L2metTransform
+          def self.call(telemetry)
+            new.call(telemetry)
+          end
+          def initialize(host_env: ENV, program_name: $PROGRAM_NAME, socket: Socket)
+            @host_env = host_env
+            @program_name = program_name
+            @socket = socket
+          end
+          def call(telemetry)
+            telemetry.transform_keys { |k| "sample##{k}" }.tap do |data|
+              data['name'] ||= 'Puma::Plugin::TelemetryToo'
+              data['source'] ||= source
+            end
+          end
+          private
+          attr_reader :host_env, :program_name, :socket
+          def source
+            @source ||= host_env['L2MET_SOURCE'] ||
+                        host_env['DYNO'] || # For Heroku
+                        host_with_exe_name # Last-ditch effort
+          end
+          def host_with_exe_name
+            "#{socket.gethostname}/#{Pathname(program_name).basename}"
+          end
+        end
+      end
+    end
+  end
+end