puma-plugin-telemetry 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/CODEOWNERS +5 -0
- data/.github/workflows/build.yml +46 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.rubocop.yml +10 -0
- data/.tool-versions +1 -0
- data/CHANGELOG.md +76 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +67 -0
- data/LICENSE.txt +21 -0
- data/README.md +136 -0
- data/Rakefile +10 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/docs/example-datadog_backlog_size.png +0 -0
- data/docs/example-datadog_queue_time.png +0 -0
- data/docs/examples.md +163 -0
- data/lib/puma/plugin/telemetry/config.rb +113 -0
- data/lib/puma/plugin/telemetry/data.rb +269 -0
- data/lib/puma/plugin/telemetry/targets/datadog_statsd_target.rb +51 -0
- data/lib/puma/plugin/telemetry/targets/io_target.rb +42 -0
- data/lib/puma/plugin/telemetry/version.rb +9 -0
- data/lib/puma/plugin/telemetry.rb +106 -0
- data/lib/rack/request_queue_time_middleware.rb +57 -0
- data/puma-plugin-telemetry.gemspec +34 -0
- metadata +88 -0
data/docs/examples.md
ADDED
@@ -0,0 +1,163 @@
|
|
1
|
+
## Keeping track of waiting requests
|
2
|
+
|
3
|
+
There are requests waiting in 2 places:
|
4
|
+
- socket
|
5
|
+
- queue
|
6
|
+
|
7
|
+
Their sum is the total number of accepted requests waiting.
|
8
|
+
|
9
|
+
Puma configuration
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
plugin :telemetry
|
13
|
+
|
14
|
+
Puma::Plugin::Telemetry.configure do |config|
|
15
|
+
config.enabled = true
|
16
|
+
config.initial_delay = 10
|
17
|
+
|
18
|
+
config.puma_telemetry = %w[queue.backlog]
|
19
|
+
|
20
|
+
config.socket_telemetry!
|
21
|
+
|
22
|
+
config.add_target :dogstatsd, client: Datadog::Statsd.new(tags: %w[your tags], namespace: "ruby.puma")
|
23
|
+
end
|
24
|
+
```
|
25
|
+
|
26
|
+
Example Datadog widget and its configuration. Depending on what you prefer to see, you might replace `rollup(max)` with `rollup(sum)` — use whichever shows the maximum value or the sum across the aggregated time frame.
|
27
|
+
|
28
|
+
| :point_up: | Remember to update tags after initial setup! |
|
29
|
+
|---------------|:---------------------------------------------|
|
30
|
+
|
31
|
+
![Datadog Widget, barchart showcasing sockets & queue backlog sizes stacked up](example-datadog_backlog_size.png "Datadog Widget")
|
32
|
+
|
33
|
+
```json
|
34
|
+
{
|
35
|
+
"viz": "timeseries",
|
36
|
+
"requests": [
|
37
|
+
{
|
38
|
+
"style": {
|
39
|
+
"palette": "dog_classic",
|
40
|
+
"type": "solid",
|
41
|
+
"width": "normal"
|
42
|
+
},
|
43
|
+
"type": "bars",
|
44
|
+
"formulas": [
|
45
|
+
{
|
46
|
+
"alias": "queue",
|
47
|
+
"formula": "query1"
|
48
|
+
},
|
49
|
+
{
|
50
|
+
"alias": "socket",
|
51
|
+
"formula": "query2"
|
52
|
+
}
|
53
|
+
],
|
54
|
+
"response_format": "timeseries",
|
55
|
+
"on_right_yaxis": false,
|
56
|
+
"queries": [
|
57
|
+
{
|
58
|
+
"query": "max:ruby.puma.queue.backlog{}.rollup(max)",
|
59
|
+
"data_source": "metrics",
|
60
|
+
"name": "query1"
|
61
|
+
},
|
62
|
+
{
|
63
|
+
"query": "max:ruby.puma.sockets.backlog{}.rollup(max)",
|
64
|
+
"data_source": "metrics",
|
65
|
+
"name": "query2"
|
66
|
+
}
|
67
|
+
]
|
68
|
+
}
|
69
|
+
],
|
70
|
+
"yaxis": {
|
71
|
+
"include_zero": true,
|
72
|
+
"max": "auto",
|
73
|
+
"scale": "linear",
|
74
|
+
"min": "auto",
|
75
|
+
"label": ""
|
76
|
+
},
|
77
|
+
"markers": []
|
78
|
+
}
|
79
|
+
```
|
80
|
+
|
81
|
+
## Keeping track of request queue time
|
82
|
+
|
83
|
+
The time a request spent waiting to be processed — from the moment it's accepted by the Load Balancer until it starts going through the Rack middleware in your application. The holy grail of autoscaling.
|
84
|
+
|
85
|
+
Example configuration of middleware, i.e. in case of Rails it could be placed under `config/initializers/request_queue_time.rb`
|
86
|
+
|
87
|
+
```ruby
|
88
|
+
Rails.application.config.middleware.insert_after(
|
89
|
+
0,
|
90
|
+
RequestQueueTimeMiddleware,
|
91
|
+
statsd: Datadog::Statsd.new(namespace: "ruby.puma", tags: %w[your tags])
|
92
|
+
)
|
93
|
+
```
|
94
|
+
|
95
|
+
If you are utilizing tags in your logs, you might also want to add this measurement as follows:
|
96
|
+
|
97
|
+
```ruby
|
98
|
+
Rails.application.config.log_tags ||= {}
|
99
|
+
Rails.application.config.log_tags[:queue_time] = ->(req) { req.env[::RequestQueueTimeMiddleware::ENV_KEY] }
|
100
|
+
```
|
101
|
+
|
102
|
+
Example Datadog widget with configuration.
|
103
|
+
|
104
|
+
| :point_up: | Remember to update tags after initial setup! |
|
105
|
+
|---------------|:---------------------------------------------|
|
106
|
+
|
107
|
+
![Datadog Widget, barchart showcasing sockets & queue backlog sizes stacked up](example-datadog_queue_time.png "Datadog Widget")
|
108
|
+
|
109
|
+
```json
|
110
|
+
{
|
111
|
+
"viz": "timeseries",
|
112
|
+
"requests": [
|
113
|
+
{
|
114
|
+
"style": {
|
115
|
+
"palette": "dog_classic",
|
116
|
+
"type": "solid",
|
117
|
+
"width": "normal"
|
118
|
+
},
|
119
|
+
"type": "line",
|
120
|
+
"response_format": "timeseries",
|
121
|
+
"queries": [
|
122
|
+
{
|
123
|
+
"query": "max:ruby.puma.queue.time.max{}",
|
124
|
+
"data_source": "metrics",
|
125
|
+
"name": "query1"
|
126
|
+
},
|
127
|
+
{
|
128
|
+
"query": "max:ruby.puma.queue.time.95percentile{}",
|
129
|
+
"data_source": "metrics",
|
130
|
+
"name": "query2"
|
131
|
+
},
|
132
|
+
{
|
133
|
+
"query": "max:ruby.puma.queue.time.median{}",
|
134
|
+
"data_source": "metrics",
|
135
|
+
"name": "query3"
|
136
|
+
}
|
137
|
+
],
|
138
|
+
"formulas": [
|
139
|
+
{
|
140
|
+
"alias": "max",
|
141
|
+
"formula": "query1"
|
142
|
+
},
|
143
|
+
{
|
144
|
+
"alias": "p95",
|
145
|
+
"formula": "query2"
|
146
|
+
},
|
147
|
+
{
|
148
|
+
"alias": "median",
|
149
|
+
"formula": "query3"
|
150
|
+
}
|
151
|
+
]
|
152
|
+
}
|
153
|
+
],
|
154
|
+
"yaxis": {
|
155
|
+
"include_zero": true,
|
156
|
+
"max": "auto",
|
157
|
+
"scale": "linear",
|
158
|
+
"min": "auto",
|
159
|
+
"label": ""
|
160
|
+
},
|
161
|
+
"markers": []
|
162
|
+
}
|
163
|
+
```
|
@@ -0,0 +1,113 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Puma
|
4
|
+
class Plugin
|
5
|
+
module Telemetry
|
6
|
+
# Configuration object for plugin
|
7
|
+
class Config
  # Puma stats published by default. Users may narrow this down
  # via `puma_telemetry=`.
  DEFAULT_PUMA_TELEMETRY = [
    # Total booted workers.
    'workers.booted',

    # Total number of workers configured.
    'workers.total',

    # Current number of threads spawned.
    'workers.spawned_threads',

    # Maximum number of threads that can run.
    'workers.max_threads',

    # Number of requests performed so far.
    'workers.requests_count',

    # Number of requests waiting to be processed.
    'queue.backlog',

    # Free capacity that could be utilized, i.e. if backlog
    # is growing, and we still have capacity available, it
    # could mean that load balancing is not performing well.
    'queue.capacity'
  ].freeze

  # Built-in targets resolvable by Symbol name in `add_target`.
  TARGETS = {
    dogstatsd: Telemetry::Targets::DatadogStatsdTarget,
    io: Telemetry::Targets::IOTarget
  }.freeze

  # Whether telemetry should run with puma
  # - default: false
  attr_accessor :enabled

  # Number of seconds to delay first telemetry
  # - default: 5
  attr_accessor :initial_delay

  # Seconds between publishing telemetry
  # - default: 5
  attr_accessor :frequency

  # List of targets which are meant to publish telemetry.
  # Target should implement `#call` method accepting
  # a single argument - so it can be even a simple proc.
  # - default: []
  attr_accessor :targets

  # Which metrics to publish from puma stats. You can select
  # a subset from default ones that interest you the most.
  # - default: DEFAULT_PUMA_TELEMETRY
  attr_accessor :puma_telemetry

  # Whether to publish socket telemetry.
  # - default: false
  attr_accessor :socket_telemetry

  # Symbol representing method to parse the `Socket::Option`, or
  # the whole implementation as a lambda. Available options:
  # - `:inspect`, based on the `Socket::Option#inspect` method,
  #   it's the safest and slowest way to extract the info. `inspect`
  #   output might not be available, i.e. on AWS Fargate
  # - `:unpack`, parse binary data given by `Socket::Option`. Fastest
  #   way (12x compared to `inspect`) but depends on kernel headers
  #   and fields ordering within the struct. It should almost always
  #   match though. DEFAULT
  # - proc/lambda, `Socket::Option` will be given as an argument, it
  #   should return the value of `unacked` field as an integer.
  #
  attr_accessor :socket_parser

  def initialize
    @enabled = false
    @initial_delay = 5
    @frequency = 5
    @targets = []
    @puma_telemetry = DEFAULT_PUMA_TELEMETRY
    @socket_telemetry = false
    @socket_parser = :unpack
  end

  def enabled?
    !!@enabled
  end

  def socket_telemetry!
    @socket_telemetry = true
  end

  # Coerced to a boolean for consistency with `enabled?`, since the
  # accessor allows assigning arbitrary truthy/falsy values.
  def socket_telemetry?
    !!@socket_telemetry
  end

  # Registers a telemetry target. Accepts either a ready-made object
  # responding to `#call`, or a Symbol naming a built-in entry from
  # TARGETS (remaining keyword args are passed to its constructor).
  #
  # Raises Telemetry::Error when the Symbol doesn't match any
  # built-in target.
  def add_target(name_or_target, **args)
    return @targets.push(name_or_target) unless name_or_target.is_a?(Symbol)

    target = TARGETS.fetch(name_or_target) do
      raise Telemetry::Error, "Unknown Target: #{name_or_target.inspect}, #{args.inspect}"
    end

    @targets.push(target.new(**args))
  end
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
@@ -0,0 +1,269 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Puma
|
4
|
+
class Plugin
|
5
|
+
module Telemetry
|
6
|
+
# Helper for working with Puma stats
|
7
|
+
# Helper for working with Puma stats. Including classes must provide
# reader methods for the metrics in TELEMETRY_TO_METHODS that are not
# defined here.
module CommonData
  # Maps public telemetry names to the reader methods providing them.
  TELEMETRY_TO_METHODS = {
    'workers.booted' => :workers_booted,
    'workers.total' => :workers_total,
    'workers.spawned_threads' => :workers_spawned_threads,
    'workers.max_threads' => :workers_max_threads,
    'workers.requests_count' => :workers_requests_count,
    'queue.backlog' => :queue_backlog,
    'queue.capacity' => :queue_capacity
  }.freeze

  # @param stats [Hash] snapshot of Puma stats
  def initialize(stats)
    @stats = stats
  end

  # Defaults to 1 when Puma doesn't report the key.
  def workers_booted
    @stats.fetch(:booted_workers, 1)
  end

  # Defaults to 1 when Puma doesn't report the key.
  def workers_total
    @stats.fetch(:workers, 1)
  end

  # Builds a telemetry hash for the requested metric names.
  # Names without a known reader are silently skipped.
  def metrics(selected)
    result = {}
    selected.each do |metric|
      reader = TELEMETRY_TO_METHODS[metric]
      result[metric] = public_send(reader) if reader
    end
    result
  end
end

# Handles the case of non clustered mode, where `workers` isn't
# configured and the stats hash is flat.
class WorkerData
  include CommonData

  # Every reader fetches a single key from the stats snapshot,
  # defaulting to 0 when Puma didn't report it.
  {
    workers_max_threads: :max_threads,
    workers_requests_count: :requests_count,
    workers_spawned_threads: :running,
    queue_backlog: :backlog,
    queue_capacity: :pool_capacity
  }.each do |reader, key|
    define_method(reader) { @stats.fetch(key, 0) }
  end
end

# Handles the case of clustered mode, where we have statistics
# for all the workers. Every reader sums the relevant stat across
# all workers.
class ClusteredData
  include CommonData

  {
    workers_max_threads: :max_threads,
    workers_requests_count: :requests_count,
    workers_spawned_threads: :running,
    queue_backlog: :backlog,
    queue_capacity: :pool_capacity
  }.each do |reader, key|
    define_method(reader) { sum_stat(key) }
  end

  private

  # Sums `stat` over each worker's `last_status`, treating missing
  # entries as 0.
  def sum_stat(stat)
    @stats[:worker_status].sum { |data| data.dig(:last_status, stat) || 0 }
  end
end
|
98
|
+
|
99
|
+
# Pulls TCP INFO data from socket
|
100
|
+
class SocketData
  UNACKED_REGEXP = /\ unacked=(?<unacked>\d+)\ /.freeze

  # @param ios    [Array] ios puma listens on; anything that is not a
  #   TCPSocket supporting `getsockopt` is ignored
  # @param parser [Symbol, Proc] `:inspect`, `:unpack`, or a custom
  #   callable receiving the `Socket::Option` and returning the
  #   `unacked` value as an Integer
  def initialize(ios, parser)
    @sockets = ios.select do |io|
      io.respond_to?(:getsockopt) && io.is_a?(TCPSocket)
    end
    @parser = resolve_parser(parser)
  end

  # Number of unacknowledged connections in the sockets, which
  # we know as socket backlog.
  #
  def unacked
    @sockets.sum do |socket|
      info = socket.getsockopt(Socket::SOL_TCP, Socket::TCP_INFO)
      @parser.call(info)
    end
  end

  def metrics
    {
      'sockets.backlog' => unacked
    }
  end

  private

  # Translates the configured parser into a callable; custom procs
  # are used as-is.
  def resolve_parser(parser)
    return parser if parser.is_a?(Proc)

    case parser
    when :inspect then method(:parse_with_inspect)
    when :unpack then method(:parse_with_unpack)
    end
  end

  # `Socket::Option` exposes no accessors for the TCP_INFO fields; it
  # only decodes them on demand (in its C implementation) for
  # `inspect`, producing output such as
  #
  #   #<Socket::Option: INET TCP INFO state=LISTEN ... unacked=0 sacked=5 ...>
  #
  # so one way to recover `unacked` is matching against that string.
  # Safest but slowest option, and the decoded output might not be
  # available everywhere (i.e. AWS Fargate hosts on kernel 4.14).
  def parse_with_inspect(tcp_info)
    match = UNACKED_REGEXP.match(tcp_info.inspect)
    match ? match[:unacked].to_i : 0
  end

  # Unpacks the raw `struct tcp_info` binary data directly — about 12x
  # faster than `inspect`, and works even where `inspect` output is not
  # decoded. Per `linux/tcp.h`, the struct starts with eight `__u8`
  # fields (state, ca_state, retransmits, probes, backoff, options, and
  # two bit-packed bytes), followed by `__u32` rto, ato, snd_mss,
  # rcv_mss and then `__u32 tcpi_unacked` — hence unpacking with `C8L5`
  # (`C` = 8-bit unsigned, `L` = 32-bit unsigned native endian) and
  # taking the last value. The full struct would be `C8 L24 Q4 L6 Q4`,
  # but we only need to read up to `unacked`.
  #
  # The layout depends on kernel headers and field ordering within the
  # struct; it should almost always match, but if results look wrong,
  # fall back to `:inspect` or supply a custom parser proc.
  #
  # Tested against:
  # - Amazon Linux 2 with kernel 4.14 & 5.10
  # - Ubuntu 20.04 with kernel 5.13
  #
  def parse_with_unpack(tcp_info)
    tcp_info.unpack('C8L5')[-1]
  end
end
|
267
|
+
end
|
268
|
+
end
|
269
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Puma
|
4
|
+
class Plugin
|
5
|
+
module Telemetry
|
6
|
+
module Targets
|
7
|
+
# Target wrapping Datadog Statsd client. You can configure
|
8
|
+
# all details like _metrics prefix_ and _tags_ in the client
|
9
|
+
# itself.
|
10
|
+
#
|
11
|
+
# ## Example
|
12
|
+
#
|
13
|
+
# require "datadog/statsd"
|
14
|
+
#
|
15
|
+
# client = Datadog::Statsd.new(namespace: "ruby.puma",
|
16
|
+
# tags: {
|
17
|
+
# service: "my-webapp",
|
18
|
+
# env: ENV["RAILS_ENV"],
|
19
|
+
# version: ENV["CODE_VERSION"]
|
20
|
+
# })
|
21
|
+
#
|
22
|
+
# DatadogStatsdTarget.new(client: client)
|
23
|
+
#
|
24
|
+
class DatadogStatsdTarget
  # @param client [Datadog::Statsd] preconfigured statsd client
  #   (prefix, tags etc. are set on the client itself)
  def initialize(client:)
    @client = client
  end

  # Publishes every metric as a gauge, then synchronously flushes the
  # client.
  #
  # Gauges keep only the last reported value. Since v5 the DD statsd
  # client aggregates metrics on an extra thread before sending, so we
  # could publish from here several times before a flush happens —
  # leaving only the most recent values. Flushing explicitly persists
  # every publication, not just the latest one.
  #
  def call(telemetry)
    telemetry.each_pair do |name, value|
      @client.gauge(name, value)
    end

    @client.flush(sync: true)
  end
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
module Puma
|
6
|
+
class Plugin
|
7
|
+
module Telemetry
|
8
|
+
module Targets
|
9
|
+
# Simple IO Target, publishing metrics to STDOUT or logs
|
10
|
+
#
|
11
|
+
class IOTarget
  # JSON formatter for IO, expects `call` method accepting telemetry hash
  #
  class JSONFormatter
    # NOTE: Replace dots with dashes for better support of AWS CloudWatch
    # Log Metric filters, as they don't support dots in key names.
    def self.call(telemetry)
      payload = telemetry.transform_keys { |key| key.tr('.', '-') }

      payload.merge!('name' => 'Puma::Plugin::Telemetry',
                     'message' => 'Publish telemetry')

      ::JSON.dump(payload)
    end
  end

  # @param io        [IO] destination stream, defaults to STDOUT
  # @param formatter [Symbol, #call] `:json` for the built-in JSON
  #   formatter, or any object responding to `call(telemetry)`
  def initialize(io: $stdout, formatter: :json)
    @io = io
    @formatter = formatter == :json ? JSONFormatter : formatter
  end

  # Writes one formatted line per invocation.
  def call(telemetry)
    @io.puts(@formatter.call(telemetry))
  end
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|