fluent-plugin-prometheus-smarter 1.8.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.rspec +2 -0
- data/.travis.yml +14 -0
- data/Gemfile +4 -0
- data/LICENSE +202 -0
- data/README.md +484 -0
- data/Rakefile +7 -0
- data/fluent-plugin-prometheus.gemspec +22 -0
- data/lib/fluent/plugin/filter_prometheus.rb +30 -0
- data/lib/fluent/plugin/in_prometheus.rb +222 -0
- data/lib/fluent/plugin/in_prometheus_monitor.rb +99 -0
- data/lib/fluent/plugin/in_prometheus_output_monitor.rb +202 -0
- data/lib/fluent/plugin/in_prometheus_tail_monitor.rb +95 -0
- data/lib/fluent/plugin/metric_prometheus.rb +71 -0
- data/lib/fluent/plugin/out_prometheus.rb +29 -0
- data/lib/fluent/plugin/prometheus.rb +296 -0
- data/lib/fluent/plugin/prometheus/placeholder_expander.rb +132 -0
- data/lib/fluent/plugin/prometheus_metrics.rb +77 -0
- data/misc/fluentd_sample.conf +170 -0
- data/misc/nginx_proxy.conf +22 -0
- data/misc/prometheus.yaml +13 -0
- data/misc/prometheus_alerts.yaml +59 -0
- data/spec/fluent/plugin/filter_prometheus_spec.rb +48 -0
- data/spec/fluent/plugin/in_prometheus_monitor_spec.rb +42 -0
- data/spec/fluent/plugin/in_prometheus_spec.rb +225 -0
- data/spec/fluent/plugin/in_prometheus_tail_monitor_spec.rb +42 -0
- data/spec/fluent/plugin/out_prometheus_spec.rb +43 -0
- data/spec/fluent/plugin/prometheus/placeholder_expander_spec.rb +110 -0
- data/spec/fluent/plugin/prometheus_metrics_spec.rb +138 -0
- data/spec/fluent/plugin/shared.rb +249 -0
- data/spec/spec_helper.rb +10 -0
- metadata +174 -0
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Packaging metadata for the fluent-plugin-prometheus-smarter gem.
Gem::Specification.new do |gem|
  # Identity and attribution.
  gem.name        = "fluent-plugin-prometheus-smarter"
  gem.version     = "1.8.4"
  gem.authors     = ["Masahiro Sano", "Josh Minor"]
  gem.email       = ["sabottenda@gmail.com"]
  gem.summary     = "A fluent plugin that collects metrics and exposes for Prometheus."
  gem.description = "A fluent plugin that collects metrics and exposes for Prometheus."
  gem.homepage    = "https://github.com/jishminor/fluent-plugin-prometheus"
  gem.license     = "Apache-2.0"

  # Ship everything tracked by git; executables and test files are picked
  # out of that list by path prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies.
  gem.add_dependency "fluentd", ">= 1.9.1", "< 2"
  gem.add_dependency "prometheus-client", "< 0.10"

  # Development-only dependencies (declared in the original order).
  ["bundler", "rake", "rspec", "test-unit"].each do |dev_gem|
    gem.add_development_dependency dev_gem
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'fluent/plugin/prometheus'
|
2
|
+
require 'fluent/plugin/filter'
|
3
|
+
|
4
|
+
module Fluent::Plugin
  # Filter plugin registered under the name 'prometheus'.
  #
  # Each record is handed to `instrument_single` together with the metrics
  # built from the plugin configuration, and the record itself is returned
  # from `filter`.
  class PrometheusFilter < Fluent::Plugin::Filter
    Fluent::Plugin.register_filter('prometheus', self)
    include Fluent::Plugin::PrometheusLabelParser
    include Fluent::Plugin::Prometheus

    # Binds the plugin to the default registry of the Prometheus client.
    def initialize
      super
      @registry = ::Prometheus::Client.registry
    end

    # Parses the label elements of +conf+ and uses them, with the registry,
    # to build the metrics stored in @metrics.
    def configure(conf)
      super
      base_labels = parse_labels_elements(conf)
      @metrics = Fluent::Plugin::Prometheus.parse_metrics_elements(conf, @registry, base_labels)
    end

    # Always reports true.
    def multi_workers_ready?
      true
    end

    # Instruments the configured metrics with the event, then returns the
    # record object that was passed in.
    def filter(tag, time, record)
      record.tap { |event| instrument_single(tag, time, event, @metrics) }
    end
  end
end
|
@@ -0,0 +1,222 @@
|
|
1
|
+
require 'fluent/plugin/input'
|
2
|
+
require 'fluent/plugin/prometheus'
|
3
|
+
require 'fluent/plugin/prometheus_metrics'
|
4
|
+
require 'net/http'
|
5
|
+
require 'openssl'
|
6
|
+
|
7
|
+
module Fluent::Plugin
  # Input plugin registered as 'prometheus' that serves the Prometheus
  # client registry over HTTP(S).
  #
  # The listening port is `port + fluentd_worker_id`. `metrics_path` returns
  # the registry in Prometheus text format; `aggregated_metrics_path` requests
  # `metrics_path` on every worker port (base port .. base port + workers - 1)
  # and merges the successful responses.
  class PrometheusInput < Fluent::Plugin::Input
    Fluent::Plugin.register_input('prometheus', self)

    helpers :thread, :http_server

    config_param :bind, :string, default: '0.0.0.0'
    config_param :port, :integer, default: 24231
    config_param :metrics_path, :string, default: '/metrics'
    config_param :aggregated_metrics_path, :string, default: '/aggregated_metrics'

    desc 'Enable ssl configuration for the server'
    config_section :ssl, required: false, multi: false do
      config_param :enable, :bool, default: false, deprecated: 'Use <transport tls> section'

      desc 'Path to the ssl certificate in PEM format. Read from file and added to conf as "SSLCertificate"'
      config_param :certificate_path, :string, default: nil, deprecated: 'Use cert_path in <transport tls> section'

      desc 'Path to the ssl private key in PEM format. Read from file and added to conf as "SSLPrivateKey"'
      config_param :private_key_path, :string, default: nil, deprecated: 'Use private_key_path in <transport tls> section'

      desc 'Path to CA in PEM format. Read from file and added to conf as "SSLCACertificateFile"'
      config_param :ca_path, :string, default: nil, deprecated: 'Use ca_path in <transport tls> section'

      desc 'Additional ssl conf for the server. Ref: https://github.com/ruby/webrick/blob/master/lib/webrick/ssl.rb'
      config_param :extra_conf, :hash, default: nil, symbolize_keys: true, deprecated: 'See http helper config'
    end

    def initialize
      super
      @registry = ::Prometheus::Client.registry
      @secure = nil
    end

    # Resolves the worker count, whether the endpoint is served over TLS, and
    # the per-worker listening port.
    def configure(conf)
      super

      # Get how many workers we have
      sysconf = if self.respond_to?(:owner) && owner.respond_to?(:system_config)
                  owner.system_config
                elsif self.respond_to?(:system_config)
                  self.system_config
                else
                  nil
                end
      @num_workers = (sysconf && sysconf.workers) || 1
      # Secure when either <transport tls> or the deprecated <ssl> section asks for it.
      @secure = @transport_config.protocol == :tls || (@ssl && @ssl['enable'])

      # Remember the configured port; this worker listens on base port + worker id.
      @base_port = @port
      @port += fluentd_worker_id
    end

    def multi_workers_ready?
      true
    end

    # Starts the metrics server. Uses WEBrick when the deprecated <ssl>
    # section is enabled together with extra_conf; otherwise uses the
    # http_server plugin helper.
    #
    # Raises Fluent::ConfigError when only one of certificate_path /
    # private_key_path is set in an enabled <ssl> section (helper path).
    def start
      super

      scheme = @secure ? 'https' : 'http'
      log.debug "listening prometheus http server on #{scheme}://#{@bind}:#{@port}#{@metrics_path} for worker#{fluentd_worker_id}"

      proto = @secure ? :tls : :tcp

      if @ssl && @ssl['enable'] && @ssl['extra_conf']
        start_webrick
        return
      end

      tls_opt = legacy_tls_opts

      http_server_create_http_server(:in_prometheus_server, addr: @bind, port: @port, logger: log, proto: proto, tls_opts: tls_opt) do |server|
        server.get(@metrics_path) { |_req| all_metrics }
        server.get(@aggregated_metrics_path) { |_req| all_workers_metrics }
      end
    end

    # Stops the WEBrick server if one was started, then runs the normal shutdown.
    def shutdown
      if @webrick_server
        @webrick_server.shutdown
        @webrick_server = nil
      end
      super
    end

    private

    # True when exactly one of certificate_path / private_key_path is set in
    # the <ssl> section.
    def ssl_cert_key_incomplete?
      @ssl['certificate_path'].nil? != @ssl['private_key_path'].nil?
    end

    # Translates the deprecated <ssl> section into the tls_opts hash passed to
    # the http_server helper. Returns nil when the section is absent or not
    # enabled.
    def legacy_tls_opts
      return nil unless @ssl && @ssl['enable']

      if ssl_cert_key_incomplete?
        raise Fluent::ConfigError.new('both certificate_path and private_key_path must be defined')
      end

      opts = {}
      opts['cert_path'] = @ssl['certificate_path'] if @ssl['certificate_path']
      opts['private_key_path'] = @ssl['private_key_path'] if @ssl['private_key_path']

      if @ssl['ca_path']
        opts['ca_path'] = @ssl['ca_path']
        # Only ca_path is insecure in fluentd
        # https://github.com/fluent/fluentd/blob/2236ad45197ba336fd9faf56f442252c8b226f25/lib/fluent/plugin_helper/cert_option.rb#L68
        opts['insecure'] = true
      end

      opts
    end

    # For compatibility because http helper can't support extra_conf option
    #
    # Builds a WEBrick HTTPS server from the <ssl> section, mounts both metric
    # endpoints and runs the server in a plugin-managed thread.
    # Raises RuntimeError when only one of certificate_path /
    # private_key_path is set.
    def start_webrick
      require 'webrick/https'
      require 'webrick'

      config = {
        BindAddress: @bind,
        Port: @port,
        MaxClients: 5,
        Logger: WEBrick::Log.new(STDERR, WEBrick::Log::FATAL),
        AccessLog: [],
      }
      if ssl_cert_key_incomplete?
        raise RuntimeError.new("certificate_path and private_key_path must both be defined")
      end

      ssl_config = {
        SSLEnable: true,
        SSLCertName: [['CN', 'nobody'], ['DC', 'example']]
      }

      if @ssl['certificate_path']
        cert = OpenSSL::X509::Certificate.new(File.read(@ssl['certificate_path']))
        ssl_config[:SSLCertificate] = cert
      end

      if @ssl['private_key_path']
        # OpenSSL::PKey.read parses key data, not a file name, so load the file first.
        key = OpenSSL::PKey.read(File.read(@ssl['private_key_path']))
        ssl_config[:SSLPrivateKey] = key
      end

      ssl_config[:SSLCACertificateFile] = @ssl['ca_path'] if @ssl['ca_path']
      ssl_config = ssl_config.merge(@ssl['extra_conf']) if @ssl['extra_conf']
      # The base settings (bind, port, logging) win over anything in the ssl settings.
      config = ssl_config.merge(config)

      @log.on_debug do
        @log.debug("WEBrick conf: #{config}")
      end

      @webrick_server = WEBrick::HTTPServer.new(config)
      @webrick_server.mount_proc(@metrics_path) do |_req, res|
        write_webrick_response(res, all_metrics)
      end

      @webrick_server.mount_proc(@aggregated_metrics_path) do |_req, res|
        write_webrick_response(res, all_workers_metrics)
      end

      thread_create(:in_prometheus_webrick) do
        @webrick_server.start
      end
    end

    # Copies a [status, headers, body] triple onto a WEBrick response and
    # returns that response.
    def write_webrick_response(res, response)
      status, header, body = response
      res.status = status
      res['Content-Type'] = header['Content-Type']
      res.body = body
      res
    end

    # Returns [status, headers, body] with the registry rendered in the
    # Prometheus text format, or a 500 triple carrying the error message.
    def all_metrics
      [200, { 'Content-Type' => ::Prometheus::Client::Formats::Text::CONTENT_TYPE }, ::Prometheus::Client::Formats::Text.marshal(@registry)]
    rescue => e
      [500, { 'Content-Type' => 'text/plain' }, e.to_s]
    end

    # Returns [status, headers, body] with the merged metrics of all workers.
    # Responses that are not Net::HTTPSuccess are left out of the result; any
    # exception yields a 500 triple carrying the error message.
    def all_workers_metrics
      full_result = PromMetricsAggregator.new

      send_request_to_each_worker do |resp|
        if resp.is_a?(Net::HTTPSuccess)
          full_result.add_metrics(resp.body)
        end
      end

      [200, { 'Content-Type' => ::Prometheus::Client::Formats::Text::CONTENT_TYPE }, full_result.get_metrics]
    rescue => e
      [500, { 'Content-Type' => 'text/plain' }, e.to_s]
    end

    # Requests metrics_path on each worker port and yields every response.
    def send_request_to_each_worker
      # A wildcard bind address is not a connect target; use loopback instead.
      bind = (@bind == '0.0.0.0') ? '127.0.0.1' : @bind
      req = Net::HTTP::Get.new(@metrics_path)
      (@base_port...(@base_port + @num_workers)).each do |worker_port|
        do_request(host: bind, port: worker_port, secure: @secure) do |http|
          yield(http.request(req))
        end
      end
    end

    # Opens an HTTP(S) session to host:port and yields the started Net::HTTP
    # object.
    def do_request(host:, port:, secure:)
      http = Net::HTTP.new(host, port)

      if secure
        http.use_ssl = true
        # target is our child process. so it's secure.
        http.verify_mode = OpenSSL::SSL::VERIFY_NONE
      end

      http.start do
        yield(http)
      end
    end
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
require 'fluent/plugin/input'
|
2
|
+
require 'fluent/plugin/in_monitor_agent'
|
3
|
+
require 'fluent/plugin/prometheus'
|
4
|
+
|
5
|
+
module Fluent::Plugin
  # Input plugin registered as 'prometheus_monitor'. On a timer it reads
  # plugin information from a monitor agent and copies selected values into
  # gauges of the Prometheus client registry.
  class PrometheusMonitorInput < Fluent::Plugin::Input
    Fluent::Plugin.register_input('prometheus_monitor', self)
    include Fluent::Plugin::PrometheusLabelParser

    helpers :timer

    config_param :interval, :time, default: 5
    attr_reader :registry

    def initialize
      super
      @registry = ::Prometheus::Client.registry
    end

    def multi_workers_ready?
      true
    end

    # Expands the configured labels (placeholders: hostname, worker_id) and
    # creates the monitor agent instance used by update_monitor_info.
    # Raises Fluent::ConfigError when a label value is not a String.
    def configure(conf)
      super
      host = Socket.gethostname
      expander = Fluent::Plugin::Prometheus.placeholder_expander(log)
                                           .build({ 'hostname' => host, 'worker_id' => fluentd_worker_id })

      @base_labels = parse_labels_elements(conf)
      @base_labels.each do |label_name, template|
        raise Fluent::ConfigError, "record accessor syntax is not available in prometheus_monitor" unless template.is_a?(String)

        @base_labels[label_name] = expander.expand(template)
      end

      agent_class =
        if defined?(Fluent::Plugin) && defined?(Fluent::Plugin::MonitorAgentInput)
          # from v0.14.6
          Fluent::Plugin::MonitorAgentInput
        else
          Fluent::MonitorAgentInput
        end
      @monitor_agent = agent_class.new
    end

    # Registers the gauges and schedules update_monitor_info every @interval.
    def start
      super

      @buffer_newest_timekey =
        @registry.gauge(:fluentd_status_buffer_newest_timekey, 'Newest timekey in buffer.')
      @buffer_oldest_timekey =
        @registry.gauge(:fluentd_status_buffer_oldest_timekey, 'Oldest timekey in buffer.')

      # Maps a key of the monitor agent's plugin info to the gauge that mirrors it.
      @monitor_info = {
        'buffer_queue_length' =>
          @registry.gauge(:fluentd_status_buffer_queue_length, 'Current buffer queue length.'),
        'buffer_total_queued_size' =>
          @registry.gauge(:fluentd_status_buffer_total_bytes, 'Current total size of queued buffers.'),
        'retry_count' =>
          @registry.gauge(:fluentd_status_retry_count, 'Current retry counts.'),
      }

      timer_execute(:in_prometheus_monitor, @interval, &method(:update_monitor_info))
    end

    # Copies the current values of every plugin reported by the monitor agent
    # into the gauges. Entries whose value is nil/false are skipped.
    def update_monitor_info
      @monitor_agent.plugins_info_all.each do |plugin_info|
        plugin_labels = labels(plugin_info)

        @monitor_info.each do |info_key, gauge|
          gauge.set(plugin_labels, plugin_info[info_key]) if plugin_info[info_key]
        end

        timekeys = plugin_info["buffer_timekeys"]
        next unless timekeys && !timekeys.empty?

        @buffer_newest_timekey.set(plugin_labels, timekeys.max)
        @buffer_oldest_timekey.set(plugin_labels, timekeys.min)
      end
    end

    # Base labels extended with the plugin's id, category and type.
    def labels(plugin_info)
      per_plugin = {
        plugin_id: plugin_info["plugin_id"],
        plugin_category: plugin_info["plugin_category"],
        type: plugin_info["type"],
      }
      @base_labels.merge(per_plugin)
    end
  end
end
|
@@ -0,0 +1,202 @@
|
|
1
|
+
require 'fluent/input'
|
2
|
+
require 'fluent/plugin/in_monitor_agent'
|
3
|
+
require 'fluent/plugin/prometheus'
|
4
|
+
|
5
|
+
module Fluent::Plugin
  # Input plugin registered as 'prometheus_output_monitor'. On a timer it
  # reads plugin information from a monitor agent, keeps the entries whose
  # plugin_category is 'output', and copies selected values into gauges of the
  # Prometheus client registry.
  #
  # Subclasses Fluent::Plugin::Input, like the other Prometheus inputs in this
  # gem.
  class PrometheusOutputMonitorInput < Fluent::Plugin::Input
    Fluent::Plugin.register_input('prometheus_output_monitor', self)
    include Fluent::Plugin::PrometheusLabelParser

    helpers :timer

    config_param :interval, :time, default: 5
    attr_reader :registry

    # Instance variables requested from the monitor agent for each plugin.
    MONITOR_IVARS = [
      :retry,

      :num_errors,
      :emit_count,

      # for v0.12
      :last_retry_time,

      # from v0.14
      :emit_records,
      :write_count,
      :rollback_count,

      # from v1.6.0
      :flush_time_count,
      :slow_flush_count,
    ].freeze

    def initialize
      super
      @registry = ::Prometheus::Client.registry
    end

    def multi_workers_ready?
      true
    end

    # Expands the configured labels (placeholders: hostname, worker_id) and
    # creates the monitor agent instance used by update_monitor_info.
    # Raises Fluent::ConfigError when a label value is not a String.
    def configure(conf)
      super
      hostname = Socket.gethostname
      expander_builder = Fluent::Plugin::Prometheus.placeholder_expander(log)
      expander = expander_builder.build({ 'hostname' => hostname, 'worker_id' => fluentd_worker_id })
      @base_labels = parse_labels_elements(conf)
      @base_labels.each do |key, value|
        unless value.is_a?(String)
          raise Fluent::ConfigError, "record accessor syntax is not available in prometheus_output_monitor"
        end
        @base_labels[key] = expander.expand(value)
      end

      if defined?(Fluent::Plugin) && defined?(Fluent::Plugin::MonitorAgentInput)
        # from v0.14.6
        @monitor_agent = Fluent::Plugin::MonitorAgentInput.new
      else
        @monitor_agent = Fluent::MonitorAgentInput.new
      end
    end

    # Registers the gauges, builds the lookup tables used on every tick and
    # schedules update_monitor_info every @interval.
    def start
      super

      @metrics = {
        # Buffer metrics
        buffer_total_queued_size: @registry.gauge(
          :fluentd_output_status_buffer_total_bytes,
          'Current total size of stage and queue buffers.'),
        buffer_stage_length: @registry.gauge(
          :fluentd_output_status_buffer_stage_length,
          'Current length of stage buffers.'),
        buffer_stage_byte_size: @registry.gauge(
          :fluentd_output_status_buffer_stage_byte_size,
          'Current total size of stage buffers.'),
        buffer_queue_length: @registry.gauge(
          :fluentd_output_status_buffer_queue_length,
          'Current length of queue buffers.'),
        buffer_queue_byte_size: @registry.gauge(
          :fluentd_output_status_queue_byte_size,
          'Current total size of queue buffers.'),
        buffer_available_buffer_space_ratios: @registry.gauge(
          :fluentd_output_status_buffer_available_space_ratio,
          'Ratio of available space in buffer.'),
        buffer_newest_timekey: @registry.gauge(
          :fluentd_output_status_buffer_newest_timekey,
          'Newest timekey in buffer.'),
        buffer_oldest_timekey: @registry.gauge(
          :fluentd_output_status_buffer_oldest_timekey,
          'Oldest timekey in buffer.'),

        # Output metrics
        retry_counts: @registry.gauge(
          :fluentd_output_status_retry_count,
          'Current retry counts.'),
        num_errors: @registry.gauge(
          :fluentd_output_status_num_errors,
          'Current number of errors.'),
        emit_count: @registry.gauge(
          :fluentd_output_status_emit_count,
          'Current emit counts.'),
        emit_records: @registry.gauge(
          :fluentd_output_status_emit_records,
          'Current emit records.'),
        write_count: @registry.gauge(
          :fluentd_output_status_write_count,
          'Current write counts.'),
        rollback_count: @registry.gauge(
          :fluentd_output_status_rollback_count,
          'Current rollback counts.'),
        flush_time_count: @registry.gauge(
          :fluentd_output_status_flush_time_count,
          'Total flush time.'),
        slow_flush_count: @registry.gauge(
          :fluentd_output_status_slow_flush_count,
          'Current slow flush counts.'),
        retry_wait: @registry.gauge(
          :fluentd_output_status_retry_wait,
          'Current retry wait'),
      }

      # Key of the monitor agent's plugin info => gauge that mirrors it.
      # Built once here instead of on every timer tick.
      @monitor_info = {
        # buffer metrics
        'buffer_total_queued_size' => @metrics[:buffer_total_queued_size],
        'buffer_stage_length' => @metrics[:buffer_stage_length],
        'buffer_stage_byte_size' => @metrics[:buffer_stage_byte_size],
        'buffer_queue_length' => @metrics[:buffer_queue_length],
        'buffer_queue_byte_size' => @metrics[:buffer_queue_byte_size],
        'buffer_available_buffer_space_ratios' => @metrics[:buffer_available_buffer_space_ratios],
        'buffer_newest_timekey' => @metrics[:buffer_newest_timekey],
        'buffer_oldest_timekey' => @metrics[:buffer_oldest_timekey],

        # output metrics
        'retry_count' => @metrics[:retry_counts],
      }

      # Key inside info['instance_variables'] => gauge that mirrors it.
      @instance_vars_info = {
        num_errors: @metrics[:num_errors],
        write_count: @metrics[:write_count],
        emit_count: @metrics[:emit_count],
        emit_records: @metrics[:emit_records],
        rollback_count: @metrics[:rollback_count],
        flush_time_count: @metrics[:flush_time_count],
        slow_flush_count: @metrics[:slow_flush_count],
      }

      timer_execute(:in_prometheus_output_monitor, @interval, &method(:update_monitor_info))
    end

    # Copies the current values of every output plugin reported by the monitor
    # agent into the gauges. Entries whose value is nil/false are skipped.
    def update_monitor_info
      opts = {
        ivars: MONITOR_IVARS,
        with_retry: true,
      }

      agent_info = @monitor_agent.plugins_info_all(opts).select {|info|
        info['plugin_category'] == 'output'.freeze
      }

      agent_info.each do |info|
        label = labels(info)

        @monitor_info.each do |name, metric|
          if info[name]
            metric.set(label, info[name])
          end
        end

        if info['instance_variables']
          @instance_vars_info.each do |name, metric|
            if info['instance_variables'][name]
              metric.set(label, info['instance_variables'][name])
            end
          end
        end

        # compute current retry_wait
        if info['retry']
          next_time = info['retry']['next_time']
          start_time = info['retry']['start']
          if start_time.nil? && info['instance_variables']
            # v0.12 does not include start, use last_retry_time instead
            start_time = info['instance_variables'][:last_retry_time]
          end

          # Report 0 unless both ends of the interval are known.
          wait = 0
          if next_time && start_time
            wait = next_time - start_time
          end
          @metrics[:retry_wait].set(label, wait.to_f)
        end
      end
    end

    # Base labels extended with the plugin's id and type.
    def labels(plugin_info)
      @base_labels.merge(
        plugin_id: plugin_info["plugin_id"],
        type: plugin_info["type"],
      )
    end
  end
end
|