phihos-fluent-plugin-prometheus 2.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/linux.yml +34 -0
- data/.gitignore +16 -0
- data/.rspec +2 -0
- data/.travis.yml +14 -0
- data/ChangeLog +43 -0
- data/Gemfile +4 -0
- data/LICENSE +202 -0
- data/README.md +537 -0
- data/Rakefile +7 -0
- data/fluent-plugin-prometheus.gemspec +22 -0
- data/lib/fluent/plugin/filter_prometheus.rb +43 -0
- data/lib/fluent/plugin/in_prometheus/async_wrapper.rb +47 -0
- data/lib/fluent/plugin/in_prometheus.rb +230 -0
- data/lib/fluent/plugin/in_prometheus_monitor.rb +107 -0
- data/lib/fluent/plugin/in_prometheus_output_monitor.rb +234 -0
- data/lib/fluent/plugin/in_prometheus_tail_monitor.rb +98 -0
- data/lib/fluent/plugin/out_prometheus.rb +42 -0
- data/lib/fluent/plugin/prometheus/data_store.rb +93 -0
- data/lib/fluent/plugin/prometheus/placeholder_expander.rb +132 -0
- data/lib/fluent/plugin/prometheus.rb +418 -0
- data/lib/fluent/plugin/prometheus_metrics.rb +77 -0
- data/misc/fluentd_sample.conf +170 -0
- data/misc/nginx_proxy.conf +22 -0
- data/misc/prometheus.yaml +13 -0
- data/misc/prometheus_alerts.yaml +59 -0
- data/spec/fluent/plugin/filter_prometheus_spec.rb +118 -0
- data/spec/fluent/plugin/in_prometheus_monitor_spec.rb +42 -0
- data/spec/fluent/plugin/in_prometheus_spec.rb +225 -0
- data/spec/fluent/plugin/in_prometheus_tail_monitor_spec.rb +42 -0
- data/spec/fluent/plugin/out_prometheus_spec.rb +139 -0
- data/spec/fluent/plugin/prometheus/placeholder_expander_spec.rb +110 -0
- data/spec/fluent/plugin/prometheus_metrics_spec.rb +138 -0
- data/spec/fluent/plugin/shared.rb +248 -0
- data/spec/spec_helper.rb +10 -0
- metadata +176 -0
@@ -0,0 +1,230 @@
|
|
1
|
+
require 'fluent/plugin/input'
|
2
|
+
require 'fluent/plugin/prometheus'
|
3
|
+
require 'fluent/plugin/prometheus_metrics'
|
4
|
+
require 'net/http'
|
5
|
+
require 'openssl'
|
6
|
+
|
7
|
+
module Fluent::Plugin
|
8
|
+
class PrometheusInput < Fluent::Plugin::Input
|
9
|
+
Fluent::Plugin.register_input('prometheus', self)
|
10
|
+
|
11
|
+
helpers :thread, :http_server
|
12
|
+
|
13
|
+
config_param :bind, :string, default: '0.0.0.0'
|
14
|
+
config_param :port, :integer, default: 24231
|
15
|
+
config_param :metrics_path, :string, default: '/metrics'
|
16
|
+
config_param :aggregated_metrics_path, :string, default: '/aggregated_metrics'
|
17
|
+
|
18
|
+
desc 'Enable ssl configuration for the server'
|
19
|
+
config_section :ssl, required: false, multi: false do
|
20
|
+
config_param :enable, :bool, default: false, deprecated: 'Use <transport tls> section'
|
21
|
+
|
22
|
+
desc 'Path to the ssl certificate in PEM format. Read from file and added to conf as "SSLCertificate"'
|
23
|
+
config_param :certificate_path, :string, default: nil, deprecated: 'Use cert_path in <transport tls> section'
|
24
|
+
|
25
|
+
desc 'Path to the ssl private key in PEM format. Read from file and added to conf as "SSLPrivateKey"'
|
26
|
+
config_param :private_key_path, :string, default: nil, deprecated: 'Use private_key_path in <transport tls> section'
|
27
|
+
|
28
|
+
desc 'Path to CA in PEM format. Read from file and added to conf as "SSLCACertificateFile"'
|
29
|
+
config_param :ca_path, :string, default: nil, deprecated: 'Use ca_path in <transport tls> section'
|
30
|
+
|
31
|
+
desc 'Additional ssl conf for the server. Ref: https://github.com/ruby/webrick/blob/master/lib/webrick/ssl.rb'
|
32
|
+
config_param :extra_conf, :hash, default: nil, symbolize_keys: true, deprecated: 'See http helper config'
|
33
|
+
end
|
34
|
+
|
35
|
+
# Prepares plugin state before configuration is parsed: grabs the shared
# Prometheus client registry and leaves the "secure" flag undecided until
# #configure runs.
def initialize
  super
  @secure = nil
  @registry = ::Prometheus::Client.registry
end
|
40
|
+
|
41
|
+
# Reads the plugin configuration and derives per-worker settings.
#
# Sets:
#   @num_workers - worker count from the system config (1 when unavailable)
#   @secure      - truthy when the transport protocol is :tls or the
#                  deprecated <ssl> section has enable set
#   @base_port   - the configured port
#   @port        - the configured port offset by this worker's id
def configure(conf)
  super

  # Get how many workers we have
  system_conf =
    if respond_to?(:owner) && owner.respond_to?(:system_config)
      owner.system_config
    elsif respond_to?(:system_config)
      system_config
    end
  worker_count = system_conf && system_conf.workers
  @num_workers = worker_count || 1

  tls_transport = @transport_config.protocol == :tls
  @secure = tls_transport || (@ssl && @ssl['enable'])

  @base_port = @port
  @port = @base_port + fluentd_worker_id
end
|
58
|
+
|
59
|
+
# Always true: this input declares itself usable with multiple workers
# (each worker listens on its own port, see #configure).
def multi_workers_ready?
  true
end
|
62
|
+
|
63
|
+
# Starts the metrics endpoint for this worker.
#
# When the deprecated <ssl> section is enabled together with extra_conf, the
# legacy WEBrick server is started instead (see #start_webrick). Otherwise the
# http_server helper serves @metrics_path and @aggregated_metrics_path, with
# TLS options translated from the deprecated <ssl> section when it is enabled.
#
# Raises Fluent::ConfigError when only one of certificate_path /
# private_key_path is set in the <ssl> section.
def start
  super

  scheme = @secure ? 'https' : 'http'
  # The path is interpolated as-is: it is the exact route registered below and
  # its default ('/metrics') already starts with a slash.
  log.debug "listening prometheus http server on #{scheme}://#{@bind}:#{@port}#{@metrics_path} for worker#{fluentd_worker_id}"

  proto = @secure ? :tls : :tcp
  legacy_ssl = @ssl && @ssl['enable']

  if legacy_ssl && @ssl['extra_conf']
    start_webrick
    return
  end

  begin
    require 'async'
    require 'fluent/plugin/in_prometheus/async_wrapper'
    extend AsyncWrapper
  rescue LoadError => _
    # async is optional; without it the plain #do_request stays in use
  end

  tls_opt = if legacy_ssl
    ssl_config = {}

    if (@ssl['certificate_path'] && @ssl['private_key_path'].nil?) || (@ssl['certificate_path'].nil? && @ssl['private_key_path'])
      raise Fluent::ConfigError.new('both certificate_path and private_key_path must be defined')
    end

    ssl_config['cert_path'] = @ssl['certificate_path'] if @ssl['certificate_path']
    ssl_config['private_key_path'] = @ssl['private_key_path'] if @ssl['private_key_path']

    if @ssl['ca_path']
      ssl_config['ca_path'] = @ssl['ca_path']
      # Only ca_path is insecure in fluentd
      # https://github.com/fluent/fluentd/blob/2236ad45197ba336fd9faf56f442252c8b226f25/lib/fluent/plugin_helper/cert_option.rb#L68
      ssl_config['insecure'] = true
    end

    ssl_config
  end

  http_server_create_http_server(:in_prometheus_server, addr: @bind, port: @port, logger: log, proto: proto, tls_opts: tls_opt) do |server|
    server.get(@metrics_path) { |_req| all_metrics }
    server.get(@aggregated_metrics_path) { |_req| all_workers_metrics }
  end
end
|
114
|
+
|
115
|
+
# Stops the legacy WEBrick server, if one was started, then lets the parent
# class finish shutting down.
def shutdown
  webrick = @webrick_server
  if webrick
    webrick.shutdown
    @webrick_server = nil
  end
  super
end
|
122
|
+
|
123
|
+
private
|
124
|
+
|
125
|
+
# For compatibility, because the http helper can't support the extra_conf option
|
126
|
+
# Starts a WEBrick HTTPS server that serves @metrics_path and
# @aggregated_metrics_path. The server is built from the deprecated <ssl>
# section, including its extra_conf hash, and runs in a plugin-managed thread.
#
# Raises RuntimeError when only one of certificate_path / private_key_path
# is configured.
def start_webrick
  require 'webrick/https'
  require 'webrick'

  config = {
    BindAddress: @bind,
    Port: @port,
    MaxClients: 5,
    Logger: WEBrick::Log.new(STDERR, WEBrick::Log::FATAL),
    AccessLog: [],
  }
  if (@ssl['certificate_path'] && @ssl['private_key_path'].nil?) || (@ssl['certificate_path'].nil? && @ssl['private_key_path'])
    raise RuntimeError.new("certificate_path and private_key_path must both be defined")
  end

  ssl_config = {
    SSLEnable: true,
    SSLCertName: [['CN', 'nobody'], ['DC', 'example']]
  }

  if @ssl['certificate_path']
    cert = OpenSSL::X509::Certificate.new(File.read(@ssl['certificate_path']))
    ssl_config[:SSLCertificate] = cert
  end

  if @ssl['private_key_path']
    # OpenSSL::PKey.read parses key material (a PEM/DER string or an IO),
    # so the file contents must be passed, not the path itself.
    key = OpenSSL::PKey.read(File.read(@ssl['private_key_path']))
    ssl_config[:SSLPrivateKey] = key
  end

  ssl_config[:SSLCACertificateFile] = @ssl['ca_path'] if @ssl['ca_path']
  ssl_config = ssl_config.merge(@ssl['extra_conf']) if @ssl['extra_conf']
  # The base server settings are merged last, so they win over any
  # identically named keys coming from extra_conf.
  config = ssl_config.merge(config)

  @log.on_debug do
    @log.debug("WEBrick conf: #{config}")
  end

  @webrick_server = WEBrick::HTTPServer.new(config)

  # Both endpoints copy a [status, headers, body] triple onto the response.
  [
    [@metrics_path, :all_metrics],
    [@aggregated_metrics_path, :all_workers_metrics],
  ].each do |path, handler|
    @webrick_server.mount_proc(path) do |_req, res|
      status, header, body = send(handler)
      res.status = status
      res['Content-Type'] = header['Content-Type']
      res.body = body
      res
    end
  end

  thread_create(:in_prometheus_webrick) do
    @webrick_server.start
  end
end
|
185
|
+
|
186
|
+
# Renders this worker's registry in the Prometheus text format.
#
# Returns a [status, headers, body] triple: 200 with the marshalled metrics,
# or 500 with the error message as plain text if rendering raises.
def all_metrics
  text_format = ::Prometheus::Client::Formats::Text
  headers = { 'Content-Type' => text_format::CONTENT_TYPE }
  [200, headers, text_format.marshal(@registry)]
rescue => e
  [500, { 'Content-Type' => 'text/plain' }, e.to_s]
end
|
191
|
+
|
192
|
+
# Collects the metrics of every worker and merges them into one payload.
# Responses whose status is not 200 are left out of the aggregation.
#
# Returns a [status, headers, body] triple: 200 with the aggregated metrics,
# or 500 with the error message as plain text if anything raises.
def all_workers_metrics
  aggregator = PromMetricsAggregator.new

  send_request_to_each_worker do |resp|
    next unless resp.code.to_s == '200'

    aggregator.add_metrics(resp.body)
  end

  headers = { 'Content-Type' => ::Prometheus::Client::Formats::Text::CONTENT_TYPE }
  [200, headers, aggregator.get_metrics]
rescue => e
  [500, { 'Content-Type' => 'text/plain' }, e.to_s]
end
|
205
|
+
|
206
|
+
# Requests @metrics_path from every worker port (@base_port up to
# @base_port + @num_workers - 1) and yields each response to the caller.
# A wildcard bind address of '0.0.0.0' is contacted through 127.0.0.1.
def send_request_to_each_worker
  target_host = @bind == '0.0.0.0' ? '127.0.0.1' : @bind
  worker_ports = Array(@base_port...(@base_port + @num_workers))

  worker_ports.each do |worker_port|
    do_request(host: target_host, port: worker_port, secure: @secure) do |http|
      yield(http.get(@metrics_path))
    end
  end
end
|
214
|
+
|
215
|
+
# might be replaced by AsyncWrapper if async gem is installed
|
216
|
+
# Opens an HTTP(S) session to host:port and yields the started Net::HTTP
# client to the block. May be replaced by AsyncWrapper when the async gem
# is installed (see #start).
def do_request(host:, port:, secure:)
  client = Net::HTTP.new(host, port)

  if secure
    client.use_ssl = true
    # Certificate verification is skipped; per the original author's note the
    # target is one of our own child processes.
    client.verify_mode = OpenSSL::SSL::VERIFY_NONE
  end

  client.start { yield(client) }
end
|
229
|
+
end
|
230
|
+
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'fluent/plugin/input'
|
2
|
+
require 'fluent/plugin/in_monitor_agent'
|
3
|
+
require 'fluent/plugin/prometheus'
|
4
|
+
|
5
|
+
module Fluent::Plugin
|
6
|
+
class PrometheusMonitorInput < Fluent::Plugin::Input
|
7
|
+
Fluent::Plugin.register_input('prometheus_monitor', self)
|
8
|
+
include Fluent::Plugin::PrometheusLabelParser
|
9
|
+
|
10
|
+
helpers :timer
|
11
|
+
|
12
|
+
config_param :interval, :time, default: 5
|
13
|
+
attr_reader :registry
|
14
|
+
|
15
|
+
# Binds the plugin to the shared Prometheus client registry.
def initialize
  super
  @registry = ::Prometheus::Client.registry
end
|
19
|
+
|
20
|
+
# Always true: this input declares itself usable with multiple workers.
def multi_workers_ready?
  true
end
|
23
|
+
|
24
|
+
# Parses the configured labels, expands their placeholders (hostname and
# worker_id) and creates the monitor agent used to collect plugin info.
#
# Raises Fluent::ConfigError when a label value is not a plain String.
def configure(conf)
  super

  builder = Fluent::Plugin::Prometheus.placeholder_expander(log)
  expander = builder.build({ 'hostname' => Socket.gethostname, 'worker_id' => fluentd_worker_id })

  @base_labels = parse_labels_elements(conf)
  @base_labels.each_pair do |label, template|
    raise Fluent::ConfigError, "record accessor syntax is not available in prometheus_monitor" unless template.is_a?(String)

    @base_labels[label] = expander.expand(template)
  end

  agent_class =
    if defined?(Fluent::Plugin) && defined?(Fluent::Plugin::MonitorAgentInput)
      # from v0.14.6
      Fluent::Plugin::MonitorAgentInput
    else
      Fluent::MonitorAgentInput
    end
  @monitor_agent = agent_class.new
end
|
45
|
+
|
46
|
+
# Registers (or reuses) the status gauges and schedules #update_monitor_info
# to run every @interval seconds.
def start
  super

  @buffer_newest_timekey = get_gauge(:fluentd_status_buffer_newest_timekey, 'Newest timekey in buffer.')
  @buffer_oldest_timekey = get_gauge(:fluentd_status_buffer_oldest_timekey, 'Oldest timekey in buffer.')

  # Keys are the field names looked up in each plugin info hash.
  @monitor_info = {
    'buffer_queue_length' => get_gauge(:fluentd_status_buffer_queue_length, 'Current buffer queue length.'),
    'buffer_total_queued_size' => get_gauge(:fluentd_status_buffer_total_bytes, 'Current total size of queued buffers.'),
    'retry_count' => get_gauge(:fluentd_status_retry_count, 'Current retry counts.'),
  }

  timer_execute(:in_prometheus_monitor, @interval, &method(:update_monitor_info))
end
|
72
|
+
|
73
|
+
# Copies the current monitor-agent values of every plugin into the gauges.
# Fields missing from a plugin's info are skipped; the newest/oldest timekey
# gauges are only set when the plugin reports a non-empty buffer_timekeys.
def update_monitor_info
  @monitor_agent.plugins_info_all.each do |plugin_info|
    label_set = labels(plugin_info)

    @monitor_info.each_pair do |field, gauge|
      value = plugin_info[field]
      gauge.set(value, labels: label_set) if value
    end

    timekeys = plugin_info["buffer_timekeys"]
    next unless timekeys && !timekeys.empty?

    @buffer_newest_timekey.set(timekeys.max, labels: label_set)
    @buffer_oldest_timekey.set(timekeys.min, labels: label_set)
  end
end
|
90
|
+
|
91
|
+
# Builds the label set for one plugin: the configured base labels plus the
# plugin's id, category and type. @base_labels itself is not modified.
def labels(plugin_info)
  plugin_labels = {
    plugin_id: plugin_info["plugin_id"],
    plugin_category: plugin_info["plugin_category"],
    type: plugin_info["type"],
  }
  @base_labels.merge(plugin_labels)
end
|
98
|
+
|
99
|
+
# Returns the gauge registered under +name+, creating it with the base label
# keys plus :plugin_id, :plugin_category and :type when it does not exist yet.
def get_gauge(name, docstring)
  return @registry.get(name) if @registry.exist?(name)

  label_keys = @base_labels.keys + [:plugin_id, :plugin_category, :type]
  @registry.gauge(name, docstring: docstring, labels: label_keys)
end
|
106
|
+
end
|
107
|
+
end
|
@@ -0,0 +1,234 @@
|
|
1
|
+
require 'fluent/plugin/input'
|
2
|
+
require 'fluent/plugin/in_monitor_agent'
|
3
|
+
require 'fluent/plugin/prometheus'
|
4
|
+
|
5
|
+
module Fluent::Plugin
|
6
|
+
class PrometheusOutputMonitorInput < Fluent::Plugin::Input
|
7
|
+
Fluent::Plugin.register_input('prometheus_output_monitor', self)
|
8
|
+
include Fluent::Plugin::PrometheusLabelParser
|
9
|
+
|
10
|
+
helpers :timer
|
11
|
+
|
12
|
+
config_param :interval, :time, default: 5
|
13
|
+
config_param :gauge_all, :bool, default: true
|
14
|
+
attr_reader :registry
|
15
|
+
|
16
|
+
# Instance variables requested from the monitor agent for each plugin
# (passed as the :ivars option in #update_monitor_info). Frozen so the
# shared list cannot be mutated at runtime.
MONITOR_IVARS = [
  :retry,

  :num_errors,
  :emit_count,

  # for v0.12
  :last_retry_time,

  # from v0.14
  :emit_records,
  :write_count,
  :rollback_count,

  # from v1.6.0
  :flush_time_count,
  :slow_flush_count,
].freeze
|
34
|
+
|
35
|
+
# Binds the plugin to the shared Prometheus client registry.
def initialize
  super
  @registry = ::Prometheus::Client.registry
end
|
39
|
+
|
40
|
+
# Always true: this input declares itself usable with multiple workers.
def multi_workers_ready?
  true
end
|
43
|
+
|
44
|
+
# Parses the configured labels, expands their placeholders (hostname and
# worker_id), creates the monitor agent and decides whether the count-style
# metrics are registered as gauges (gauge_all true) or counters.
#
# Raises Fluent::ConfigError when a label value is not a plain String.
def configure(conf)
  super

  builder = Fluent::Plugin::Prometheus.placeholder_expander(log)
  expander = builder.build({ 'hostname' => Socket.gethostname, 'worker_id' => fluentd_worker_id })

  @base_labels = parse_labels_elements(conf)
  @base_labels.each_pair do |label, template|
    raise Fluent::ConfigError, "record accessor syntax is not available in prometheus_output_monitor" unless template.is_a?(String)

    @base_labels[label] = expander.expand(template)
  end

  @monitor_agent = Fluent::Plugin::MonitorAgentInput.new

  @gauge_or_counter = if @gauge_all
    :gauge
  else
    :counter
  end
end
|
61
|
+
|
62
|
+
# Registers (or reuses) every output/buffer metric and schedules
# #update_monitor_info to run every @interval seconds.
def start
  super

  # Buffer metrics
  buffer_metrics = {
    buffer_total_queued_size: get_gauge(:fluentd_output_status_buffer_total_bytes, 'Current total size of stage and queue buffers.'),
    buffer_stage_length: get_gauge(:fluentd_output_status_buffer_stage_length, 'Current length of stage buffers.'),
    buffer_stage_byte_size: get_gauge(:fluentd_output_status_buffer_stage_byte_size, 'Current total size of stage buffers.'),
    buffer_queue_length: get_gauge(:fluentd_output_status_buffer_queue_length, 'Current length of queue buffers.'),
    buffer_queue_byte_size: get_gauge(:fluentd_output_status_queue_byte_size, 'Current total size of queue buffers.'),
    buffer_available_buffer_space_ratios: get_gauge(:fluentd_output_status_buffer_available_space_ratio, 'Ratio of available space in buffer.'),
    buffer_newest_timekey: get_gauge(:fluentd_output_status_buffer_newest_timekey, 'Newest timekey in buffer.'),
    buffer_oldest_timekey: get_gauge(:fluentd_output_status_buffer_oldest_timekey, 'Oldest timekey in buffer.'),
  }

  # Output metrics
  output_metrics = {
    retry_counts: get_gauge_or_counter(:fluentd_output_status_retry_count, 'Current retry counts.'),
    num_errors: get_gauge_or_counter(:fluentd_output_status_num_errors, 'Current number of errors.'),
    emit_count: get_gauge_or_counter(:fluentd_output_status_emit_count, 'Current emit counts.'),
    emit_records: get_gauge_or_counter(:fluentd_output_status_emit_records, 'Current emit records.'),
    write_count: get_gauge_or_counter(:fluentd_output_status_write_count, 'Current write counts.'),
    rollback_count: get_gauge(:fluentd_output_status_rollback_count, 'Current rollback counts.'),
    flush_time_count: get_gauge_or_counter(:fluentd_output_status_flush_time_count, 'Total flush time.'),
    slow_flush_count: get_gauge_or_counter(:fluentd_output_status_slow_flush_count, 'Current slow flush counts.'),
    retry_wait: get_gauge(:fluentd_output_status_retry_wait, 'Current retry wait'),
  }

  @metrics = buffer_metrics.merge(output_metrics)
  timer_execute(:in_prometheus_output_monitor, @interval, &method(:update_monitor_info))
end
|
123
|
+
|
124
|
+
# Refreshes every metric from the monitor agent's view of the output plugins.
#
# For each output plugin the reported fields (and, for backward
# compatibility, selected instance variables) are written to their metrics:
# gauges are set to the value, counters are incremented by the difference
# from their current value. When retry info is present, the retry_wait gauge
# is set to next_time - start (0 when either is missing).
def update_monitor_info
  agent_opts = { ivars: MONITOR_IVARS, with_retry: true }
  output_infos = @monitor_agent.plugins_info_all(agent_opts).select do |info|
    info['plugin_category'] == 'output'.freeze
  end

  # Writes +value+ to +metric+ according to the metric's type.
  record = lambda do |metric, value, label|
    case metric
    when ::Prometheus::Client::Gauge
      metric.set(value, labels: label)
    when ::Prometheus::Client::Counter
      metric.increment(by: value - metric.get(labels: label), labels: label)
    end
  end

  # Info field name => metrics fed from that field.
  metrics_by_field = {
    # buffer metrics
    'buffer_total_queued_size' => @metrics.values_at(:buffer_total_queued_size),
    'buffer_stage_length' => @metrics.values_at(:buffer_stage_length),
    'buffer_stage_byte_size' => @metrics.values_at(:buffer_stage_byte_size),
    'buffer_queue_length' => @metrics.values_at(:buffer_queue_length),
    'buffer_queue_byte_size' => @metrics.values_at(:buffer_queue_byte_size),
    'buffer_available_buffer_space_ratios' => @metrics.values_at(:buffer_available_buffer_space_ratios),
    'buffer_newest_timekey' => @metrics.values_at(:buffer_newest_timekey),
    'buffer_oldest_timekey' => @metrics.values_at(:buffer_oldest_timekey),

    # output metrics
    'retry_count' => @metrics.values_at(:retry_counts, :num_errors),
    # Needed since Fluentd v1.14 due to metrics extensions.
    'write_count' => @metrics.values_at(:write_count),
    'emit_count' => @metrics.values_at(:emit_count),
    'emit_records' => @metrics.values_at(:emit_records),
    'rollback_count' => @metrics.values_at(:rollback_count),
    'flush_time_count' => @metrics.values_at(:flush_time_count),
    'slow_flush_count' => @metrics.values_at(:slow_flush_count),
  }
  # Not needed for Fluentd v1.14 but kept as-is for backward compatibility.
  ivar_metric_names = [
    :num_errors,
    :write_count,
    :emit_count,
    :emit_records,
    :rollback_count,
    :flush_time_count,
    :slow_flush_count,
  ]

  output_infos.each do |info|
    label = labels(info)

    metrics_by_field.each do |field, metrics|
      metrics.each do |metric|
        record.call(metric, info[field], label) if info[field]
      end
    end

    ivars = info['instance_variables']
    if ivars
      ivar_metric_names.each do |ivar_name|
        record.call(@metrics[ivar_name], ivars[ivar_name], label) if ivars[ivar_name]
      end
    end

    # compute current retry_wait
    retry_info = info['retry']
    next unless retry_info

    next_time = retry_info['next_time']
    start_time = retry_info['start']
    if start_time.nil? && ivars
      # v0.12 does not include start, use last_retry_time instead
      start_time = ivars[:last_retry_time]
    end

    wait = next_time && start_time ? next_time - start_time : 0
    @metrics[:retry_wait].set(wait.to_f, labels: label)
  end
end
|
210
|
+
|
211
|
+
# Builds the label set for one plugin: the configured base labels plus the
# plugin's id and type. @base_labels itself is not modified.
def labels(plugin_info)
  plugin_labels = {
    plugin_id: plugin_info["plugin_id"],
    type: plugin_info["type"],
  }
  @base_labels.merge(plugin_labels)
end
|
217
|
+
|
218
|
+
# Returns the gauge registered under +name+, creating it with the base label
# keys plus :plugin_id and :type when it does not exist yet.
def get_gauge(name, docstring)
  return @registry.get(name) if @registry.exist?(name)

  label_keys = @base_labels.keys + [:plugin_id, :type]
  @registry.gauge(name, docstring: docstring, labels: label_keys)
end
|
225
|
+
|
226
|
+
# Returns the metric registered under +name+; when absent, creates it as a
# gauge or a counter depending on @gauge_or_counter, labelled with the base
# label keys plus :plugin_id and :type.
def get_gauge_or_counter(name, docstring)
  return @registry.get(name) if @registry.exist?(name)

  label_keys = @base_labels.keys + [:plugin_id, :type]
  @registry.public_send(@gauge_or_counter, name, docstring: docstring, labels: label_keys)
end
|
233
|
+
end
|
234
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require 'fluent/plugin/input'
|
2
|
+
require 'fluent/plugin/in_monitor_agent'
|
3
|
+
require 'fluent/plugin/prometheus'
|
4
|
+
|
5
|
+
module Fluent::Plugin
|
6
|
+
class PrometheusTailMonitorInput < Fluent::Plugin::Input
|
7
|
+
Fluent::Plugin.register_input('prometheus_tail_monitor', self)
|
8
|
+
include Fluent::Plugin::PrometheusLabelParser
|
9
|
+
|
10
|
+
helpers :timer
|
11
|
+
|
12
|
+
config_param :interval, :time, default: 5
|
13
|
+
attr_reader :registry
|
14
|
+
|
15
|
+
# Instance variables requested from the monitor agent for each plugin
# (passed as the :ivars option in #update_monitor_info). Frozen so the
# shared list cannot be mutated at runtime.
MONITOR_IVARS = [
  :tails,
].freeze
|
18
|
+
|
19
|
+
# Binds the plugin to the shared Prometheus client registry.
def initialize
  super
  @registry = ::Prometheus::Client.registry
end
|
23
|
+
|
24
|
+
# Always true: this input declares itself usable with multiple workers.
def multi_workers_ready?
  true
end
|
27
|
+
|
28
|
+
# Parses the configured labels, expands their placeholders (hostname and
# worker_id) and creates the monitor agent used to collect plugin info.
#
# Raises Fluent::ConfigError when a label value is not a plain String.
def configure(conf)
  super

  builder = Fluent::Plugin::Prometheus.placeholder_expander(log)
  expander = builder.build({ 'hostname' => Socket.gethostname, 'worker_id' => fluentd_worker_id })

  @base_labels = parse_labels_elements(conf)
  @base_labels.each_pair do |label, template|
    raise Fluent::ConfigError, "record accessor syntax is not available in prometheus_tail_monitor" unless template.is_a?(String)

    @base_labels[label] = expander.expand(template)
  end

  @monitor_agent = Fluent::Plugin::MonitorAgentInput.new
end
|
43
|
+
|
44
|
+
# Registers (or reuses) the tail position/inode gauges and schedules
# #update_monitor_info to run every @interval seconds.
def start
  super

  position_gauge = get_gauge(:fluentd_tail_file_position, 'Current position of file.')
  inode_gauge = get_gauge(:fluentd_tail_file_inode, 'Current inode of file.')
  @metrics = { position: position_gauge, inode: inode_gauge }

  timer_execute(:in_prometheus_tail_monitor, @interval, &method(:update_monitor_info))
end
|
57
|
+
|
58
|
+
# Publishes the inode and read position of every file watched by the
# 'tail' plugins reported by the monitor agent. Plugins without a :tails
# instance variable are skipped.
def update_monitor_info
  tail_infos = @monitor_agent.plugins_info_all({ ivars: MONITOR_IVARS }).select do |info|
    info['type'] == 'tail'.freeze
  end

  tail_infos.each do |info|
    watchers = info['instance_variables'][:tails]
    next if watchers.nil?

    # Iterate over a copy of the watcher table
    # (presumably so changes made by the tail plugin meanwhile do not
    # disturb the iteration - TODO confirm).
    watchers.clone.each do |_, watcher|
      # Access to internal variable of internal class...
      # Very fragile implementation
      position_entry = watcher.instance_variable_get(:@pe)
      label_set = labels(info, watcher.path)
      @metrics[:inode].set(position_entry.read_inode, labels: label_set)
      @metrics[:position].set(position_entry.read_pos, labels: label_set)
    end
  end
end
|
81
|
+
|
82
|
+
# Builds the label set for one watched file: the configured base labels plus
# the plugin's id and type and the file path. @base_labels is not modified.
def labels(plugin_info, path)
  file_labels = {
    plugin_id: plugin_info["plugin_id"],
    type: plugin_info["type"],
    path: path,
  }
  @base_labels.merge(file_labels)
end
|
89
|
+
|
90
|
+
# Returns the gauge registered under +name+, creating it with the base label
# keys plus :plugin_id, :type and :path when it does not exist yet.
def get_gauge(name, docstring)
  return @registry.get(name) if @registry.exist?(name)

  label_keys = @base_labels.keys + [:plugin_id, :type, :path]
  @registry.gauge(name, docstring: docstring, labels: label_keys)
end
|
97
|
+
end
|
98
|
+
end
|