phihos-fluent-plugin-prometheus 2.0.3.pre.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/linux.yml +34 -0
- data/.gitignore +16 -0
- data/.rspec +2 -0
- data/.travis.yml +14 -0
- data/ChangeLog +43 -0
- data/Gemfile +4 -0
- data/LICENSE +202 -0
- data/README.md +537 -0
- data/Rakefile +7 -0
- data/fluent-plugin-prometheus.gemspec +22 -0
- data/lib/fluent/plugin/filter_prometheus.rb +50 -0
- data/lib/fluent/plugin/in_prometheus/async_wrapper.rb +47 -0
- data/lib/fluent/plugin/in_prometheus.rb +230 -0
- data/lib/fluent/plugin/in_prometheus_monitor.rb +107 -0
- data/lib/fluent/plugin/in_prometheus_output_monitor.rb +234 -0
- data/lib/fluent/plugin/in_prometheus_tail_monitor.rb +98 -0
- data/lib/fluent/plugin/out_prometheus.rb +49 -0
- data/lib/fluent/plugin/prometheus/data_store.rb +103 -0
- data/lib/fluent/plugin/prometheus/placeholder_expander.rb +132 -0
- data/lib/fluent/plugin/prometheus.rb +445 -0
- data/lib/fluent/plugin/prometheus_metrics.rb +77 -0
- data/misc/fluentd_sample.conf +170 -0
- data/misc/nginx_proxy.conf +22 -0
- data/misc/prometheus.yaml +13 -0
- data/misc/prometheus_alerts.yaml +59 -0
- data/spec/fluent/plugin/filter_prometheus_spec.rb +145 -0
- data/spec/fluent/plugin/in_prometheus_monitor_spec.rb +42 -0
- data/spec/fluent/plugin/in_prometheus_spec.rb +225 -0
- data/spec/fluent/plugin/in_prometheus_tail_monitor_spec.rb +42 -0
- data/spec/fluent/plugin/out_prometheus_spec.rb +166 -0
- data/spec/fluent/plugin/prometheus/placeholder_expander_spec.rb +110 -0
- data/spec/fluent/plugin/prometheus_metrics_spec.rb +138 -0
- data/spec/fluent/plugin/shared.rb +248 -0
- data/spec/spec_helper.rb +10 -0
- metadata +176 -0
@@ -0,0 +1,230 @@
|
|
1
|
+
require 'fluent/plugin/input'
|
2
|
+
require 'fluent/plugin/prometheus'
|
3
|
+
require 'fluent/plugin/prometheus_metrics'
|
4
|
+
require 'net/http'
|
5
|
+
require 'openssl'
|
6
|
+
|
7
|
+
module Fluent::Plugin
|
8
|
+
class PrometheusInput < Fluent::Plugin::Input
|
9
|
+
Fluent::Plugin.register_input('prometheus', self)
|
10
|
+
|
11
|
+
helpers :thread, :http_server
|
12
|
+
|
13
|
+
config_param :bind, :string, default: '0.0.0.0'
|
14
|
+
config_param :port, :integer, default: 24231
|
15
|
+
config_param :metrics_path, :string, default: '/metrics'
|
16
|
+
config_param :aggregated_metrics_path, :string, default: '/aggregated_metrics'
|
17
|
+
|
18
|
+
desc 'Enable ssl configuration for the server'
|
19
|
+
config_section :ssl, required: false, multi: false do
|
20
|
+
config_param :enable, :bool, default: false, deprecated: 'Use <transport tls> section'
|
21
|
+
|
22
|
+
desc 'Path to the ssl certificate in PEM format. Read from file and added to conf as "SSLCertificate"'
|
23
|
+
config_param :certificate_path, :string, default: nil, deprecated: 'Use cert_path in <transport tls> section'
|
24
|
+
|
25
|
+
desc 'Path to the ssl private key in PEM format. Read from file and added to conf as "SSLPrivateKey"'
|
26
|
+
config_param :private_key_path, :string, default: nil, deprecated: 'Use private_key_path in <transport tls> section'
|
27
|
+
|
28
|
+
desc 'Path to CA in PEM format. Read from file and added to conf as "SSLCACertificateFile"'
|
29
|
+
config_param :ca_path, :string, default: nil, deprecated: 'Use ca_path in <transport tls> section'
|
30
|
+
|
31
|
+
desc 'Additional ssl conf for the server. Ref: https://github.com/ruby/webrick/blob/master/lib/webrick/ssl.rb'
|
32
|
+
config_param :extra_conf, :hash, default: nil, symbolize_keys: true, deprecated: 'See http helper config'
|
33
|
+
end
|
34
|
+
|
35
|
+
# Initializes plugin state: the TLS flag starts unset and the registry is
# whatever ::Prometheus::Client.registry returns.
def initialize
  super
  @secure = nil
  @registry = ::Prometheus::Client.registry
end
|
40
|
+
|
41
|
+
# Derives runtime settings after the base class has processed +conf+:
# the worker count, whether the endpoint is served over TLS, and the
# per-worker listen port (base port + worker id).
def configure(conf)
  super

  # Get how many workers we have
  system_conf =
    if respond_to?(:owner) && owner.respond_to?(:system_config)
      owner.system_config
    elsif respond_to?(:system_config)
      system_config
    end
  @num_workers = (system_conf && system_conf.workers) || 1

  # TLS is on when the transport section says so, or the legacy <ssl> section enables it.
  tls_transport = @transport_config.protocol == :tls
  @secure = tls_transport || (@ssl && @ssl['enable'])

  # Keep the configured port so sibling workers can be addressed later.
  @base_port = @port
  @port += fluentd_worker_id
end
|
58
|
+
|
59
|
+
# Always answers true: this plugin declares itself usable with multiple workers.
def multi_workers_ready?
  true
end
|
62
|
+
|
63
|
+
# Starts the metrics HTTP(S) endpoint for this worker.
#
# When the legacy <ssl> section is enabled and carries extra_conf, the
# WEBrick-based server is started instead and this method returns early.
# Otherwise the http_server helper serves @metrics_path and
# @aggregated_metrics_path, with TLS options translated from the legacy
# <ssl> section when it is enabled.
#
# Raises Fluent::ConfigError when only one of certificate_path /
# private_key_path is set in an enabled <ssl> section.
def start
  super

  scheme = @secure ? 'https' : 'http'
  # @metrics_path is logged verbatim (default '/metrics'), so no extra slash is added.
  log.debug "listening prometheus http server on #{scheme}://#{@bind}:#{@port}#{@metrics_path} for worker#{fluentd_worker_id}"

  proto = @secure ? :tls : :tcp

  if @ssl && @ssl['enable'] && @ssl['extra_conf']
    start_webrick
    return
  end

  # Optional: use the async-aware request wrapper when the async gem is present.
  begin
    require 'async'
    require 'fluent/plugin/in_prometheus/async_wrapper'
    extend AsyncWrapper
  rescue LoadError => _
    # ignore
  end

  tls_opt = if @ssl && @ssl['enable']
              cert_path = @ssl['certificate_path']
              key_path = @ssl['private_key_path']

              if (cert_path && key_path.nil?) || (cert_path.nil? && key_path)
                raise Fluent::ConfigError.new('both certificate_path and private_key_path must be defined')
              end

              ssl_config = {}
              ssl_config['cert_path'] = cert_path if cert_path
              ssl_config['private_key_path'] = key_path if key_path

              if @ssl['ca_path']
                ssl_config['ca_path'] = @ssl['ca_path']
                # Only ca_path is insecure in fluentd
                # https://github.com/fluent/fluentd/blob/2236ad45197ba336fd9faf56f442252c8b226f25/lib/fluent/plugin_helper/cert_option.rb#L68
                ssl_config['insecure'] = true
              end

              ssl_config
            end

  http_server_create_http_server(:in_prometheus_server, addr: @bind, port: @port, logger: log, proto: proto, tls_opts: tls_opt) do |server|
    server.get(@metrics_path) { |_req| all_metrics }
    server.get(@aggregated_metrics_path) { |_req| all_workers_metrics }
  end
end
|
114
|
+
|
115
|
+
# Stops the WEBrick server if one was started, clears the reference, then
# lets the base class finish its own shutdown.
def shutdown
  if (server = @webrick_server)
    server.shutdown
    @webrick_server = nil
  end
  super
end
|
122
|
+
|
123
|
+
private
|
124
|
+
|
125
|
+
# For compatibility because http helper can't support extra_conf option
#
# Starts a WEBrick HTTPS server on @bind:@port that serves @metrics_path and
# @aggregated_metrics_path, running it in a plugin-managed thread.
# The TLS settings come from the legacy <ssl> section; extra_conf entries
# override the generated SSL settings, while the bind/port/logging settings
# built here take precedence over everything else.
#
# Raises RuntimeError when only one of certificate_path / private_key_path is set.
def start_webrick
  require 'webrick/https'
  require 'webrick'

  config = {
    BindAddress: @bind,
    Port: @port,
    MaxClients: 5,
    Logger: WEBrick::Log.new(STDERR, WEBrick::Log::FATAL),
    AccessLog: [],
  }
  if (@ssl['certificate_path'] && @ssl['private_key_path'].nil?) || (@ssl['certificate_path'].nil? && @ssl['private_key_path'])
    raise RuntimeError.new("certificate_path and private_key_path must both be defined")
  end

  ssl_config = {
    SSLEnable: true,
    SSLCertName: [['CN', 'nobody'], ['DC', 'example']]
  }

  if @ssl['certificate_path']
    cert = OpenSSL::X509::Certificate.new(File.read(@ssl['certificate_path']))
    ssl_config[:SSLCertificate] = cert
  end

  if @ssl['private_key_path']
    # OpenSSL::PKey.read parses key material (PEM/DER), not a file path,
    # so the file has to be loaded first.
    key = OpenSSL::PKey.read(File.read(@ssl['private_key_path']))
    ssl_config[:SSLPrivateKey] = key
  end

  ssl_config[:SSLCACertificateFile] = @ssl['ca_path'] if @ssl['ca_path']
  ssl_config = ssl_config.merge(@ssl['extra_conf']) if @ssl['extra_conf']
  config = ssl_config.merge(config)

  @log.on_debug do
    @log.debug("WEBrick conf: #{config}")
  end

  @webrick_server = WEBrick::HTTPServer.new(config)
  @webrick_server.mount_proc(@metrics_path) do |_req, res|
    status, header, body = all_metrics
    res.status = status
    res['Content-Type'] = header['Content-Type']
    res.body = body
    res
  end

  @webrick_server.mount_proc(@aggregated_metrics_path) do |_req, res|
    status, header, body = all_workers_metrics
    res.status = status
    res['Content-Type'] = header['Content-Type']
    res.body = body
    res
  end

  thread_create(:in_prometheus_webrick) do
    @webrick_server.start
  end
end
|
185
|
+
|
186
|
+
# Renders this worker's registry in the Prometheus text format.
#
# Returns a [status, headers, body] triple: 200 with the text exposition on
# success, or 500 with the error message as plain text if rendering raises.
def all_metrics
  text_format = ::Prometheus::Client::Formats::Text
  headers = { 'Content-Type' => text_format::CONTENT_TYPE }
  payload = text_format.marshal(@registry)
  [200, headers, payload]
rescue => error
  [500, { 'Content-Type' => 'text/plain' }, error.to_s]
end
|
191
|
+
|
192
|
+
# Collects the metrics of every worker and merges them into one response.
#
# Only responses whose status code is 200 contribute to the result; other
# responses are skipped. Returns a [status, headers, body] triple: 200 with
# the merged text on success, or 500 with the error message if anything raises.
def all_workers_metrics
  aggregator = PromMetricsAggregator.new

  send_request_to_each_worker do |response|
    aggregator.add_metrics(response.body) if response.code.to_s == '200'
  end

  headers = { 'Content-Type' => ::Prometheus::Client::Formats::Text::CONTENT_TYPE }
  [200, headers, aggregator.get_metrics]
rescue => error
  [500, { 'Content-Type' => 'text/plain' }, error.to_s]
end
|
205
|
+
|
206
|
+
# Requests @metrics_path from each worker's port (@base_port up to
# @base_port + @num_workers - 1) and yields every response to the caller's block.
# A wildcard bind address of '0.0.0.0' is contacted through 127.0.0.1.
def send_request_to_each_worker
  target_host = @bind == '0.0.0.0' ? '127.0.0.1' : @bind
  worker_ports = (@base_port...(@base_port + @num_workers)).to_a

  worker_ports.each do |port|
    do_request(host: target_host, port: port, secure: @secure) do |client|
      yield client.get(@metrics_path)
    end
  end
end
|
214
|
+
|
215
|
+
# might be replaced by AsyncWrapper if async gem is installed
#
# Opens an HTTP session to host:port (TLS when +secure+ is truthy) and yields
# the started Net::HTTP object to the caller's block.
def do_request(host:, port:, secure:)
  client = Net::HTTP.new(host, port)

  if secure
    client.use_ssl = true
    # target is our child process. so it's secure.
    client.verify_mode = OpenSSL::SSL::VERIFY_NONE
  end

  client.start { yield(client) }
end
|
229
|
+
end
|
230
|
+
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'fluent/plugin/input'
|
2
|
+
require 'fluent/plugin/in_monitor_agent'
|
3
|
+
require 'fluent/plugin/prometheus'
|
4
|
+
|
5
|
+
module Fluent::Plugin
|
6
|
+
class PrometheusMonitorInput < Fluent::Plugin::Input
|
7
|
+
Fluent::Plugin.register_input('prometheus_monitor', self)
|
8
|
+
include Fluent::Plugin::PrometheusLabelParser
|
9
|
+
|
10
|
+
helpers :timer
|
11
|
+
|
12
|
+
config_param :interval, :time, default: 5
|
13
|
+
attr_reader :registry
|
14
|
+
|
15
|
+
# Stores the registry returned by ::Prometheus::Client.registry after the
# base class has initialized.
def initialize
  super
  @registry = ::Prometheus::Client.registry
end
|
19
|
+
|
20
|
+
# Always answers true: this plugin declares itself usable with multiple workers.
def multi_workers_ready?
  true
end
|
23
|
+
|
24
|
+
# Resolves the static label set and creates the monitor agent.
#
# Label values parsed from +conf+ must be Strings; each one is run through a
# placeholder expander built with the host name and worker id.
# Raises Fluent::ConfigError for any non-String label value.
def configure(conf)
  super

  placeholders = { 'hostname' => Socket.gethostname, 'worker_id' => fluentd_worker_id }
  expander = Fluent::Plugin::Prometheus.placeholder_expander(log).build(placeholders)

  @base_labels = parse_labels_elements(conf)
  @base_labels.each do |label_name, template|
    raise Fluent::ConfigError, "record accessor syntax is not available in prometheus_monitor" unless template.is_a?(String)

    @base_labels[label_name] = expander.expand(template)
  end

  # MonitorAgentInput is namespaced under Fluent::Plugin from v0.14.6
  agent_class =
    if defined?(Fluent::Plugin) && defined?(Fluent::Plugin::MonitorAgentInput)
      Fluent::Plugin::MonitorAgentInput
    else
      Fluent::MonitorAgentInput
    end
  @monitor_agent = agent_class.new
end
|
45
|
+
|
46
|
+
# Registers (or reuses) the status gauges and schedules update_monitor_info
# to run every @interval through the timer helper.
def start
  super

  @buffer_newest_timekey = get_gauge(:fluentd_status_buffer_newest_timekey, 'Newest timekey in buffer.')
  @buffer_oldest_timekey = get_gauge(:fluentd_status_buffer_oldest_timekey, 'Oldest timekey in buffer.')

  # Keyed by the field name looked up in each plugin info hash.
  @monitor_info = {
    'buffer_queue_length' => get_gauge(:fluentd_status_buffer_queue_length, 'Current buffer queue length.'),
    'buffer_total_queued_size' => get_gauge(:fluentd_status_buffer_total_bytes, 'Current total size of queued buffers.'),
    'retry_count' => get_gauge(:fluentd_status_retry_count, 'Current retry counts.'),
  }

  timer_execute(:in_prometheus_monitor, @interval, &method(:update_monitor_info))
end
|
72
|
+
|
73
|
+
# Copies the current values reported by the monitor agent into the gauges.
#
# For every plugin info hash, each field listed in @monitor_info that has a
# truthy value is written to its gauge; when buffer timekeys are present and
# non-empty, their max/min are written to the newest/oldest timekey gauges.
def update_monitor_info
  @monitor_agent.plugins_info_all.each do |info|
    label = labels(info)

    @monitor_info.each do |field, gauge|
      value = info[field]
      gauge.set(value, labels: label) if value
    end

    timekeys = info["buffer_timekeys"]
    next unless timekeys && !timekeys.empty?

    @buffer_newest_timekey.set(timekeys.max, labels: label)
    @buffer_oldest_timekey.set(timekeys.min, labels: label)
  end
end
|
90
|
+
|
91
|
+
# Builds the label set for one plugin: the configured base labels plus the
# plugin's id, category and type taken from +plugin_info+.
def labels(plugin_info)
  plugin_labels = {
    plugin_id: plugin_info['plugin_id'],
    plugin_category: plugin_info['plugin_category'],
    type: plugin_info['type'],
  }
  @base_labels.merge(plugin_labels)
end
|
98
|
+
|
99
|
+
# Returns the metric registered under +name+, registering a new gauge with
# +docstring+ and the base + plugin label names when none exists yet.
def get_gauge(name, docstring)
  return @registry.get(name) if @registry.exist?(name)

  label_names = @base_labels.keys + [:plugin_id, :plugin_category, :type]
  @registry.gauge(name, docstring: docstring, labels: label_names)
end
|
106
|
+
end
|
107
|
+
end
|
@@ -0,0 +1,234 @@
|
|
1
|
+
require 'fluent/plugin/input'
|
2
|
+
require 'fluent/plugin/in_monitor_agent'
|
3
|
+
require 'fluent/plugin/prometheus'
|
4
|
+
|
5
|
+
module Fluent::Plugin
|
6
|
+
class PrometheusOutputMonitorInput < Fluent::Plugin::Input
|
7
|
+
Fluent::Plugin.register_input('prometheus_output_monitor', self)
|
8
|
+
include Fluent::Plugin::PrometheusLabelParser
|
9
|
+
|
10
|
+
helpers :timer
|
11
|
+
|
12
|
+
config_param :interval, :time, default: 5
|
13
|
+
config_param :gauge_all, :bool, default: true
|
14
|
+
attr_reader :registry
|
15
|
+
|
16
|
+
# Names passed as the :ivars option to the monitor agent in update_monitor_info.
# Frozen so the shared constant cannot be modified by accident.
MONITOR_IVARS = [
  :retry,

  :num_errors,
  :emit_count,

  # for v0.12
  :last_retry_time,

  # from v0.14
  :emit_records,
  :write_count,
  :rollback_count,

  # from v1.6.0
  :flush_time_count,
  :slow_flush_count,
].freeze
|
34
|
+
|
35
|
+
# Stores the registry returned by ::Prometheus::Client.registry after the
# base class has initialized.
def initialize
  super
  @registry = ::Prometheus::Client.registry
end
|
39
|
+
|
40
|
+
# Always answers true: this plugin declares itself usable with multiple workers.
def multi_workers_ready?
  true
end
|
43
|
+
|
44
|
+
# Resolves the static label set, creates the monitor agent and picks the
# metric kind used for the count-style metrics.
#
# Label values parsed from +conf+ must be Strings; each one is run through a
# placeholder expander built with the host name and worker id.
# Raises Fluent::ConfigError for any non-String label value.
def configure(conf)
  super

  placeholders = { 'hostname' => Socket.gethostname, 'worker_id' => fluentd_worker_id }
  expander = Fluent::Plugin::Prometheus.placeholder_expander(log).build(placeholders)

  @base_labels = parse_labels_elements(conf)
  @base_labels.each do |label_name, template|
    raise Fluent::ConfigError, "record accessor syntax is not available in prometheus_output_monitor" unless template.is_a?(String)

    @base_labels[label_name] = expander.expand(template)
  end

  @monitor_agent = Fluent::Plugin::MonitorAgentInput.new

  # gauge_all selects gauges for every metric; otherwise counters are used
  # where get_gauge_or_counter is called.
  @gauge_or_counter = if @gauge_all
                        :gauge
                      else
                        :counter
                      end
end
|
61
|
+
|
62
|
+
# Registers (or reuses) every output-status metric and schedules
# update_monitor_info to run every @interval through the timer helper.
#
# Each row below is [key in @metrics, factory, metric name, docstring];
# rows are registered in the order listed.
def start
  super

  gauge = method(:get_gauge)
  configurable = method(:get_gauge_or_counter)

  definitions = [
    # Buffer metrics
    [:buffer_total_queued_size, gauge, :fluentd_output_status_buffer_total_bytes, 'Current total size of stage and queue buffers.'],
    [:buffer_stage_length, gauge, :fluentd_output_status_buffer_stage_length, 'Current length of stage buffers.'],
    [:buffer_stage_byte_size, gauge, :fluentd_output_status_buffer_stage_byte_size, 'Current total size of stage buffers.'],
    [:buffer_queue_length, gauge, :fluentd_output_status_buffer_queue_length, 'Current length of queue buffers.'],
    [:buffer_queue_byte_size, gauge, :fluentd_output_status_queue_byte_size, 'Current total size of queue buffers.'],
    [:buffer_available_buffer_space_ratios, gauge, :fluentd_output_status_buffer_available_space_ratio, 'Ratio of available space in buffer.'],
    [:buffer_newest_timekey, gauge, :fluentd_output_status_buffer_newest_timekey, 'Newest timekey in buffer.'],
    [:buffer_oldest_timekey, gauge, :fluentd_output_status_buffer_oldest_timekey, 'Oldest timekey in buffer.'],

    # Output metrics
    [:retry_counts, configurable, :fluentd_output_status_retry_count, 'Current retry counts.'],
    [:num_errors, configurable, :fluentd_output_status_num_errors, 'Current number of errors.'],
    [:emit_count, configurable, :fluentd_output_status_emit_count, 'Current emit counts.'],
    [:emit_records, configurable, :fluentd_output_status_emit_records, 'Current emit records.'],
    [:write_count, configurable, :fluentd_output_status_write_count, 'Current write counts.'],
    [:rollback_count, gauge, :fluentd_output_status_rollback_count, 'Current rollback counts.'],
    [:flush_time_count, configurable, :fluentd_output_status_flush_time_count, 'Total flush time.'],
    [:slow_flush_count, configurable, :fluentd_output_status_slow_flush_count, 'Current slow flush counts.'],
    [:retry_wait, gauge, :fluentd_output_status_retry_wait, 'Current retry wait'],
  ]

  @metrics = definitions.each_with_object({}) do |(key, factory, metric_name, help), table|
    table[key] = factory.call(metric_name, help)
  end

  timer_execute(:in_prometheus_output_monitor, @interval, &method(:update_monitor_info))
end
|
123
|
+
|
124
|
+
# Copies the current state of every output plugin into the metrics.
#
# For each plugin info hash whose category is 'output':
#   1. fields of the info hash are written to the metrics mapped to them,
#   2. values found under 'instance_variables' are written to their metrics,
#   3. the retry wait (next_time - start) is written when retry data exists.
# Gauges are set to the value; counters are incremented by the difference
# between the value and the counter's current reading.
def update_monitor_info
  query = {
    ivars: MONITOR_IVARS,
    with_retry: true,
  }

  outputs = @monitor_agent.plugins_info_all(query).select do |info|
    info['plugin_category'] == 'output'.freeze
  end

  # info-hash field => keys of the metrics fed from that field
  field_targets = {
    # buffer metrics
    'buffer_total_queued_size' => [:buffer_total_queued_size],
    'buffer_stage_length' => [:buffer_stage_length],
    'buffer_stage_byte_size' => [:buffer_stage_byte_size],
    'buffer_queue_length' => [:buffer_queue_length],
    'buffer_queue_byte_size' => [:buffer_queue_byte_size],
    'buffer_available_buffer_space_ratios' => [:buffer_available_buffer_space_ratios],
    'buffer_newest_timekey' => [:buffer_newest_timekey],
    'buffer_oldest_timekey' => [:buffer_oldest_timekey],

    # output metrics
    'retry_count' => [:retry_counts, :num_errors],
    # Needed since Fluentd v1.14 due to metrics extensions.
    'write_count' => [:write_count],
    'emit_count' => [:emit_count],
    'emit_records' => [:emit_records],
    'rollback_count' => [:rollback_count],
    'flush_time_count' => [:flush_time_count],
    'slow_flush_count' => [:slow_flush_count],
  }
  field_metrics = field_targets.map { |field, keys| [field, @metrics.values_at(*keys)] }

  # No needed for Fluentd v1.14 but leave as-is for backward compatibility.
  ivar_names = [:num_errors, :write_count, :emit_count, :emit_records, :rollback_count, :flush_time_count, :slow_flush_count]
  ivar_metrics = ivar_names.map { |ivar| [ivar, @metrics[ivar]] }

  # Writes one value to a metric according to the metric's kind.
  record = lambda do |metric, value, label|
    case metric
    when ::Prometheus::Client::Gauge
      metric.set(value, labels: label)
    when ::Prometheus::Client::Counter
      metric.increment(by: value - metric.get(labels: label), labels: label)
    end
  end

  outputs.each do |info|
    label = labels(info)

    field_metrics.each do |field, targets|
      targets.each do |metric|
        value = info[field]
        record.call(metric, value, label) if value
      end
    end

    ivars = info['instance_variables']
    if ivars
      ivar_metrics.each do |ivar, metric|
        value = ivars[ivar]
        record.call(metric, value, label) if value
      end
    end

    # compute current retry_wait
    retry_state = info['retry']
    next unless retry_state

    next_time = retry_state['next_time']
    start_time = retry_state['start']
    # v0.12 does not include start, use last_retry_time instead
    start_time = ivars[:last_retry_time] if start_time.nil? && ivars

    wait = (next_time && start_time) ? next_time - start_time : 0
    @metrics[:retry_wait].set(wait.to_f, labels: label)
  end
end
|
210
|
+
|
211
|
+
# Builds the label set for one plugin: the configured base labels plus the
# plugin's id and type taken from +plugin_info+.
def labels(plugin_info)
  plugin_labels = {
    plugin_id: plugin_info['plugin_id'],
    type: plugin_info['type'],
  }
  @base_labels.merge(plugin_labels)
end
|
217
|
+
|
218
|
+
# Returns the metric registered under +name+, registering a new gauge with
# +docstring+ and the base + plugin label names when none exists yet.
def get_gauge(name, docstring)
  return @registry.get(name) if @registry.exist?(name)

  label_names = @base_labels.keys + [:plugin_id, :type]
  @registry.gauge(name, docstring: docstring, labels: label_names)
end
|
225
|
+
|
226
|
+
# Returns the metric registered under +name+; when none exists yet, registers
# one of the kind named by @gauge_or_counter with +docstring+ and the
# base + plugin label names.
def get_gauge_or_counter(name, docstring)
  return @registry.get(name) if @registry.exist?(name)

  label_names = @base_labels.keys + [:plugin_id, :type]
  @registry.public_send(@gauge_or_counter, name, docstring: docstring, labels: label_names)
end
|
233
|
+
end
|
234
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require 'fluent/plugin/input'
|
2
|
+
require 'fluent/plugin/in_monitor_agent'
|
3
|
+
require 'fluent/plugin/prometheus'
|
4
|
+
|
5
|
+
module Fluent::Plugin
|
6
|
+
class PrometheusTailMonitorInput < Fluent::Plugin::Input
|
7
|
+
Fluent::Plugin.register_input('prometheus_tail_monitor', self)
|
8
|
+
include Fluent::Plugin::PrometheusLabelParser
|
9
|
+
|
10
|
+
helpers :timer
|
11
|
+
|
12
|
+
config_param :interval, :time, default: 5
|
13
|
+
attr_reader :registry
|
14
|
+
|
15
|
+
# Names passed as the :ivars option to the monitor agent in update_monitor_info.
# Frozen so the shared constant cannot be modified by accident.
MONITOR_IVARS = [
  :tails,
].freeze
|
18
|
+
|
19
|
+
# Stores the registry returned by ::Prometheus::Client.registry after the
# base class has initialized.
def initialize
  super
  @registry = ::Prometheus::Client.registry
end
|
23
|
+
|
24
|
+
# Always answers true: this plugin declares itself usable with multiple workers.
def multi_workers_ready?
  true
end
|
27
|
+
|
28
|
+
# Resolves the static label set and creates the monitor agent.
#
# Label values parsed from +conf+ must be Strings; each one is run through a
# placeholder expander built with the host name and worker id.
# Raises Fluent::ConfigError for any non-String label value.
def configure(conf)
  super

  placeholders = { 'hostname' => Socket.gethostname, 'worker_id' => fluentd_worker_id }
  expander = Fluent::Plugin::Prometheus.placeholder_expander(log).build(placeholders)

  @base_labels = parse_labels_elements(conf)
  @base_labels.each do |label_name, template|
    raise Fluent::ConfigError, "record accessor syntax is not available in prometheus_tail_monitor" unless template.is_a?(String)

    @base_labels[label_name] = expander.expand(template)
  end

  @monitor_agent = Fluent::Plugin::MonitorAgentInput.new
end
|
43
|
+
|
44
|
+
# Registers (or reuses) the tail position/inode gauges and schedules
# update_monitor_info to run every @interval through the timer helper.
def start
  super

  position_gauge = get_gauge(:fluentd_tail_file_position, 'Current position of file.')
  inode_gauge = get_gauge(:fluentd_tail_file_inode, 'Current inode of file.')
  @metrics = { position: position_gauge, inode: inode_gauge }

  timer_execute(:in_prometheus_tail_monitor, @interval, &method(:update_monitor_info))
end
|
57
|
+
|
58
|
+
# Publishes the inode and read position of every file watched by tail plugins.
#
# Plugin infos are requested with the :tails instance variable and filtered to
# type 'tail'; plugins without tails are skipped.
def update_monitor_info
  query = {
    ivars: MONITOR_IVARS,
  }

  tail_plugins = @monitor_agent.plugins_info_all(query).select do |info|
    info['type'] == 'tail'.freeze
  end

  tail_plugins.each do |info|
    watchers = info['instance_variables'][:tails]
    next if watchers.nil?

    # Iterates over a copy of the collection (presumably because the plugin
    # may change it concurrently — TODO confirm).
    watchers.clone.each do |_key, watcher|
      # Access to internal variable of internal class...
      # Very fragile implementation
      position_entry = watcher.instance_variable_get(:@pe)
      label = labels(info, watcher.path)
      @metrics[:inode].set(position_entry.read_inode, labels: label)
      @metrics[:position].set(position_entry.read_pos, labels: label)
    end
  end
end
|
81
|
+
|
82
|
+
# Builds the label set for one watched file: the configured base labels plus
# the plugin's id and type taken from +plugin_info+, and the file +path+.
def labels(plugin_info, path)
  file_labels = {
    plugin_id: plugin_info['plugin_id'],
    type: plugin_info['type'],
    path: path,
  }
  @base_labels.merge(file_labels)
end
|
89
|
+
|
90
|
+
# Returns the metric registered under +name+, registering a new gauge with
# +docstring+ and the base + plugin/file label names when none exists yet.
def get_gauge(name, docstring)
  return @registry.get(name) if @registry.exist?(name)

  label_names = @base_labels.keys + [:plugin_id, :type, :path]
  @registry.gauge(name, docstring: docstring, labels: label_names)
end
|
97
|
+
end
|
98
|
+
end
|