phihos-fluent-plugin-prometheus 2.0.3.pre.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/linux.yml +34 -0
  3. data/.gitignore +16 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +14 -0
  6. data/ChangeLog +43 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE +202 -0
  9. data/README.md +537 -0
  10. data/Rakefile +7 -0
  11. data/fluent-plugin-prometheus.gemspec +22 -0
  12. data/lib/fluent/plugin/filter_prometheus.rb +50 -0
  13. data/lib/fluent/plugin/in_prometheus/async_wrapper.rb +47 -0
  14. data/lib/fluent/plugin/in_prometheus.rb +230 -0
  15. data/lib/fluent/plugin/in_prometheus_monitor.rb +107 -0
  16. data/lib/fluent/plugin/in_prometheus_output_monitor.rb +234 -0
  17. data/lib/fluent/plugin/in_prometheus_tail_monitor.rb +98 -0
  18. data/lib/fluent/plugin/out_prometheus.rb +49 -0
  19. data/lib/fluent/plugin/prometheus/data_store.rb +103 -0
  20. data/lib/fluent/plugin/prometheus/placeholder_expander.rb +132 -0
  21. data/lib/fluent/plugin/prometheus.rb +445 -0
  22. data/lib/fluent/plugin/prometheus_metrics.rb +77 -0
  23. data/misc/fluentd_sample.conf +170 -0
  24. data/misc/nginx_proxy.conf +22 -0
  25. data/misc/prometheus.yaml +13 -0
  26. data/misc/prometheus_alerts.yaml +59 -0
  27. data/spec/fluent/plugin/filter_prometheus_spec.rb +145 -0
  28. data/spec/fluent/plugin/in_prometheus_monitor_spec.rb +42 -0
  29. data/spec/fluent/plugin/in_prometheus_spec.rb +225 -0
  30. data/spec/fluent/plugin/in_prometheus_tail_monitor_spec.rb +42 -0
  31. data/spec/fluent/plugin/out_prometheus_spec.rb +166 -0
  32. data/spec/fluent/plugin/prometheus/placeholder_expander_spec.rb +110 -0
  33. data/spec/fluent/plugin/prometheus_metrics_spec.rb +138 -0
  34. data/spec/fluent/plugin/shared.rb +248 -0
  35. data/spec/spec_helper.rb +10 -0
  36. metadata +176 -0
@@ -0,0 +1,230 @@
1
+ require 'fluent/plugin/input'
2
+ require 'fluent/plugin/prometheus'
3
+ require 'fluent/plugin/prometheus_metrics'
4
+ require 'net/http'
5
+ require 'openssl'
6
+
7
module Fluent::Plugin
  # Input plugin that serves the Prometheus client registry over HTTP(S).
  #
  # Every worker listens on `port + fluentd_worker_id`. `metrics_path` returns
  # the registry of the answering worker, while `aggregated_metrics_path`
  # requests `metrics_path` from each worker port and merges the responses.
  class PrometheusInput < Fluent::Plugin::Input
    Fluent::Plugin.register_input('prometheus', self)

    helpers :thread, :http_server

    config_param :bind, :string, default: '0.0.0.0'
    config_param :port, :integer, default: 24231
    config_param :metrics_path, :string, default: '/metrics'
    config_param :aggregated_metrics_path, :string, default: '/aggregated_metrics'

    desc 'Enable ssl configuration for the server'
    config_section :ssl, required: false, multi: false do
      config_param :enable, :bool, default: false, deprecated: 'Use <transport tls> section'

      desc 'Path to the ssl certificate in PEM format. Read from file and added to conf as "SSLCertificate"'
      config_param :certificate_path, :string, default: nil, deprecated: 'Use cert_path in <transport tls> section'

      desc 'Path to the ssl private key in PEM format. Read from file and added to conf as "SSLPrivateKey"'
      config_param :private_key_path, :string, default: nil, deprecated: 'Use private_key_path in <transport tls> section'

      desc 'Path to CA in PEM format. Read from file and added to conf as "SSLCACertificateFile"'
      config_param :ca_path, :string, default: nil, deprecated: 'Use ca_path in <transport tls> section'

      desc 'Additional ssl conf for the server. Ref: https://github.com/ruby/webrick/blob/master/lib/webrick/ssl.rb'
      config_param :extra_conf, :hash, default: nil, symbolize_keys: true, deprecated: 'See http helper config'
    end

    def initialize
      super
      @registry = ::Prometheus::Client.registry
      @secure = nil
      # Only set by start_webrick; initialised here so shutdown can test it safely.
      @webrick_server = nil
    end

    # Resolves the worker count and TLS mode, then offsets the listening port
    # by the worker id so that each worker binds its own port.
    def configure(conf)
      super

      # Get how many workers we have
      sysconf = if self.respond_to?(:owner) && owner.respond_to?(:system_config)
                  owner.system_config
                elsif self.respond_to?(:system_config)
                  self.system_config
                else
                  nil
                end
      @num_workers = sysconf && sysconf.workers ? sysconf.workers : 1
      @secure = @transport_config.protocol == :tls || (@ssl && @ssl['enable'])

      # @base_port is kept so the aggregated endpoint can enumerate all worker ports.
      @base_port = @port
      @port += fluentd_worker_id
    end

    def multi_workers_ready?
      true
    end

    # Starts the metrics endpoint. The deprecated <ssl> section combined with
    # extra_conf is served by WEBrick; every other configuration goes through
    # the http_server plugin helper.
    #
    # Raises Fluent::ConfigError when only one of certificate_path and
    # private_key_path is given in an enabled <ssl> section.
    def start
      super

      scheme = @secure ? 'https' : 'http'
      # Normalise the leading slash so the logged URL is well-formed.
      path = "/#{@metrics_path.sub(%r{\A/}, '')}"
      log.debug "listening prometheus http server on #{scheme}://#{@bind}:#{@port}#{path} for worker#{fluentd_worker_id}"

      proto = @secure ? :tls : :tcp

      if @ssl && @ssl['enable'] && @ssl['extra_conf']
        start_webrick
        return
      end

      begin
        require 'async'
        require 'fluent/plugin/in_prometheus/async_wrapper'
        extend AsyncWrapper
      rescue LoadError => _
        # ignore: the async gem is optional, the Net::HTTP based do_request stays in use
      end

      tls_opt = legacy_tls_options

      http_server_create_http_server(:in_prometheus_server, addr: @bind, port: @port, logger: log, proto: proto, tls_opts: tls_opt) do |server|
        server.get(@metrics_path) { |_req| all_metrics }
        server.get(@aggregated_metrics_path) { |_req| all_workers_metrics }
      end
    end

    # Stops the WEBrick server when one was started, then runs the regular shutdown.
    def shutdown
      if @webrick_server
        @webrick_server.shutdown
        @webrick_server = nil
      end
      super
    end

    private

    # True when exactly one of certificate_path / private_key_path is set in <ssl>.
    def ssl_key_pair_incomplete?
      @ssl['certificate_path'].nil? != @ssl['private_key_path'].nil?
    end

    # Translates the deprecated <ssl> section into the option hash handed to the
    # http_server helper as tls_opts. Returns nil when <ssl> is absent or disabled.
    def legacy_tls_options
      return nil unless @ssl && @ssl['enable']

      if ssl_key_pair_incomplete?
        raise Fluent::ConfigError.new('both certificate_path and private_key_path must be defined')
      end

      ssl_config = {}
      ssl_config['cert_path'] = @ssl['certificate_path'] if @ssl['certificate_path']
      ssl_config['private_key_path'] = @ssl['private_key_path'] if @ssl['private_key_path']

      if @ssl['ca_path']
        ssl_config['ca_path'] = @ssl['ca_path']
        # Only ca_path is insecure in fluentd
        # https://github.com/fluent/fluentd/blob/2236ad45197ba336fd9faf56f442252c8b226f25/lib/fluent/plugin_helper/cert_option.rb#L68
        ssl_config['insecure'] = true
      end

      ssl_config
    end

    # For compatibility because http helper can't support extra_conf option
    #
    # Raises RuntimeError when only one of certificate_path and
    # private_key_path is configured.
    def start_webrick
      require 'webrick/https'
      require 'webrick'

      config = {
        BindAddress: @bind,
        Port: @port,
        MaxClients: 5,
        Logger: WEBrick::Log.new(STDERR, WEBrick::Log::FATAL),
        AccessLog: [],
      }
      if ssl_key_pair_incomplete?
        raise RuntimeError.new("certificate_path and private_key_path must both be defined")
      end

      ssl_config = {
        SSLEnable: true,
        SSLCertName: [['CN', 'nobody'], ['DC', 'example']]
      }

      if @ssl['certificate_path']
        cert = OpenSSL::X509::Certificate.new(File.read(@ssl['certificate_path']))
        ssl_config[:SSLCertificate] = cert
      end

      if @ssl['private_key_path']
        # OpenSSL::PKey.read expects the key material, not a file name.
        key = OpenSSL::PKey.read(File.read(@ssl['private_key_path']))
        ssl_config[:SSLPrivateKey] = key
      end

      ssl_config[:SSLCACertificateFile] = @ssl['ca_path'] if @ssl['ca_path']
      ssl_config = ssl_config.merge(@ssl['extra_conf']) if @ssl['extra_conf']
      # Keys of the base config win over anything supplied through extra_conf.
      config = ssl_config.merge(config)

      @log.on_debug do
        @log.debug("WEBrick conf: #{config}")
      end

      @webrick_server = WEBrick::HTTPServer.new(config)
      mount_webrick_endpoint(@metrics_path, method(:all_metrics))
      mount_webrick_endpoint(@aggregated_metrics_path, method(:all_workers_metrics))

      thread_create(:in_prometheus_webrick) do
        @webrick_server.start
      end
    end

    # Mounts +handler+ (a callable returning [status, headers, body]) at +path+
    # on the WEBrick server and copies its result into the response.
    def mount_webrick_endpoint(path, handler)
      @webrick_server.mount_proc(path) do |_req, res|
        status, header, body = handler.call
        res.status = status
        res['Content-Type'] = header['Content-Type']
        res.body = body
        res
      end
    end

    # Returns [status, headers, body] with the text exposition of this worker's
    # registry, or a 500 triple carrying the error message on failure.
    def all_metrics
      [200, { 'Content-Type' => ::Prometheus::Client::Formats::Text::CONTENT_TYPE }, ::Prometheus::Client::Formats::Text.marshal(@registry)]
    rescue => e
      [500, { 'Content-Type' => 'text/plain' }, e.to_s]
    end

    # Returns [status, headers, body] with the merged metrics of all workers.
    # Responses whose status is not 200 are left out of the aggregate.
    def all_workers_metrics
      full_result = PromMetricsAggregator.new

      send_request_to_each_worker do |resp|
        if resp.code.to_s == '200'
          full_result.add_metrics(resp.body)
        end
      end

      [200, { 'Content-Type' => ::Prometheus::Client::Formats::Text::CONTENT_TYPE }, full_result.get_metrics]
    rescue => e
      [500, { 'Content-Type' => 'text/plain' }, e.to_s]
    end

    # Requests metrics_path from every worker port and yields each response.
    def send_request_to_each_worker
      # A wildcard bind address is not connectable; go through loopback instead.
      bind = (@bind == '0.0.0.0') ? '127.0.0.1' : @bind
      (@base_port...(@base_port + @num_workers)).each do |worker_port|
        do_request(host: bind, port: worker_port, secure: @secure) do |http|
          yield(http.get(@metrics_path))
        end
      end
    end

    # might be replaced by AsyncWrapper if async gem is installed
    #
    # Opens an HTTP(S) session to host:port and yields the started Net::HTTP object.
    def do_request(host:, port:, secure:)
      http = Net::HTTP.new(host, port)

      if secure
        http.use_ssl = true
        # target is our child process. so it's secure.
        http.verify_mode = OpenSSL::SSL::VERIFY_NONE
      end

      http.start do
        yield(http)
      end
    end
  end
end
@@ -0,0 +1,107 @@
1
+ require 'fluent/plugin/input'
2
+ require 'fluent/plugin/in_monitor_agent'
3
+ require 'fluent/plugin/prometheus'
4
+
5
module Fluent::Plugin
  # Input plugin that periodically copies buffer and retry figures reported by
  # the monitor agent into Prometheus gauges.
  class PrometheusMonitorInput < Fluent::Plugin::Input
    Fluent::Plugin.register_input('prometheus_monitor', self)
    include Fluent::Plugin::PrometheusLabelParser

    helpers :timer

    config_param :interval, :time, default: 5
    attr_reader :registry

    def initialize
      super
      @registry = ::Prometheus::Client.registry
    end

    def multi_workers_ready?
      true
    end

    # Expands the placeholders of the configured labels and creates the monitor
    # agent used as data source.
    #
    # Raises Fluent::ConfigError when a label value is not a plain String.
    def configure(conf)
      super
      hostname = Socket.gethostname
      expander = Fluent::Plugin::Prometheus
                 .placeholder_expander(log)
                 .build({ 'hostname' => hostname, 'worker_id' => fluentd_worker_id })

      @base_labels = parse_labels_elements(conf)
      @base_labels.each_pair do |key, value|
        raise Fluent::ConfigError, "record accessor syntax is not available in prometheus_monitor" unless value.is_a?(String)

        @base_labels[key] = expander.expand(value)
      end

      agent_class =
        if defined?(Fluent::Plugin) && defined?(Fluent::Plugin::MonitorAgentInput)
          # from v0.14.6
          Fluent::Plugin::MonitorAgentInput
        else
          Fluent::MonitorAgentInput
        end
      @monitor_agent = agent_class.new
    end

    # Looks up (or registers) the gauges and schedules update_monitor_info to
    # run every `interval`.
    def start
      super

      @buffer_newest_timekey = get_gauge(:fluentd_status_buffer_newest_timekey, 'Newest timekey in buffer.')
      @buffer_oldest_timekey = get_gauge(:fluentd_status_buffer_oldest_timekey, 'Oldest timekey in buffer.')

      # Keys are the field names looked up in each monitor agent entry.
      @monitor_info = {
        'buffer_queue_length' => get_gauge(:fluentd_status_buffer_queue_length, 'Current buffer queue length.'),
        'buffer_total_queued_size' => get_gauge(:fluentd_status_buffer_total_bytes, 'Current total size of queued buffers.'),
        'retry_count' => get_gauge(:fluentd_status_retry_count, 'Current retry counts.'),
      }

      timer_execute(:in_prometheus_monitor, @interval, &method(:update_monitor_info))
    end

    # Timer callback: sets every gauge from the current monitor agent snapshot.
    def update_monitor_info
      @monitor_agent.plugins_info_all.each do |info|
        label = labels(info)

        @monitor_info.each do |name, metric|
          value = info[name]
          metric.set(value, labels: label) if value
        end

        timekeys = info["buffer_timekeys"]
        next unless timekeys && !timekeys.empty?

        @buffer_newest_timekey.set(timekeys.max, labels: label)
        @buffer_oldest_timekey.set(timekeys.min, labels: label)
      end
    end

    # Returns the label set for one plugin: the configured base labels plus the
    # plugin's id, category and type.
    def labels(plugin_info)
      plugin_labels = {
        plugin_id: plugin_info["plugin_id"],
        plugin_category: plugin_info["plugin_category"],
        type: plugin_info["type"],
      }
      @base_labels.merge(plugin_labels)
    end

    # Returns the gauge registered under +name+, registering it first when the
    # registry does not know it yet.
    def get_gauge(name, docstring)
      return @registry.get(name) if @registry.exist?(name)

      label_names = @base_labels.keys + [:plugin_id, :plugin_category, :type]
      @registry.gauge(name, docstring: docstring, labels: label_names)
    end
  end
end
@@ -0,0 +1,234 @@
1
+ require 'fluent/plugin/input'
2
+ require 'fluent/plugin/in_monitor_agent'
3
+ require 'fluent/plugin/prometheus'
4
+
5
module Fluent::Plugin
  # Input plugin that periodically exports buffer, output and retry statistics
  # of output plugins, as reported by the monitor agent, to Prometheus.
  #
  # With `gauge_all true` (default) every metric is a gauge; otherwise the
  # metrics created through get_gauge_or_counter are counters.
  class PrometheusOutputMonitorInput < Fluent::Plugin::Input
    Fluent::Plugin.register_input('prometheus_output_monitor', self)
    include Fluent::Plugin::PrometheusLabelParser

    helpers :timer

    config_param :interval, :time, default: 5
    config_param :gauge_all, :bool, default: true
    attr_reader :registry

    # Instance variables requested from the monitor agent for each plugin.
    MONITOR_IVARS = [
      :retry,

      :num_errors,
      :emit_count,

      # for v0.12
      :last_retry_time,

      # from v0.14
      :emit_records,
      :write_count,
      :rollback_count,

      # from v1.6.0
      :flush_time_count,
      :slow_flush_count,
    ]

    def initialize
      super
      @registry = ::Prometheus::Client.registry
    end

    def multi_workers_ready?
      true
    end

    # Expands the placeholders of the configured labels, creates the monitor
    # agent and selects the registry method used by get_gauge_or_counter.
    #
    # Raises Fluent::ConfigError when a label value is not a plain String.
    def configure(conf)
      super
      hostname = Socket.gethostname
      expander_builder = Fluent::Plugin::Prometheus.placeholder_expander(log)
      expander = expander_builder.build({ 'hostname' => hostname, 'worker_id' => fluentd_worker_id })
      @base_labels = parse_labels_elements(conf)
      @base_labels.each do |key, value|
        unless value.is_a?(String)
          raise Fluent::ConfigError, "record accessor syntax is not available in prometheus_output_monitor"
        end
        @base_labels[key] = expander.expand(value)
      end

      @monitor_agent = Fluent::Plugin::MonitorAgentInput.new

      @gauge_or_counter = @gauge_all ? :gauge : :counter
    end

    # Looks up (or registers) all metrics and schedules update_monitor_info to
    # run every `interval`.
    def start
      super

      @metrics = {
        # Buffer metrics
        buffer_total_queued_size: get_gauge(
          :fluentd_output_status_buffer_total_bytes,
          'Current total size of stage and queue buffers.'),
        buffer_stage_length: get_gauge(
          :fluentd_output_status_buffer_stage_length,
          'Current length of stage buffers.'),
        buffer_stage_byte_size: get_gauge(
          :fluentd_output_status_buffer_stage_byte_size,
          'Current total size of stage buffers.'),
        buffer_queue_length: get_gauge(
          :fluentd_output_status_buffer_queue_length,
          'Current length of queue buffers.'),
        # The name lacks the "buffer_" segment its siblings carry; it is kept
        # because renaming would change the exported series.
        buffer_queue_byte_size: get_gauge(
          :fluentd_output_status_queue_byte_size,
          'Current total size of queue buffers.'),
        buffer_available_buffer_space_ratios: get_gauge(
          :fluentd_output_status_buffer_available_space_ratio,
          'Ratio of available space in buffer.'),
        buffer_newest_timekey: get_gauge(
          :fluentd_output_status_buffer_newest_timekey,
          'Newest timekey in buffer.'),
        buffer_oldest_timekey: get_gauge(
          :fluentd_output_status_buffer_oldest_timekey,
          'Oldest timekey in buffer.'),

        # Output metrics
        retry_counts: get_gauge_or_counter(
          :fluentd_output_status_retry_count,
          'Current retry counts.'),
        num_errors: get_gauge_or_counter(
          :fluentd_output_status_num_errors,
          'Current number of errors.'),
        emit_count: get_gauge_or_counter(
          :fluentd_output_status_emit_count,
          'Current emit counts.'),
        emit_records: get_gauge_or_counter(
          :fluentd_output_status_emit_records,
          'Current emit records.'),
        write_count: get_gauge_or_counter(
          :fluentd_output_status_write_count,
          'Current write counts.'),
        # NOTE(review): always a gauge, unlike the neighbouring metrics that
        # follow gauge_all - confirm this is intentional.
        rollback_count: get_gauge(
          :fluentd_output_status_rollback_count,
          'Current rollback counts.'),
        flush_time_count: get_gauge_or_counter(
          :fluentd_output_status_flush_time_count,
          'Total flush time.'),
        slow_flush_count: get_gauge_or_counter(
          :fluentd_output_status_slow_flush_count,
          'Current slow flush counts.'),
        retry_wait: get_gauge(
          :fluentd_output_status_retry_wait,
          'Current retry wait'),
      }
      timer_execute(:in_prometheus_output_monitor, @interval, &method(:update_monitor_info))
    end

    # Timer callback: refreshes every metric from the monitor agent entries
    # whose plugin_category is "output".
    def update_monitor_info
      opts = {
        ivars: MONITOR_IVARS,
        with_retry: true,
      }

      agent_info = @monitor_agent.plugins_info_all(opts).select {|info|
        info['plugin_category'] == 'output'.freeze
      }

      # Field name in the monitor agent entry => metrics fed from that field.
      monitor_info = {
        # buffer metrics
        'buffer_total_queued_size' => [@metrics[:buffer_total_queued_size]],
        'buffer_stage_length' => [@metrics[:buffer_stage_length]],
        'buffer_stage_byte_size' => [@metrics[:buffer_stage_byte_size]],
        'buffer_queue_length' => [@metrics[:buffer_queue_length]],
        'buffer_queue_byte_size' => [@metrics[:buffer_queue_byte_size]],
        'buffer_available_buffer_space_ratios' => [@metrics[:buffer_available_buffer_space_ratios]],
        'buffer_newest_timekey' => [@metrics[:buffer_newest_timekey]],
        'buffer_oldest_timekey' => [@metrics[:buffer_oldest_timekey]],

        # output metrics
        'retry_count' => [@metrics[:retry_counts], @metrics[:num_errors]],
        # Needed since Fluentd v1.14 due to metrics extensions.
        'write_count' => [@metrics[:write_count]],
        'emit_count' => [@metrics[:emit_count]],
        'emit_records' => [@metrics[:emit_records]],
        'rollback_count' => [@metrics[:rollback_count]],
        'flush_time_count' => [@metrics[:flush_time_count]],
        'slow_flush_count' => [@metrics[:slow_flush_count]],
      }
      # Not needed for Fluentd v1.14 but leave as-is for backward compatibility.
      instance_vars_info = {
        num_errors: @metrics[:num_errors],
        write_count: @metrics[:write_count],
        emit_count: @metrics[:emit_count],
        emit_records: @metrics[:emit_records],
        rollback_count: @metrics[:rollback_count],
        flush_time_count: @metrics[:flush_time_count],
        slow_flush_count: @metrics[:slow_flush_count],
      }

      agent_info.each do |info|
        label = labels(info)

        monitor_info.each do |name, metrics|
          value = info[name]
          next unless value

          metrics.each { |metric| update_metric(metric, value, label) }
        end

        ivars = info['instance_variables']
        if ivars
          instance_vars_info.each do |name, metric|
            value = ivars[name]
            update_metric(metric, value, label) if value
          end
        end

        update_retry_wait(info, label)
      end
    end

    # Returns the label set for one plugin: the configured base labels plus the
    # plugin's id and type.
    def labels(plugin_info)
      @base_labels.merge(
        plugin_id: plugin_info["plugin_id"],
        type: plugin_info["type"],
      )
    end

    # Returns the gauge registered under +name+, registering it first when the
    # registry does not know it yet.
    def get_gauge(name, docstring)
      if @registry.exist?(name)
        @registry.get(name)
      else
        @registry.gauge(name, docstring: docstring, labels: @base_labels.keys + [:plugin_id, :type])
      end
    end

    # Like get_gauge, but registers a gauge or a counter depending on gauge_all.
    def get_gauge_or_counter(name, docstring)
      if @registry.exist?(name)
        @registry.get(name)
      else
        @registry.public_send(@gauge_or_counter, name, docstring: docstring, labels: @base_labels.keys + [:plugin_id, :type])
      end
    end

    private

    # Brings +metric+ to the observed +value+ for the given label set: gauges
    # are set directly, counters are advanced by the difference to their
    # current value.
    def update_metric(metric, value, label)
      if metric.is_a?(::Prometheus::Client::Gauge)
        metric.set(value, labels: label)
      elsif metric.is_a?(::Prometheus::Client::Counter)
        delta = value - metric.get(labels: label)
        if delta < 0
          # Counter#increment rejects negative steps; raising here would abort
          # the whole timer callback, so the sample is skipped instead.
          log.debug "skip counter update because observed value decreased: observed=#{value} labels=#{label}"
          return
        end
        metric.increment(by: delta, labels: label)
      end
    end

    # compute current retry_wait
    #
    # Does nothing when the entry has no retry information; reports 0 when
    # either end of the interval is unknown.
    def update_retry_wait(info, label)
      retry_info = info['retry']
      return unless retry_info

      next_time = retry_info['next_time']
      start_time = retry_info['start']
      if start_time.nil? && info['instance_variables']
        # v0.12 does not include start, use last_retry_time instead
        start_time = info['instance_variables'][:last_retry_time]
      end

      wait = next_time && start_time ? next_time - start_time : 0
      @metrics[:retry_wait].set(wait.to_f, labels: label)
    end
  end
end
@@ -0,0 +1,98 @@
1
+ require 'fluent/plugin/input'
2
+ require 'fluent/plugin/in_monitor_agent'
3
+ require 'fluent/plugin/prometheus'
4
+
5
module Fluent::Plugin
  # Input plugin that periodically exports the read position and inode of the
  # files watched by "tail" plugins as Prometheus gauges.
  class PrometheusTailMonitorInput < Fluent::Plugin::Input
    Fluent::Plugin.register_input('prometheus_tail_monitor', self)
    include Fluent::Plugin::PrometheusLabelParser

    helpers :timer

    config_param :interval, :time, default: 5
    attr_reader :registry

    # Instance variables requested from the monitor agent for each plugin.
    MONITOR_IVARS = [
      :tails,
    ]

    def initialize
      super
      @registry = ::Prometheus::Client.registry
    end

    def multi_workers_ready?
      true
    end

    # Expands the placeholders of the configured labels and creates the monitor
    # agent used as data source.
    #
    # Raises Fluent::ConfigError when a label value is not a plain String.
    def configure(conf)
      super
      hostname = Socket.gethostname
      expander = Fluent::Plugin::Prometheus
                 .placeholder_expander(log)
                 .build({ 'hostname' => hostname, 'worker_id' => fluentd_worker_id })

      @base_labels = parse_labels_elements(conf)
      @base_labels.each_pair do |key, value|
        raise Fluent::ConfigError, "record accessor syntax is not available in prometheus_tail_monitor" unless value.is_a?(String)

        @base_labels[key] = expander.expand(value)
      end

      @monitor_agent = Fluent::Plugin::MonitorAgentInput.new
    end

    # Looks up (or registers) the gauges and schedules update_monitor_info to
    # run every `interval`.
    def start
      super

      position_gauge = get_gauge(:fluentd_tail_file_position, 'Current position of file.')
      inode_gauge = get_gauge(:fluentd_tail_file_inode, 'Current inode of file.')
      @metrics = { position: position_gauge, inode: inode_gauge }

      timer_execute(:in_prometheus_tail_monitor, @interval, &method(:update_monitor_info))
    end

    # Timer callback: sets the inode and position gauge for every watcher of
    # every monitor agent entry whose type is "tail".
    def update_monitor_info
      query = { ivars: MONITOR_IVARS }
      tail_plugins = @monitor_agent.plugins_info_all(query).select do |info|
        info['type'] == 'tail'.freeze
      end

      tail_plugins.each do |info|
        tails = info['instance_variables'][:tails]
        next if tails.nil?

        # Iterate over a copy of the collection rather than the live object.
        tails.clone.each do |_key, watcher|
          # Access to internal variable of internal class...
          # Very fragile implementation
          position_entry = watcher.instance_variable_get(:@pe)
          label = labels(info, watcher.path)
          @metrics[:inode].set(position_entry.read_inode, labels: label)
          @metrics[:position].set(position_entry.read_pos, labels: label)
        end
      end
    end

    # Returns the label set for one watched file: the configured base labels
    # plus the plugin's id and type and the file path.
    def labels(plugin_info, path)
      file_labels = {
        plugin_id: plugin_info["plugin_id"],
        type: plugin_info["type"],
        path: path,
      }
      @base_labels.merge(file_labels)
    end

    # Returns the gauge registered under +name+, registering it first when the
    # registry does not know it yet.
    def get_gauge(name, docstring)
      return @registry.get(name) if @registry.exist?(name)

      label_names = @base_labels.keys + [:plugin_id, :type, :path]
      @registry.gauge(name, docstring: docstring, labels: label_names)
    end
  end
end