phihos-fluent-plugin-prometheus 2.0.3.pre.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/linux.yml +34 -0
  3. data/.gitignore +16 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +14 -0
  6. data/ChangeLog +43 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE +202 -0
  9. data/README.md +537 -0
  10. data/Rakefile +7 -0
  11. data/fluent-plugin-prometheus.gemspec +22 -0
  12. data/lib/fluent/plugin/filter_prometheus.rb +50 -0
  13. data/lib/fluent/plugin/in_prometheus/async_wrapper.rb +47 -0
  14. data/lib/fluent/plugin/in_prometheus.rb +230 -0
  15. data/lib/fluent/plugin/in_prometheus_monitor.rb +107 -0
  16. data/lib/fluent/plugin/in_prometheus_output_monitor.rb +234 -0
  17. data/lib/fluent/plugin/in_prometheus_tail_monitor.rb +98 -0
  18. data/lib/fluent/plugin/out_prometheus.rb +49 -0
  19. data/lib/fluent/plugin/prometheus/data_store.rb +103 -0
  20. data/lib/fluent/plugin/prometheus/placeholder_expander.rb +132 -0
  21. data/lib/fluent/plugin/prometheus.rb +445 -0
  22. data/lib/fluent/plugin/prometheus_metrics.rb +77 -0
  23. data/misc/fluentd_sample.conf +170 -0
  24. data/misc/nginx_proxy.conf +22 -0
  25. data/misc/prometheus.yaml +13 -0
  26. data/misc/prometheus_alerts.yaml +59 -0
  27. data/spec/fluent/plugin/filter_prometheus_spec.rb +145 -0
  28. data/spec/fluent/plugin/in_prometheus_monitor_spec.rb +42 -0
  29. data/spec/fluent/plugin/in_prometheus_spec.rb +225 -0
  30. data/spec/fluent/plugin/in_prometheus_tail_monitor_spec.rb +42 -0
  31. data/spec/fluent/plugin/out_prometheus_spec.rb +166 -0
  32. data/spec/fluent/plugin/prometheus/placeholder_expander_spec.rb +110 -0
  33. data/spec/fluent/plugin/prometheus_metrics_spec.rb +138 -0
  34. data/spec/fluent/plugin/shared.rb +248 -0
  35. data/spec/spec_helper.rb +10 -0
  36. metadata +176 -0
@@ -0,0 +1,230 @@
1
+ require 'fluent/plugin/input'
2
+ require 'fluent/plugin/prometheus'
3
+ require 'fluent/plugin/prometheus_metrics'
4
+ require 'net/http'
5
+ require 'openssl'
6
+
7
+ module Fluent::Plugin
8
+ class PrometheusInput < Fluent::Plugin::Input
9
# Register this class as the 'prometheus' input plugin.
+ Fluent::Plugin.register_input('prometheus', self)
10
+
11
# Plugin helpers: thread_create and http_server_create_http_server are used below.
+ helpers :thread, :http_server
12
+
13
# Address and base port of the metrics server; #configure adds the worker id to the port.
+ config_param :bind, :string, default: '0.0.0.0'
14
+ config_param :port, :integer, default: 24231
15
# Path that serves this worker's own metrics.
+ config_param :metrics_path, :string, default: '/metrics'
16
# Path that serves the metrics gathered from every worker.
+ config_param :aggregated_metrics_path, :string, default: '/aggregated_metrics'
17
+
18
# Legacy TLS settings; every parameter in this section is marked deprecated.
+ desc 'Enable ssl configuration for the server'
19
+ config_section :ssl, required: false, multi: false do
20
+ config_param :enable, :bool, default: false, deprecated: 'Use <transport tls> section'
21
+
22
+ desc 'Path to the ssl certificate in PEM format. Read from file and added to conf as "SSLCertificate"'
23
+ config_param :certificate_path, :string, default: nil, deprecated: 'Use cert_path in <transport tls> section'
24
+
25
+ desc 'Path to the ssl private key in PEM format. Read from file and added to conf as "SSLPrivateKey"'
26
+ config_param :private_key_path, :string, default: nil, deprecated: 'Use private_key_path in <transport tls> section'
27
+
28
+ desc 'Path to CA in PEM format. Read from file and added to conf as "SSLCACertificateFile"'
29
+ config_param :ca_path, :string, default: nil, deprecated: 'Use ca_path in <transport tls> section'
30
+
31
# When set together with enable, #start serves the endpoints through WEBrick instead of the http_server helper.
+ desc 'Additional ssl conf for the server. Ref: https://github.com/ruby/webrick/blob/master/lib/webrick/ssl.rb'
32
+ config_param :extra_conf, :hash, default: nil, symbolize_keys: true, deprecated: 'See http helper config'
33
+ end
34
+
35
# Builds the input around the process-wide Prometheus registry.
# Whether TLS is in use is only decided in #configure, so @secure starts
# out as nil.
def initialize
  super
  @secure = nil
  @registry = ::Prometheus::Client.registry
end
40
+
41
# Applies the parsed configuration and derives the per-worker settings.
#
# The worker count comes from the first object that exposes
# +system_config+ (the owner, then this plugin); it falls back to 1.
# The configured port is remembered as @base_port and @port is moved up by
# this worker's id, so each worker listens on its own port.
def configure(conf)
  super

  # Get how many workers we have
  system_conf =
    if respond_to?(:owner) && owner.respond_to?(:system_config)
      owner.system_config
    elsif respond_to?(:system_config)
      system_config
    end
  worker_count = system_conf && system_conf.workers
  @num_workers = worker_count || 1

  tls_transport = @transport_config.protocol == :tls
  @secure = tls_transport || (@ssl && @ssl['enable'])

  @base_port = @port
  @port = @base_port + fluentd_worker_id
end
58
+
59
# Always true: this input declares itself usable with multiple workers
# (each worker gets its own port in #configure).
def multi_workers_ready?
  true
end
62
+
63
# Starts the metrics endpoint for this worker.
#
# When the deprecated ssl section is enabled and carries +extra_conf+, the
# endpoints are served through WEBrick (#start_webrick). Otherwise the
# http_server helper serves @metrics_path and @aggregated_metrics_path; if
# the deprecated ssl section is enabled, its paths are translated into the
# helper's TLS options.
#
# Raises Fluent::ConfigError when an enabled ssl section defines only one
# of certificate_path / private_key_path.
def start
  super

  scheme = @secure ? 'https' : 'http'
  # metrics_path normally already starts with '/'; avoid emitting '//' in the log.
  path = @metrics_path.start_with?('/') ? @metrics_path : "/#{@metrics_path}"
  log.debug "listening prometheus http server on #{scheme}://#{@bind}:#{@port}#{path} for worker#{fluentd_worker_id}"

  proto = @secure ? :tls : :tcp
  legacy_ssl = @ssl && @ssl['enable']

  if legacy_ssl && @ssl['extra_conf']
    start_webrick
    return
  end

  begin
    require 'async'
    require 'fluent/plugin/in_prometheus/async_wrapper'
    extend AsyncWrapper
  rescue LoadError
    # The async wrapper is optional; without it the methods defined in this
    # class are used as they are.
  end

  tls_opt = nil
  if legacy_ssl
    cert_path = @ssl['certificate_path']
    key_path = @ssl['private_key_path']

    if (cert_path && key_path.nil?) || (cert_path.nil? && key_path)
      raise Fluent::ConfigError.new('both certificate_path and private_key_path must be defined')
    end

    tls_opt = {}
    tls_opt['cert_path'] = cert_path if cert_path
    tls_opt['private_key_path'] = key_path if key_path

    if @ssl['ca_path']
      tls_opt['ca_path'] = @ssl['ca_path']
      # Only ca_path is insecure in fluentd
      # https://github.com/fluent/fluentd/blob/2236ad45197ba336fd9faf56f442252c8b226f25/lib/fluent/plugin_helper/cert_option.rb#L68
      tls_opt['insecure'] = true
    end
  end

  http_server_create_http_server(:in_prometheus_server, addr: @bind, port: @port, logger: log, proto: proto, tls_opts: tls_opt) do |server|
    server.get(@metrics_path) { |_req| all_metrics }
    server.get(@aggregated_metrics_path) { |_req| all_workers_metrics }
  end
end
114
+
115
# Stops the WEBrick server if #start_webrick created one, forgets it, and
# then continues with the regular plugin shutdown.
def shutdown
  server = @webrick_server
  if server
    server.shutdown
    @webrick_server = nil
  end
  super
end
122
+
123
+ private
124
+
125
# Serves the metrics endpoints through a WEBrick HTTPS server.
#
# Kept for compatibility: the http helper can't support the extra_conf
# option, so #start delegates here when the deprecated ssl section sets it.
# The server is stored in @webrick_server and run in a thread created with
# +thread_create+.
#
# Raises RuntimeError when only one of certificate_path / private_key_path
# is configured.
def start_webrick
  require 'webrick/https'
  require 'webrick'

  config = {
    BindAddress: @bind,
    Port: @port,
    MaxClients: 5,
    Logger: WEBrick::Log.new(STDERR, WEBrick::Log::FATAL),
    AccessLog: [],
  }

  cert_path = @ssl['certificate_path']
  key_path = @ssl['private_key_path']
  if (cert_path && key_path.nil?) || (cert_path.nil? && key_path)
    raise RuntimeError.new("certificate_path and private_key_path must both be defined")
  end

  ssl_config = {
    SSLEnable: true,
    SSLCertName: [['CN', 'nobody'], ['DC', 'example']]
  }

  if cert_path
    ssl_config[:SSLCertificate] = OpenSSL::X509::Certificate.new(File.read(cert_path))
  end

  if key_path
    # OpenSSL::PKey.read parses key material (a PEM/DER string or an IO),
    # not a file name, so the file content has to be read first.
    ssl_config[:SSLPrivateKey] = OpenSSL::PKey.read(File.read(key_path))
  end

  ssl_config[:SSLCACertificateFile] = @ssl['ca_path'] if @ssl['ca_path']
  ssl_config = ssl_config.merge(@ssl['extra_conf']) if @ssl['extra_conf']
  # The base server settings win over anything of the same name in ssl_config.
  config = ssl_config.merge(config)

  @log.on_debug do
    @log.debug("WEBrick conf: #{config}")
  end

  @webrick_server = WEBrick::HTTPServer.new(config)

  # Both endpoints answer with a [status, headers, body] triple.
  endpoints = [
    [@metrics_path, method(:all_metrics)],
    [@aggregated_metrics_path, method(:all_workers_metrics)],
  ]
  endpoints.each do |path, handler|
    @webrick_server.mount_proc(path) do |_req, res|
      status, header, body = handler.call
      res.status = status
      res['Content-Type'] = header['Content-Type']
      res.body = body
      res
    end
  end

  thread_create(:in_prometheus_webrick) do
    @webrick_server.start
  end
end
185
+
186
# Renders this worker's registry in the Prometheus text format.
#
# Returns a [status, headers, body] triple: 200 with the marshalled
# registry, or 500 with the error message as plain text when rendering
# raises.
def all_metrics
  headers = { 'Content-Type' => ::Prometheus::Client::Formats::Text::CONTENT_TYPE }
  payload = ::Prometheus::Client::Formats::Text.marshal(@registry)
  [200, headers, payload]
rescue => e
  [500, { 'Content-Type' => 'text/plain' }, e.to_s]
end
191
+
192
# Collects the metrics of every worker into one response.
#
# Each worker is queried through #send_request_to_each_worker; only
# responses whose code is 200 are fed to the aggregator. Returns a
# [status, headers, body] triple: 200 with the aggregated text, or 500 with
# the error message when anything raises.
def all_workers_metrics
  aggregator = PromMetricsAggregator.new

  send_request_to_each_worker do |response|
    next unless response.code.to_s == '200'

    aggregator.add_metrics(response.body)
  end

  headers = { 'Content-Type' => ::Prometheus::Client::Formats::Text::CONTENT_TYPE }
  [200, headers, aggregator.get_metrics]
rescue => e
  [500, { 'Content-Type' => 'text/plain' }, e.to_s]
end
205
+
206
# Requests @metrics_path from every worker's port and yields each response.
#
# Worker ports are @base_port .. @base_port + @num_workers - 1. A bind
# address of '0.0.0.0' is replaced by '127.0.0.1' for the outgoing request.
def send_request_to_each_worker
  target_host = @bind == '0.0.0.0' ? '127.0.0.1' : @bind
  worker_ports = (@base_port...(@base_port + @num_workers)).to_a

  worker_ports.each do |worker_port|
    do_request(host: target_host, port: worker_port, secure: @secure) do |http|
      yield(http.get(@metrics_path))
    end
  end
end
214
+
215
# Opens an HTTP(S) connection to host:port and yields the started client.
# Certificate verification is disabled for secure connections.
#
# might be replaced by AsyncWrapper if async gem is installed
def do_request(host:, port:, secure:)
  client = Net::HTTP.new(host, port)

  if secure
    client.use_ssl = true
    # target is our child process. so it's secure.
    client.verify_mode = OpenSSL::SSL::VERIFY_NONE
  end

  client.start { yield(client) }
end
229
+ end
230
+ end
@@ -0,0 +1,107 @@
1
+ require 'fluent/plugin/input'
2
+ require 'fluent/plugin/in_monitor_agent'
3
+ require 'fluent/plugin/prometheus'
4
+
5
+ module Fluent::Plugin
6
+ class PrometheusMonitorInput < Fluent::Plugin::Input
7
# Register this class as the 'prometheus_monitor' input plugin.
+ Fluent::Plugin.register_input('prometheus_monitor', self)
8
# Supplies parse_labels_elements, used in #configure.
+ include Fluent::Plugin::PrometheusLabelParser
9
+
10
# timer_execute (used in #start) comes from the timer helper.
+ helpers :timer
11
+
12
# Interval handed to timer_execute for refreshing the gauges.
+ config_param :interval, :time, default: 5
13
+ attr_reader :registry
14
+
15
# Creates the input and attaches it to the process-wide Prometheus
# registry, which is exposed through the +registry+ reader.
def initialize
  super
  @registry = ::Prometheus::Client.registry
end
19
+
20
# Always true: this input declares itself usable with multiple workers.
def multi_workers_ready?
  true
end
23
+
24
# Parses the label configuration and prepares the monitor agent.
#
# Label values are expanded with the 'hostname' and 'worker_id'
# placeholders and stored in @base_labels. The monitor agent is created the
# same way as in the output and tail monitor inputs.
#
# Raises Fluent::ConfigError when a label value is not a String (record
# accessor syntax cannot be used here).
def configure(conf)
  super
  placeholders = { 'hostname' => Socket.gethostname, 'worker_id' => fluentd_worker_id }
  expander = Fluent::Plugin::Prometheus.placeholder_expander(log).build(placeholders)

  @base_labels = parse_labels_elements(conf)
  @base_labels.each do |key, value|
    unless value.is_a?(String)
      raise Fluent::ConfigError, "record accessor syntax is not available in prometheus_monitor"
    end
    @base_labels[key] = expander.expand(value)
  end

  # This file is loaded through 'fluent/plugin/input' and
  # 'fluent/plugin/in_monitor_agent', so the namespaced class is the one to use.
  @monitor_agent = Fluent::Plugin::MonitorAgentInput.new
end
45
+
46
# Registers the status gauges and schedules their periodic refresh.
#
# The two timekey gauges are kept in their own instance variables; the
# others are stored in @monitor_info under the plugin-info key they are
# read from. #update_monitor_info is run through timer_execute with
# @interval.
def start
  super

  @buffer_newest_timekey = get_gauge(:fluentd_status_buffer_newest_timekey, 'Newest timekey in buffer.')
  @buffer_oldest_timekey = get_gauge(:fluentd_status_buffer_oldest_timekey, 'Oldest timekey in buffer.')

  @monitor_info = {
    'buffer_queue_length' => get_gauge(:fluentd_status_buffer_queue_length, 'Current buffer queue length.'),
    'buffer_total_queued_size' => get_gauge(:fluentd_status_buffer_total_bytes, 'Current total size of queued buffers.'),
    'retry_count' => get_gauge(:fluentd_status_retry_count, 'Current retry counts.'),
  }

  timer_execute(:in_prometheus_monitor, @interval, &method(:update_monitor_info))
end
72
+
73
# Copies the current plugin statistics into the gauges.
#
# For every plugin reported by the monitor agent, each gauge in
# @monitor_info is set when the plugin info has a value under its key. When
# 'buffer_timekeys' is present and non-empty, its maximum and minimum are
# written to the newest/oldest timekey gauges.
def update_monitor_info
  @monitor_agent.plugins_info_all.each do |plugin_info|
    plugin_labels = labels(plugin_info)

    @monitor_info.each_pair do |field, gauge|
      gauge.set(plugin_info[field], labels: plugin_labels) if plugin_info[field]
    end

    timekeys = plugin_info["buffer_timekeys"]
    next unless timekeys && !timekeys.empty?

    @buffer_newest_timekey.set(timekeys.max, labels: plugin_labels)
    @buffer_oldest_timekey.set(timekeys.min, labels: plugin_labels)
  end
end
90
+
91
# Builds the label set for one plugin: the base labels plus the plugin's
# id, category and type (the plugin values win on a key clash).
def labels(plugin_info)
  plugin_labels = {
    plugin_id: plugin_info["plugin_id"],
    plugin_category: plugin_info["plugin_category"],
    type: plugin_info["type"],
  }
  @base_labels.merge(plugin_labels)
end
98
+
99
# Returns the metric registered under +name+, or registers a new gauge
# labelled with the base label keys plus :plugin_id, :plugin_category and
# :type when the registry does not have it yet.
def get_gauge(name, docstring)
  return @registry.get(name) if @registry.exist?(name)

  label_names = @base_labels.keys + [:plugin_id, :plugin_category, :type]
  @registry.gauge(name, docstring: docstring, labels: label_names)
end
106
+ end
107
+ end
@@ -0,0 +1,234 @@
1
+ require 'fluent/plugin/input'
2
+ require 'fluent/plugin/in_monitor_agent'
3
+ require 'fluent/plugin/prometheus'
4
+
5
+ module Fluent::Plugin
6
+ class PrometheusOutputMonitorInput < Fluent::Plugin::Input
7
# Register this class as the 'prometheus_output_monitor' input plugin.
+ Fluent::Plugin.register_input('prometheus_output_monitor', self)
8
# Supplies parse_labels_elements, used in #configure.
+ include Fluent::Plugin::PrometheusLabelParser
9
+
10
# timer_execute (used in #start) comes from the timer helper.
+ helpers :timer
11
+
12
# Interval handed to timer_execute for refreshing the metrics.
+ config_param :interval, :time, default: 5
13
# true: metrics built by get_gauge_or_counter are gauges; false: they are counters (see #configure).
+ config_param :gauge_all, :bool, default: true
14
+ attr_reader :registry
15
+
16
# Instance variables requested from the monitor agent (passed as the
# +ivars+ option in #update_monitor_info). Frozen so the shared constant
# cannot be modified by accident.
MONITOR_IVARS = [
  :retry,

  :num_errors,
  :emit_count,

  # for v0.12
  :last_retry_time,

  # from v0.14
  :emit_records,
  :write_count,
  :rollback_count,

  # from v1.6.0
  :flush_time_count,
  :slow_flush_count,
].freeze
34
+
35
# Creates the input and attaches it to the process-wide Prometheus
# registry, which is exposed through the +registry+ reader.
def initialize
  super
  @registry = ::Prometheus::Client.registry
end
39
+
40
# Always true: this input declares itself usable with multiple workers.
def multi_workers_ready?
  true
end
43
+
44
# Parses the label configuration and prepares the monitor agent.
#
# Label values are expanded with the 'hostname' and 'worker_id'
# placeholders and stored in @base_labels. @gauge_or_counter records which
# registry method (:gauge or :counter) #get_gauge_or_counter will call,
# depending on gauge_all.
#
# Raises Fluent::ConfigError when a label value is not a String.
def configure(conf)
  super
  host = Socket.gethostname
  builder = Fluent::Plugin::Prometheus.placeholder_expander(log)
  expander = builder.build({ 'hostname' => host, 'worker_id' => fluentd_worker_id })

  @base_labels = parse_labels_elements(conf)
  @base_labels.each_pair do |label_name, label_value|
    unless label_value.is_a?(String)
      raise Fluent::ConfigError, "record accessor syntax is not available in prometheus_output_monitor"
    end
    @base_labels[label_name] = expander.expand(label_value)
  end

  @monitor_agent = Fluent::Plugin::MonitorAgentInput.new

  @gauge_or_counter = if @gauge_all
                        :gauge
                      else
                        :counter
                      end
end
61
+
62
# Registers every output metric and schedules the periodic refresh.
#
# @metrics maps an internal key to its metric. Rows built with :get_gauge
# are always gauges; rows built with :get_gauge_or_counter follow the
# gauge_all setting. #update_monitor_info is run through timer_execute with
# @interval.
def start
  super

  # [internal key, factory method, metric name, docstring] in registration order.
  definitions = [
    # Buffer metrics
    [:buffer_total_queued_size, :get_gauge, :fluentd_output_status_buffer_total_bytes, 'Current total size of stage and queue buffers.'],
    [:buffer_stage_length, :get_gauge, :fluentd_output_status_buffer_stage_length, 'Current length of stage buffers.'],
    [:buffer_stage_byte_size, :get_gauge, :fluentd_output_status_buffer_stage_byte_size, 'Current total size of stage buffers.'],
    [:buffer_queue_length, :get_gauge, :fluentd_output_status_buffer_queue_length, 'Current length of queue buffers.'],
    # NOTE(review): this name has no "buffer_" part unlike its neighbours;
    # it is kept as-is because the metric name is visible to scrapers.
    [:buffer_queue_byte_size, :get_gauge, :fluentd_output_status_queue_byte_size, 'Current total size of queue buffers.'],
    [:buffer_available_buffer_space_ratios, :get_gauge, :fluentd_output_status_buffer_available_space_ratio, 'Ratio of available space in buffer.'],
    [:buffer_newest_timekey, :get_gauge, :fluentd_output_status_buffer_newest_timekey, 'Newest timekey in buffer.'],
    [:buffer_oldest_timekey, :get_gauge, :fluentd_output_status_buffer_oldest_timekey, 'Oldest timekey in buffer.'],

    # Output metrics
    [:retry_counts, :get_gauge_or_counter, :fluentd_output_status_retry_count, 'Current retry counts.'],
    [:num_errors, :get_gauge_or_counter, :fluentd_output_status_num_errors, 'Current number of errors.'],
    [:emit_count, :get_gauge_or_counter, :fluentd_output_status_emit_count, 'Current emit counts.'],
    [:emit_records, :get_gauge_or_counter, :fluentd_output_status_emit_records, 'Current emit records.'],
    [:write_count, :get_gauge_or_counter, :fluentd_output_status_write_count, 'Current write counts.'],
    [:rollback_count, :get_gauge, :fluentd_output_status_rollback_count, 'Current rollback counts.'],
    [:flush_time_count, :get_gauge_or_counter, :fluentd_output_status_flush_time_count, 'Total flush time.'],
    [:slow_flush_count, :get_gauge_or_counter, :fluentd_output_status_slow_flush_count, 'Current slow flush counts.'],
    [:retry_wait, :get_gauge, :fluentd_output_status_retry_wait, 'Current retry wait'],
  ]

  @metrics = definitions.each_with_object({}) do |(key, factory, metric_name, docstring), metrics|
    metrics[key] = send(factory, metric_name, docstring)
  end

  timer_execute(:in_prometheus_output_monitor, @interval, &method(:update_monitor_info))
end
123
+
124
# Refreshes every output-plugin metric from the monitor agent.
#
# Only plugin infos whose 'plugin_category' is 'output' are used. Values
# found directly in the plugin info and values found under
# 'instance_variables' are pushed to the matching metrics: gauges are set
# to the value, counters are incremented by the difference from their
# current value. When the info has a 'retry' entry, the retry_wait gauge is
# set to next_time - start (0 when either is missing).
def update_monitor_info
  opts = { ivars: MONITOR_IVARS, with_retry: true }
  output_infos = @monitor_agent.plugins_info_all(opts).select do |info|
    info['plugin_category'] == 'output'
  end

  # Shared update rule for both value sources below.
  push_value = lambda do |metric, value, label|
    case metric
    when ::Prometheus::Client::Gauge
      metric.set(value, labels: label)
    when ::Prometheus::Client::Counter
      metric.increment(by: value - metric.get(labels: label), labels: label)
    end
  end

  # Plugin-info key => metrics fed from it.
  monitor_info = {}
  # buffer metrics
  %w[
    buffer_total_queued_size buffer_stage_length buffer_stage_byte_size
    buffer_queue_length buffer_queue_byte_size buffer_available_buffer_space_ratios
    buffer_newest_timekey buffer_oldest_timekey
  ].each { |key| monitor_info[key] = [@metrics[key.to_sym]] }
  # output metrics
  monitor_info['retry_count'] = [@metrics[:retry_counts], @metrics[:num_errors]]
  # Needed since Fluentd v1.14 due to metrics extensions.
  %w[write_count emit_count emit_records rollback_count flush_time_count slow_flush_count].each do |key|
    monitor_info[key] = [@metrics[key.to_sym]]
  end

  # No needed for Fluentd v1.14 but leave as-is for backward compatibility.
  instance_vars_info = %i[
    num_errors write_count emit_count emit_records rollback_count flush_time_count slow_flush_count
  ].each_with_object({}) { |key, table| table[key] = @metrics[key] }

  output_infos.each do |info|
    label = labels(info)

    monitor_info.each do |name, metrics|
      metrics.each do |metric|
        push_value.call(metric, info[name], label) if info[name]
      end
    end

    ivars = info['instance_variables']
    if ivars
      instance_vars_info.each do |name, metric|
        push_value.call(metric, ivars[name], label) if ivars[name]
      end
    end

    # compute current retry_wait
    retry_info = info['retry']
    next unless retry_info

    next_time = retry_info['next_time']
    start_time = retry_info['start']
    if start_time.nil? && info['instance_variables']
      # v0.12 does not include start, use last_retry_time instead
      start_time = info['instance_variables'][:last_retry_time]
    end

    wait = next_time && start_time ? next_time - start_time : 0
    @metrics[:retry_wait].set(wait.to_f, labels: label)
  end
end
210
+
211
# Builds the label set for one plugin: the base labels plus the plugin's
# id and type (the plugin values win on a key clash).
def labels(plugin_info)
  plugin_labels = { plugin_id: plugin_info["plugin_id"], type: plugin_info["type"] }
  @base_labels.merge(plugin_labels)
end
217
+
218
# Returns the metric registered under +name+, or registers a new gauge
# labelled with the base label keys plus :plugin_id and :type when the
# registry does not have it yet.
def get_gauge(name, docstring)
  return @registry.get(name) if @registry.exist?(name)

  label_names = @base_labels.keys + [:plugin_id, :type]
  @registry.gauge(name, docstring: docstring, labels: label_names)
end
225
+
226
# Returns the metric registered under +name+, or registers a new one via
# the registry method named by @gauge_or_counter (:gauge or :counter),
# labelled with the base label keys plus :plugin_id and :type.
def get_gauge_or_counter(name, docstring)
  return @registry.get(name) if @registry.exist?(name)

  label_names = @base_labels.keys + [:plugin_id, :type]
  @registry.public_send(@gauge_or_counter, name, docstring: docstring, labels: label_names)
end
233
+ end
234
+ end
@@ -0,0 +1,98 @@
1
+ require 'fluent/plugin/input'
2
+ require 'fluent/plugin/in_monitor_agent'
3
+ require 'fluent/plugin/prometheus'
4
+
5
+ module Fluent::Plugin
6
+ class PrometheusTailMonitorInput < Fluent::Plugin::Input
7
# Register this class as the 'prometheus_tail_monitor' input plugin.
+ Fluent::Plugin.register_input('prometheus_tail_monitor', self)
8
# Supplies parse_labels_elements, used in #configure.
+ include Fluent::Plugin::PrometheusLabelParser
9
+
10
# timer_execute (used in #start) comes from the timer helper.
+ helpers :timer
11
+
12
# Interval handed to timer_execute for refreshing the gauges.
+ config_param :interval, :time, default: 5
13
+ attr_reader :registry
14
+
15
# Instance variables requested from the monitor agent (passed as the
# +ivars+ option in #update_monitor_info). Frozen so the shared constant
# cannot be modified by accident.
MONITOR_IVARS = [
  :tails,
].freeze
18
+
19
# Creates the input and attaches it to the process-wide Prometheus
# registry, which is exposed through the +registry+ reader.
def initialize
  super
  @registry = ::Prometheus::Client.registry
end
23
+
24
# Always true: this input declares itself usable with multiple workers.
def multi_workers_ready?
  true
end
27
+
28
# Parses the label configuration and prepares the monitor agent.
#
# Label values are expanded with the 'hostname' and 'worker_id'
# placeholders and stored in @base_labels.
#
# Raises Fluent::ConfigError when a label value is not a String.
def configure(conf)
  super
  host = Socket.gethostname
  builder = Fluent::Plugin::Prometheus.placeholder_expander(log)
  expander = builder.build({ 'hostname' => host, 'worker_id' => fluentd_worker_id })

  @base_labels = parse_labels_elements(conf)
  @base_labels.each_pair do |label_name, label_value|
    unless label_value.is_a?(String)
      raise Fluent::ConfigError, "record accessor syntax is not available in prometheus_tail_monitor"
    end
    @base_labels[label_name] = expander.expand(label_value)
  end

  @monitor_agent = Fluent::Plugin::MonitorAgentInput.new
end
43
+
44
# Registers the file position and inode gauges in @metrics and schedules
# #update_monitor_info through timer_execute with @interval.
def start
  super

  position_gauge = get_gauge(:fluentd_tail_file_position, 'Current position of file.')
  inode_gauge = get_gauge(:fluentd_tail_file_inode, 'Current inode of file.')
  @metrics = { position: position_gauge, inode: inode_gauge }

  timer_execute(:in_prometheus_tail_monitor, @interval, &method(:update_monitor_info))
end
57
+
58
# Publishes inode and read position for every file watched by tail plugins.
#
# Plugin infos whose 'type' is 'tail' are asked for their :tails instance
# variable; infos without it are skipped. For each watcher the inode and
# position read from its @pe object are written to the gauges, labelled
# with the watcher's path.
def update_monitor_info
  opts = { ivars: MONITOR_IVARS }
  tail_infos = @monitor_agent.plugins_info_all(opts).select { |info| info['type'] == 'tail' }

  tail_infos.each do |info|
    watchers = info['instance_variables'][:tails]
    next if watchers.nil?

    # Iterate over a copy of the collection.
    watchers.clone.each do |_, watcher|
      # Access to internal variable of internal class...
      # Very fragile implementation
      position_entry = watcher.instance_variable_get(:@pe)
      file_labels = labels(info, watcher.path)
      @metrics[:inode].set(position_entry.read_inode, labels: file_labels)
      @metrics[:position].set(position_entry.read_pos, labels: file_labels)
    end
  end
end
81
+
82
# Builds the label set for one watched file: the base labels plus the
# plugin's id and type and the file path (these win on a key clash).
def labels(plugin_info, path)
  file_labels = {
    plugin_id: plugin_info["plugin_id"],
    type: plugin_info["type"],
    path: path,
  }
  @base_labels.merge(file_labels)
end
89
+
90
# Returns the metric registered under +name+, or registers a new gauge
# labelled with the base label keys plus :plugin_id, :type and :path when
# the registry does not have it yet.
def get_gauge(name, docstring)
  return @registry.get(name) if @registry.exist?(name)

  label_names = @base_labels.keys + [:plugin_id, :type, :path]
  @registry.gauge(name, docstring: docstring, labels: label_names)
end
97
+ end
98
+ end