phihos-fluent-plugin-prometheus 2.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/linux.yml +34 -0
  3. data/.gitignore +16 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +14 -0
  6. data/ChangeLog +43 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE +202 -0
  9. data/README.md +537 -0
  10. data/Rakefile +7 -0
  11. data/fluent-plugin-prometheus.gemspec +22 -0
  12. data/lib/fluent/plugin/filter_prometheus.rb +43 -0
  13. data/lib/fluent/plugin/in_prometheus/async_wrapper.rb +47 -0
  14. data/lib/fluent/plugin/in_prometheus.rb +230 -0
  15. data/lib/fluent/plugin/in_prometheus_monitor.rb +107 -0
  16. data/lib/fluent/plugin/in_prometheus_output_monitor.rb +234 -0
  17. data/lib/fluent/plugin/in_prometheus_tail_monitor.rb +98 -0
  18. data/lib/fluent/plugin/out_prometheus.rb +42 -0
  19. data/lib/fluent/plugin/prometheus/data_store.rb +93 -0
  20. data/lib/fluent/plugin/prometheus/placeholder_expander.rb +132 -0
  21. data/lib/fluent/plugin/prometheus.rb +418 -0
  22. data/lib/fluent/plugin/prometheus_metrics.rb +77 -0
  23. data/misc/fluentd_sample.conf +170 -0
  24. data/misc/nginx_proxy.conf +22 -0
  25. data/misc/prometheus.yaml +13 -0
  26. data/misc/prometheus_alerts.yaml +59 -0
  27. data/spec/fluent/plugin/filter_prometheus_spec.rb +118 -0
  28. data/spec/fluent/plugin/in_prometheus_monitor_spec.rb +42 -0
  29. data/spec/fluent/plugin/in_prometheus_spec.rb +225 -0
  30. data/spec/fluent/plugin/in_prometheus_tail_monitor_spec.rb +42 -0
  31. data/spec/fluent/plugin/out_prometheus_spec.rb +139 -0
  32. data/spec/fluent/plugin/prometheus/placeholder_expander_spec.rb +110 -0
  33. data/spec/fluent/plugin/prometheus_metrics_spec.rb +138 -0
  34. data/spec/fluent/plugin/shared.rb +248 -0
  35. data/spec/spec_helper.rb +10 -0
  36. metadata +176 -0
@@ -0,0 +1,77 @@
1
+ module Fluent::Plugin
2
+
3
+ ##
4
+ # PromMetricsAggregator aggregates multiple metrics exposed using the Prometheus text-based format
5
+ # see https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md
6
+
7
+
8
# Container for the text-format lines that belong to a single metric.
#
# Comment lines (such as "# HELP" / "# TYPE") and sample lines are kept in
# two separate lists so that, when rendered, every comment precedes every
# sample regardless of the order in which they were added.
class PrometheusMetrics
  attr_writer :comments, :metrics

  def initialize
    @comments = []
    @metrics = []
  end

  # Records one comment line for this metric.
  def add_comment(comment)
    @comments << comment
  end

  # Records one sample (value) line for this metric.
  def add_metric_value(value)
    @metrics << value
  end

  # Renders the stored lines, comments first, separated by newlines.
  # No trailing newline is appended.
  def to_string
    lines = @comments + @metrics
    lines.join("\n")
  end
end
28
+
29
# Merges several payloads written in the Prometheus text exposition format
# into a single payload.
#
# Lines are grouped per metric name. The "# HELP" / "# TYPE" comment lines of
# a metric are kept only from the payload that introduced the metric; sample
# lines from every payload are appended to that metric's group.
class PromMetricsAggregator
  def initialize
    # metric name => PrometheusMetrics, in first-seen order
    @metrics = {}
  end

  # Extracts the metric name from a "# HELP <name> ..." or "# TYPE <name> ..."
  # line.
  #
  # Returns '' for any other comment line, and also when the keyword is
  # present but the metric name is missing.
  def get_metric_name_from_comment(line)
    _marker, keyword, name = line.split(' ')
    return '' unless %w[HELP TYPE].include?(keyword)

    name.to_s
  end

  # Parses one text-format payload and folds its lines into the aggregate.
  #
  # metrics - String holding newline-separated exposition lines.
  def add_metrics(metrics)
    current_metric = ''
    new_metric = false

    metrics.split("\n").each do |line|
      # Empty lines carry no information in the text format; skip them so
      # they are not stored as sample lines.
      next if line.strip.empty?

      unless line.start_with?('#')
        # Sample line. A payload may contain samples that are not preceded by
        # any HELP/TYPE line, so create the group on demand instead of
        # failing on a missing entry.
        (@metrics[current_metric] ||= PrometheusMetrics.new).add_metric_value(line)
        next
      end

      # Comment line: only HELP/TYPE lines naming a metric are of interest.
      parsed_metric = get_metric_name_from_comment(line)
      next if parsed_metric.empty?

      if parsed_metric != current_metric
        # Entering the comment block of a different metric.
        new_metric = !@metrics.key?(parsed_metric)
        @metrics[parsed_metric] = PrometheusMetrics.new if new_metric
        current_metric = parsed_metric
      end

      # Keep HELP/TYPE only for metrics first seen in this payload, so the
      # comments are not duplicated when the metric shows up again later.
      @metrics[parsed_metric].add_comment(line) if new_metric
    end
  end

  # Renders the aggregated payload.
  #
  # Returns '' when nothing has been aggregated; otherwise the groups joined
  # by newlines and terminated by a single trailing newline.
  def get_metrics
    # Note: an Integer is always truthy in Ruby, so emptiness must be tested
    # explicitly rather than through the collection's length.
    return '' if @metrics.empty?

    @metrics.values.map(&:to_string).join("\n") + "\n"
  end
end
77
+ end
@@ -0,0 +1,170 @@
1
+ ## Prometheus Input Plugin Configuration
2
+
3
+ # input plugin that exports metrics
4
+ <source>
5
+ @type prometheus
6
+ </source>
7
+
8
+ <source>
9
+ @type monitor_agent
10
+ </source>
11
+
12
+ <source>
13
+ @type forward
14
+ </source>
15
+
16
+ # input plugin that collects metrics from MonitorAgent
17
+ <source>
18
+ @type prometheus_monitor
19
+ <labels>
20
+ host ${hostname}
21
+ </labels>
22
+ </source>
23
+
24
+ # input plugin that collects metrics for output plugin
25
+ <source>
26
+ @type prometheus_output_monitor
27
+ <labels>
28
+ host ${hostname}
29
+ </labels>
30
+ </source>
31
+
32
+ # input plugin that collects metrics for in_tail plugin
33
+ <source>
34
+ @type prometheus_tail_monitor
35
+ <labels>
36
+ host ${hostname}
37
+ </labels>
38
+ </source>
39
+
40
+ ## Nginx Access Log Configuration
41
+
42
+ <source>
43
+ @type tail
44
+ format nginx
45
+ tag nginx
46
+ path /var/log/nginx/access.log
47
+ pos_file /tmp/fluent_nginx.pos
48
+ types size:integer
49
+ </source>
50
+
51
+ <filter nginx>
52
+ @type prometheus
53
+
54
+ # You can use the counter type with a key specified;
55
+ # the counter is then incremented by that key's value
56
+ <metric>
57
+ name nginx_size_counter_bytes
58
+ type counter
59
+ desc nginx bytes sent
60
+ key size
61
+ <labels>
62
+ host ${hostname}
63
+ foo bar
64
+ </labels>
65
+ </metric>
66
+
67
+ # You can use counter type without specifying a key
68
+ # This just increments counter by 1
69
+ <metric>
70
+ name nginx_record_counts
71
+ type counter
72
+ desc the number of emited records
73
+ <labels>
74
+ host ${hostname}
75
+ </labels>
76
+ </metric>
77
+ </filter>
78
+
79
+ <match nginx>
80
+ @type copy
81
+ # for MonitorAgent sample
82
+ <store>
83
+ @id test_forward
84
+ @type forward
85
+ buffer_type memory
86
+ flush_interval 1s
87
+ max_retry_wait 2s
88
+ <buffer>
89
+ # max_retry_wait 10s
90
+ flush_interval 1s
91
+ # retry_type periodic
92
+ disable_retry_limit
93
+ </buffer>
94
+ # retry_limit 3
95
+ disable_retry_limit
96
+ <server>
97
+ host 127.0.0.1
98
+ port 20000
99
+ </server>
100
+ </store>
101
+ <store>
102
+ @type stdout
103
+ </store>
104
+ </match>
105
+
106
+ ## Nginx Proxy Log Configuration
107
+
108
+ <source>
109
+ @type tail
110
+ format ltsv
111
+ tag nginx_proxy
112
+ path /var/log/nginx/access_proxy.log
113
+ pos_file /tmp/fluent_nginx_proxy.pos
114
+ types size:integer,request_length:integer,bytes_sent:integer,body_bytes_sent:integer,request_time:float,upstream_response_time:float
115
+ </source>
116
+
117
+ <filter nginx_proxy>
118
+ @type prometheus
119
+
120
+ # common labels for all metrics
121
+ <labels>
122
+ host ${hostname}
123
+ method ${request_method}
124
+ status ${status}
125
+ </labels>
126
+
127
+ <metric>
128
+ name nginx_proxy_request_length_total_bytes
129
+ type counter
130
+ desc nginx proxy request length bytes
131
+ key request_length
132
+ </metric>
133
+ <metric>
134
+ name nginx_proxy_bytes_sent_total_bytes
135
+ type counter
136
+ desc nginx proxy bytes sent
137
+ key bytes_sent
138
+ </metric>
139
+ <metric>
140
+ name nginx_proxy_request_duration_total_milliseconds
141
+ type counter
142
+ desc nginx proxy request time
143
+ key request_time
144
+ </metric>
145
+ <metric>
146
+ name nginx_proxy_upstream_response_duration_total_milliseconds
147
+ type counter
148
+ desc nginx proxy upstream response time
149
+ key upstream_response_time
150
+ </metric>
151
+ <metric>
152
+ name nginx_proxy_request_duration_milliseconds
153
+ type summary
154
+ desc nginx proxy request duration summary
155
+ key request_time
156
+ </metric>
157
+ <metric>
158
+ name nginx_proxy_upstream_duration_milliseconds
159
+ type summary
160
+ desc nginx proxy upstream response duration summary
161
+ key upstream_response_time
162
+ </metric>
163
+ </filter>
164
+
165
+ <match nginx_proxy>
166
+ @type copy
167
+ <store>
168
+ @type stdout
169
+ </store>
170
+ </match>
@@ -0,0 +1,22 @@
1
+ log_format ltsv 'time:$time_iso8601\t'
2
+ 'remote_addr:$remote_addr\t'
3
+ 'request_method:$request_method\t'
4
+ 'request_length:$request_length\t'
5
+ 'request_uri:$request_uri\t'
6
+ 'uri:$uri\t'
7
+ 'status:$status\t'
8
+ 'bytes_sent:$bytes_sent\t'
9
+ 'body_bytes_sent:$body_bytes_sent\t'
10
+ 'referer:$http_referer\t'
11
+ 'useragent:$http_user_agent\t'
12
+ 'request_time:$request_time\t'
13
+ 'upstream_response_time:$upstream_response_time';
14
+
15
+ server {
16
+ access_log /var/log/nginx/access_proxy.log ltsv;
17
+ listen 9999;
18
+ location / {
19
+ proxy_pass https://www.google.com;
20
+ }
21
+ }
22
+
@@ -0,0 +1,13 @@
1
+ # A job to scrape an endpoint of Fluentd running on localhost.
2
+ scrape_configs:
3
+ - job_name: 'prometheus'
4
+ scrape_interval: 5s
5
+ static_configs:
6
+ - targets:
7
+ - 'localhost:9090'
8
+ - job_name: fluentd
9
+ scrape_interval: 5s
10
+ static_configs:
11
+ - targets:
12
+ - 'localhost:24231'
13
+ metrics_path: /metrics
@@ -0,0 +1,59 @@
1
+ ALERT FluentdNodeDown
2
+ IF up{job="fluentd"} == 0
3
+ FOR 10m
4
+ LABELS {
5
+ service = "fluentd",
6
+ severity = "warning"
7
+ }
8
+ ANNOTATIONS {
9
+ summary = "fluentd cannot be scraped",
10
+ description = "Prometheus could not scrape {{ $labels.job }} for more than 10 minutes",
11
+ }
12
+
13
+ ALERT FluentdNodeDown
14
+ IF up{job="fluentd"} == 0
15
+ FOR 30m
16
+ LABELS {
17
+ service = "fluentd",
18
+ severity = "critical"
19
+ }
20
+ ANNOTATIONS {
21
+ summary = "fluentd cannot be scraped",
22
+ description = "Prometheus could not scrape {{ $labels.job }} for more than 30 minutes",
23
+ }
24
+
25
+ ALERT FluentdQueueLength
26
+ IF rate(fluentd_status_buffer_queue_length[5m]) > 0.3
27
+ FOR 1m
28
+ LABELS {
29
+ service = "fluentd",
30
+ severity = "warning"
31
+ }
32
+ ANNOTATIONS {
33
+ summary = "fluentd node are failing",
34
+ description = "In the last 5 minutes, fluentd queues increased 30%. Current value is {{ $value }} ",
35
+ }
36
+
37
+ ALERT FluentdQueueLength
38
+ IF rate(fluentd_status_buffer_queue_length[5m]) > 0.5
39
+ FOR 1m
40
+ LABELS {
41
+ service = "fluentd",
42
+ severity = "critical"
43
+ }
44
+ ANNOTATIONS {
45
+ summary = "fluentd node are critical",
46
+ description = "In the last 5 minutes, fluentd queues increased 50%. Current value is {{ $value }} ",
47
+ }
48
+
49
+ ALERT FluentdRecordsCountsHigh
50
+ IF sum(rate(fluentd_output_status_emit_records{job="fluentd"}[5m])) BY (instance) > (3 * sum(rate(fluentd_output_status_emit_records{job="fluentd"}[15m])) BY (instance))
51
+ FOR 1m
52
+ LABELS {
53
+ service = "fluentd",
54
+ severity = "critical"
55
+ }
56
+ ANNOTATIONS {
57
+ summary = "fluentd records count are critical",
58
+ description = "In the last 5m, records counts increased 3 times, comparing to the latest 15 min.",
59
+ }
@@ -0,0 +1,118 @@
1
+ require 'spec_helper'
2
+ require 'fluent/test/driver/filter'
3
+ require 'fluent/plugin/filter_prometheus'
4
+ require_relative 'shared'
5
+
6
+ describe Fluent::Plugin::PrometheusFilter do
7
+ let(:tag) { 'prometheus.test' }
8
+ let(:driver) { Fluent::Test::Driver::Filter.new(Fluent::Plugin::PrometheusFilter).configure(config) }
9
+ let(:registry) { ::Prometheus::Client::Registry.new }
10
+
11
+ before do
12
+ allow(Prometheus::Client).to receive(:registry).and_return(registry)
13
+ end
14
+
15
+ describe '#configure' do
16
+ it_behaves_like 'output configuration'
17
+ end
18
+
19
+ describe '#run' do
20
+ let(:message) { {"foo" => 100, "bar" => 100, "baz" => 100, "qux" => 10} }
21
+
22
+ context 'simple config' do
23
+ let(:config) {
24
+ BASE_CONFIG + %(
25
+ <metric>
26
+ name simple
27
+ type counter
28
+ desc Something foo.
29
+ key foo
30
+ </metric>
31
+ )
32
+ }
33
+
34
+ it 'adds a new counter metric' do
35
+ expect(registry.metrics.map(&:name)).not_to eq([:simple])
36
+ driver.run(default_tag: tag) { driver.feed(event_time, message) }
37
+ expect(registry.metrics.map(&:name)).to eq([:simple])
38
+ end
39
+
40
+ it 'should keep original message' do
41
+ driver.run(default_tag: tag) { driver.feed(event_time, message) }
42
+ expect(driver.filtered_records.first).to eq(message)
43
+ end
44
+ end
45
+
46
+ it_behaves_like 'instruments record'
47
+ end
48
+
49
+ describe '#run with retention' do
50
+ let(:message) { { "foo" => 100, "bar" => 100, "baz" => 100, "qux" => 10 } }
51
+
52
+ context 'config with retention 1' do
53
+ let(:config) {
54
+ BASE_CONFIG + %(
55
+ <metric>
56
+ name simple
57
+ type counter
58
+ desc Something foo.
59
+ key foo
60
+ <labels>
61
+ bar ${bar}
62
+ baz ${baz}
63
+ qux ${qux}
64
+ </labels>
65
+ retention 1
66
+ retention_check_interval 1
67
+ </metric>
68
+ )
69
+ }
70
+
71
+ it 'expires metric after max 2s' do
72
+ expect(registry.metrics.map(&:name)).not_to eq([:simple])
73
+ driver.run(default_tag: tag) {
74
+ driver.feed(event_time, message)
75
+ expect(registry.metrics[0].get(labels: { :bar => 100, :baz => 100, :qux => 10 })).to eq(100)
76
+ sleep(2)
77
+ expect(registry.metrics[0].get(labels: { :bar => 100, :baz => 100, :qux => 10 })).to eq(0.0)
78
+ }
79
+ end
80
+ end
81
+ end
82
+
83
# Exercises the `topk` metric option. Three records with distinct `bar`
# labels are fed; the expectation below is that only the two label sets with
# the largest counter values are present afterwards.
describe '#run with topk' do
  let(:message1) { { "foo" => 200, "bar" => "a" } }
  let(:message2) { { "foo" => 300, "bar" => "b" } }
  let(:message3) { { "foo" => 100, "bar" => "c" } }

  context 'config with topk 2' do
    let(:config) {
      BASE_CONFIG + %(
        <metric>
          name simple
          type counter
          desc Something foo.
          key foo
          <labels>
            bar ${bar}
          </labels>
          topk 2
        </metric>
      )
    }

    it 'keeps only the 2 label sets with the highest values' do
      expect(registry.metrics.map(&:name)).not_to eq([:simple])
      driver.run(default_tag: tag) {
        driver.feed(event_time, message1)
        driver.feed(event_time, message2)
        driver.feed(event_time, message3)
      }
      # message3 carries the smallest value (100), so its label set is the
      # one expected to be absent.
      expect(registry.metrics[0].values).to eq({
        { :bar => "a" } => 200,
        { :bar => "b" } => 300,
      })
    end
  end
end
118
+ end
@@ -0,0 +1,42 @@
1
+ require 'spec_helper'
2
+ require 'fluent/plugin/in_prometheus_monitor'
3
+ require 'fluent/test/driver/input'
4
+
5
+ describe Fluent::Plugin::PrometheusMonitorInput do
6
+ MONITOR_CONFIG = %[
7
+ @type prometheus_monitor
8
+ <labels>
9
+ host ${hostname}
10
+ foo bar
11
+ </labels>
12
+ ]
13
+
14
+ INVALID_MONITOR_CONFIG = %[
15
+ @type prometheus_monitor
16
+
17
+ <labels>
18
+ host ${hostname}
19
+ foo bar
20
+ invalid_use1 $.foo.bar
21
+ invalid_use2 $[0][1]
22
+ </labels>
23
+ ]
24
+
25
+ let(:config) { MONITOR_CONFIG }
26
+ let(:driver) { Fluent::Test::Driver::Input.new(Fluent::Plugin::PrometheusMonitorInput).configure(config) }
27
+
28
+ describe '#configure' do
29
+ describe 'valid' do
30
+ it 'does not raise error' do
31
+ expect{driver}.not_to raise_error
32
+ end
33
+ end
34
+
35
+ describe 'invalid' do
36
+ let(:config) { INVALID_MONITOR_CONFIG }
37
+ it 'expect raise error' do
38
+ expect{driver}.to raise_error
39
+ end
40
+ end
41
+ end
42
+ end