fluent-plugin-prometheus-smarter 1.8.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,132 @@
1
module Fluent
  module Plugin
    module Prometheus
      # Turns a hash of raw values into a table of `${...}` placeholder
      # strings and wraps that table in a PlaceholderExpander.
      class ExpandBuilder
        # Shortcut for `new(log: log).build(placeholder)`.
        #
        # @param placeholder [Hash] raw values keyed by placeholder name
        # @param log [#warn] logger handed to the resulting expander
        # @return [PlaceholderExpander]
        def self.build(placeholder, log:)
          new(log: log).build(placeholder)
        end

        # @param log [#warn] logger handed to every expander this builder creates
        def initialize(log:)
          @log = log
        end

        # Builds the placeholder table.
        #
        # * Array values yield `${key[i]}` for both the positive and the
        #   negative index of every element.
        # * Hash values yield `${key["k"]}` for every entry.
        # * The scalar stored under the key 'tag' yields `${tag}`,
        #   `${tag_parts[i]}`, `${tag_prefix[i]}` and `${tag_suffix[i]}`.
        # * Any other scalar yields `${key}`.
        #
        # @param placeholder_values [Hash] raw values keyed by placeholder name
        # @return [PlaceholderExpander]
        def build(placeholder_values)
          placeholders = {}
          placeholder_values.each do |key, value|
            case value
            when Array
              size = value.size
              value.each_with_index do |v, i|
                placeholders["${#{key}[#{i}]}"] = v
                # i - size is the negative index that addresses the same element
                placeholders["${#{key}[#{i - size}]}"] = v
              end
            when Hash
              value.each do |k, v|
                placeholders[%(${#{key}["#{k}"]})] = v
              end
            else
              if key == 'tag'
                placeholders.merge!(build_tag(value))
              else
                placeholders["${#{key}}"] = value
              end
            end
          end

          Fluent::Plugin::Prometheus::ExpandBuilder::PlaceholderExpander.new(@log, placeholders)
        end

        private

        # Builds every tag-derived placeholder for a dot-separated tag.
        #
        # @param tag [String]
        # @return [Hash{String => String}]
        def build_tag(tag)
          tags = tag.split('.')
          size = tags.size

          placeholders = { '${tag}' => tag }

          tags.each_with_index do |v, i|
            placeholders["${tag_parts[#{i}]}"] = v
            placeholders["${tag_parts[#{i - size}]}"] = v
          end

          tag_prefix(tags).each_with_index do |v, i|
            placeholders["${tag_prefix[#{i}]}"] = v
          end

          tag_suffix(tags).each_with_index do |v, i|
            placeholders["${tag_suffix[#{i}]}"] = v
          end

          placeholders
        end

        # Cumulative prefixes: %w[a b c] => ["a", "a.b", "a.b.c"].
        def tag_prefix(tags)
          tags.each_index.map { |i| tags.first(i + 1).join('.') }
        end

        # Cumulative suffixes: %w[a b c] => ["c", "b.c", "a.b.c"].
        def tag_suffix(tags)
          tags.each_index.map { |i| tags.last(i + 1).join('.') }
        end

        # Replaces `${...}` placeholders in strings using a lookup table.
        class PlaceholderExpander
          # Matches `${name}` and `${name[index]}`.
          PLACEHOLDER_REGEX = /(\${[^\[}]+(\[[^\]]+\])?})/.freeze

          attr_reader :placeholder

          # @param log [#warn] receives a warning for every unknown placeholder
          # @param placeholder [Hash] full placeholder string => replacement
          def initialize(log, placeholder)
            @placeholder = placeholder
            @log = log
            @expander_cache = {}
          end

          # Merges additional placeholder entries into this expander's table;
          # entries given here win on key collision.
          #
          # Expanders cached for dynamic placeholders hold a snapshot of this
          # table, so the cache is dropped to stop them serving stale lookups.
          #
          # @param placeholder [Hash] full placeholder string => replacement
          # @return [Hash] the updated table
          def merge_placeholder(placeholder)
            @expander_cache.clear
            @placeholder.merge!(placeholder)
          end

          # Expands every placeholder found in +str+.
          #
          # @param str [String]
          # @param dynamic_placeholders [Hash, nil] extra raw values in the same
          #   shape ExpandBuilder#build accepts. A dedicated expander is built
          #   for each distinct hash and cached; this expander's own entries
          #   override the dynamic ones on collision.
          # @return [String] unknown placeholders are left untouched
          def expand(str, dynamic_placeholders: nil)
            expander =
              if dynamic_placeholders
                @expander_cache[dynamic_placeholders] ||=
                  ExpandBuilder.build(dynamic_placeholders, log: @log).tap do |e|
                    e.merge_placeholder(@placeholder)
                  end
              else
                self
              end

            expander.expand!(str)
          end

          protected

          # Performs the substitution against this expander's own table and
          # warns about placeholders that are not in it.
          def expand!(str)
            str.gsub(PLACEHOLDER_REGEX) do |value|
              @placeholder.fetch(value) do
                @log.warn("unknown placeholder `#{value}` found")
                value # return as it is
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
module Fluent
  module Plugin
    ##
    # PrometheusMetrics holds the comment lines (# HELP, # TYPE) and the
    # sample lines collected for a single metric.
    class PrometheusMetrics
      attr_writer :comments, :metrics

      def initialize
        @comments = []
        @metrics = []
      end

      # @return [String] comment lines followed by sample lines, newline-joined
      #   (no trailing newline)
      def to_string
        (@comments + @metrics).join("\n")
      end

      def add_comment(comment)
        @comments << comment
      end

      def add_metric_value(value)
        @metrics << value
      end
    end

    ##
    # PromMetricsAggregator aggregates multiple metrics exposed using the
    # Prometheus text-based format.
    # see https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md
    class PromMetricsAggregator
      def initialize
        # metric name => PrometheusMetrics, in first-seen order
        @metrics = {}
      end

      # Extracts the metric name from a `# HELP <name> ...` or
      # `# TYPE <name> ...` line.
      #
      # @param line [String]
      # @return [String] the metric name, or '' for any other comment and for
      #   a HELP/TYPE line that carries no name
      def get_metric_name_from_comment(line)
        tokens = line.split(' ')
        return '' unless %w[HELP TYPE].include?(tokens[1])

        tokens[2].to_s
      end

      # Merges one text-format payload into the aggregate.
      #
      # HELP/TYPE comments are kept only the first time a metric name is seen;
      # sample lines are always appended to the metric whose comment block
      # precedes them. Samples that appear before any HELP/TYPE line are kept
      # under the '' key so they are not lost. Blank lines are skipped.
      #
      # @param metrics [String] newline-separated exposition text
      def add_metrics(metrics)
        current_metric = ''
        new_metric = false

        metrics.split("\n").each do |line|
          next if line.strip.empty?

          unless line.start_with?('#')
            # Metric value, simply append line
            (@metrics[current_metric] ||= PrometheusMetrics.new).add_metric_value(line)
            next
          end

          # Metric comment (# TYPE, # HELP); other comments are dropped
          parsed_metric = get_metric_name_from_comment(line)
          next if parsed_metric.empty?

          if parsed_metric != current_metric
            # Starting a new metric comment block
            new_metric = !@metrics.key?(parsed_metric)
            @metrics[parsed_metric] = PrometheusMetrics.new if new_metric
            current_metric = parsed_metric
          end

          # New metric, inject comments (# TYPE, # HELP)
          @metrics[parsed_metric].add_comment(line) if new_metric
        end
      end

      # @return [String] the aggregated payload, newline-terminated, or ''
      #   when nothing has been added
      def get_metrics
        # 0 is truthy in Ruby, so emptiness must be tested explicitly.
        return '' if @metrics.empty?

        @metrics.each_value.map(&:to_string).join("\n") + "\n"
      end
    end
  end
end
@@ -0,0 +1,170 @@
1
+ ## Prometheus Input Plugin Configuration
2
+
3
+ # input plugin that exports metrics
4
+ <source>
5
+ @type prometheus
6
+ </source>
7
+
8
+ <source>
9
+ @type monitor_agent
10
+ </source>
11
+
12
+ <source>
13
+ @type forward
14
+ </source>
15
+
16
+ # input plugin that collects metrics from MonitorAgent
17
+ <source>
18
+ @type prometheus_monitor
19
+ <labels>
20
+ host ${hostname}
21
+ </labels>
22
+ </source>
23
+
24
+ # input plugin that collects metrics for output plugin
25
+ <source>
26
+ @type prometheus_output_monitor
27
+ <labels>
28
+ host ${hostname}
29
+ </labels>
30
+ </source>
31
+
32
+ # input plugin that collects metrics for in_tail plugin
33
+ <source>
34
+ @type prometheus_tail_monitor
35
+ <labels>
36
+ host ${hostname}
37
+ </labels>
38
+ </source>
39
+
40
+ ## Nginx Access Log Configuration
41
+
42
+ <source>
43
+ @type tail
44
+ format nginx
45
+ tag nginx
46
+ path /var/log/nginx/access.log
47
+ pos_file /tmp/fluent_nginx.pos
48
+ types size:integer
49
+ </source>
50
+
51
+ <filter nginx>
52
+ @type prometheus
53
+
54
+ # You can use the counter type with a key specified;
55
+ # the counter is then incremented by that key's value
56
+ <metric>
57
+ name nginx_size_counter_bytes
58
+ type counter
59
+ desc nginx bytes sent
60
+ key size
61
+ <labels>
62
+ host ${hostname}
63
+ foo bar
64
+ </labels>
65
+ </metric>
66
+
67
+ # You can also use the counter type without specifying a key;
68
+ # this just increments the counter by 1 for each record
69
+ <metric>
70
+ name nginx_record_counts
71
+ type counter
72
+ desc the number of emited records
73
+ <labels>
74
+ host ${hostname}
75
+ </labels>
76
+ </metric>
77
+ </filter>
78
+
79
+ <match nginx>
80
+ @type copy
81
+ # for MonitorAgent sample
82
+ <store>
83
+ @id test_forward
84
+ @type forward
85
+ buffer_type memory
86
+ flush_interval 1s
87
+ max_retry_wait 2s
88
+ <buffer>
89
+ # max_retry_wait 10s
90
+ flush_interval 1s
91
+ # retry_type periodic
92
+ disable_retry_limit
93
+ </buffer>
94
+ # retry_limit 3
95
+ disable_retry_limit
96
+ <server>
97
+ host 127.0.0.1
98
+ port 20000
99
+ </server>
100
+ </store>
101
+ <store>
102
+ @type stdout
103
+ </store>
104
+ </match>
105
+
106
+ ## Nginx Proxy Log Configuration
107
+
108
+ <source>
109
+ @type tail
110
+ format ltsv
111
+ tag nginx_proxy
112
+ path /var/log/nginx/access_proxy.log
113
+ pos_file /tmp/fluent_nginx_proxy.pos
114
+ types size:integer,request_length:integer,bytes_sent:integer,body_bytes_sent:integer,request_time:float,upstream_response_time:float
115
+ </source>
116
+
117
+ <filter nginx_proxy>
118
+ @type prometheus
119
+
120
+ # common labels for all metrics
121
+ <labels>
122
+ host ${hostname}
123
+ method ${request_method}
124
+ status ${status}
125
+ </labels>
126
+
127
+ <metric>
128
+ name nginx_proxy_request_length_total_bytes
129
+ type counter
130
+ desc nginx proxy request length bytes
131
+ key request_length
132
+ </metric>
133
+ <metric>
134
+ name nginx_proxy_bytes_sent_total_bytes
135
+ type counter
136
+ desc nginx proxy bytes sent
137
+ key bytes_sent
138
+ </metric>
139
+ <metric>
140
+ name nginx_proxy_request_duration_total_milliseconds
141
+ type counter
142
+ desc nginx proxy request time
143
+ key request_time
144
+ </metric>
145
+ <metric>
146
+ name nginx_proxy_upstream_response_duration_total_milliseconds
147
+ type counter
148
+ desc nginx proxy upstream response time
149
+ key upstream_response_time
150
+ </metric>
151
+ <metric>
152
+ name nginx_proxy_request_duration_milliseconds
153
+ type summary
154
+ desc nginx proxy request duration summary
155
+ key request_time
156
+ </metric>
157
+ <metric>
158
+ name nginx_proxy_upstream_duration_milliseconds
159
+ type summary
160
+ desc nginx proxy upstream response duration summary
161
+ key upstream_response_time
162
+ </metric>
163
+ </filter>
164
+
165
+ <match nginx_proxy>
166
+ @type copy
167
+ <store>
168
+ @type stdout
169
+ </store>
170
+ </match>
@@ -0,0 +1,22 @@
1
+ log_format ltsv 'time:$time_iso8601\t'
2
+ 'remote_addr:$remote_addr\t'
3
+ 'request_method:$request_method\t'
4
+ 'request_length:$request_length\t'
5
+ 'request_uri:$request_uri\t'
6
+ 'uri:$uri\t'
7
+ 'status:$status\t'
8
+ 'bytes_sent:$bytes_sent\t'
9
+ 'body_bytes_sent:$body_bytes_sent\t'
10
+ 'referer:$http_referer\t'
11
+ 'useragent:$http_user_agent\t'
12
+ 'request_time:$request_time\t'
13
+ 'upstream_response_time:$upstream_response_time';
14
+
15
+ server {
16
+ access_log /var/log/nginx/access_proxy.log ltsv;
17
+ listen 9999;
18
+ location / {
19
+ proxy_pass https://www.google.com;
20
+ }
21
+ }
22
+
@@ -0,0 +1,13 @@
1
+ # A job to scrape an endpoint of Fluentd running on localhost.
2
+ scrape_configs:
3
+ - job_name: 'prometheus'
4
+ scrape_interval: 5s
5
+ static_configs:
6
+ - targets:
7
+ - 'localhost:9090'
8
+ - job_name: fluentd
9
+ scrape_interval: 5s
10
+ static_configs:
11
+ - targets:
12
+ - 'localhost:24231'
13
+ metrics_path: /metrics
@@ -0,0 +1,59 @@
1
+ ALERT FluentdNodeDown
2
+ IF up{job="fluentd"} == 0
3
+ FOR 10m
4
+ LABELS {
5
+ service = "fluentd",
6
+ severity = "warning"
7
+ }
8
+ ANNOTATIONS {
9
+ summary = "fluentd cannot be scraped",
10
+ description = "Prometheus could not scrape {{ $labels.job }} for more than 10 minutes",
11
+ }
12
+
13
+ ALERT FluentdNodeDown
14
+ IF up{job="fluentd"} == 0
15
+ FOR 30m
16
+ LABELS {
17
+ service = "fluentd",
18
+ severity = "critical"
19
+ }
20
+ ANNOTATIONS {
21
+ summary = "fluentd cannot be scraped",
22
+ description = "Prometheus could not scrape {{ $labels.job }} for more than 30 minutes",
23
+ }
24
+
25
+ ALERT FluentdQueueLength
26
+ IF rate(fluentd_status_buffer_queue_length[5m]) > 0.3
27
+ FOR 1m
28
+ LABELS {
29
+ service = "fluentd",
30
+ severity = "warning"
31
+ }
32
+ ANNOTATIONS {
33
+ summary = "fluentd node are failing",
34
+ description = "In the last 5 minutes, fluentd queues increased 30%. Current value is {{ $value }} ",
35
+ }
36
+
37
+ ALERT FluentdQueueLength
38
+ IF rate(fluentd_status_buffer_queue_length[5m]) > 0.5
39
+ FOR 1m
40
+ LABELS {
41
+ service = "fluentd",
42
+ severity = "critical"
43
+ }
44
+ ANNOTATIONS {
45
+ summary = "fluentd node are critical",
46
+ description = "In the last 5 minutes, fluentd queues increased 50%. Current value is {{ $value }} ",
47
+ }
48
+
49
+ ALERT FluentdRecordsCountsHigh
50
+ IF sum(rate(fluentd_output_status_emit_records{job="fluentd"}[5m])) BY (instance) > (3 * sum(rate(fluentd_output_status_emit_records{job="fluentd"}[15m])) BY (instance))
51
+ FOR 1m
52
+ LABELS {
53
+ service = "fluentd",
54
+ severity = "critical"
55
+ }
56
+ ANNOTATIONS {
57
+ summary = "fluentd records count are critical",
58
+ description = "In the last 5m, records counts increased 3 times, comparing to the latest 15 min.",
59
+ }