postfix-exporter 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 4b3403736d6ac37fd2733f6db98f566a36e426de634fe971b96a332dd13527dc
4
+ data.tar.gz: b20157262dd52237db592744c80713c260ea6eb224e2feb5eac7a60d5665ca40
5
+ SHA512:
6
+ metadata.gz: 8d99493844dfb43b7b15a3742430f50db2c70ec3506b0c1bd93557a4ca4aca45d22c4eca5a95010571eac486519740858e415f9bae2329ff2a630962056bd10d
7
+ data.tar.gz: a10120c7f4b0b7a31f36b4fd215bb44ea14868e8281491c473e26812e8b2cfbad0352c87666437087875fb3e095c4036e8b0316461ae82b0077567e938040846
@@ -0,0 +1,5 @@
1
+ Gemfile.lock
2
+ /pkg
3
+ /doc
4
+ /.yardoc
5
+ /.bundle
@@ -0,0 +1,50 @@
1
+ This is a simple exporter for various statistics relating to a Postfix
2
+ server.
3
+
4
+
5
+ # Features
6
+
7
+ * Examines mail queue periodically and exports `postfix_queue_size`;
8
+
9
+ * Reads syslog entries as they happen, and exports disposition status
10
+ counters (`postfix_delivery_delays_count`) per DSN, as well as delay summaries per
11
+ DSN (`postfix_delivery_delays{quantile="..."}`);
12
+
13
+ * Total number of SMTP connections (`postfix_smtpd_connections_total`) and
14
+ currently-active connections (`postfix_smtpd_active_connections`);
15
+
16
+ * Count how many delivery attempts were received
17
+ (`postfix_incoming_delivery_attempts_total`), split out by whether we
18
+ accepted or rejected the message (`status`) and the exact DSN provided to
19
+ the client (`dsn`);
20
+
21
+ * Whether or not the Postfix `master` process is running (`postfix_up`);
22
+
23
+ * Drinks from the syslog stream directly.
24
+
25
+
26
+ # Deployment
27
+
28
+ The default way of rolling out this exporter is as a Docker container. For
29
+ that, you'll want to set up a volume for the postfix queue (making it
30
+ available in the container as `/var/spool/postfix`), and some env
31
+ vars. You can also run it directly (via the gem), with the same env vars,
32
+ and with the expectation that `/var/spool/postfix` is in the usual place.
33
+
34
+ If you configure the postfix-exporter to run in the same PID namespace as
35
+ whatever it is that's running Postfix itself (either `--pid=host` or in the
36
+ same NS namespace, a la k8s pods), then the `postfix_up` metric will be
37
+ valid, otherwise it'll be random (but unlikely to be correct).
38
+
39
+
40
+ ## Environment Variables
41
+
42
+ * `SYSLOG_SOCKET` -- where to read raw syslog events from. If you're
43
+ running Postfix in an isolated container, you can probably hook up
44
+ `/dev/log` directly between containers with a bit of shenanigans,
45
+ otherwise configure your syslog daemon to write out a copy of all `mail`
46
+ facility messages to a custom socket. Alternately, if you use logstash,
47
+ take a look at the [`syslogstash`
48
+ gem](https://rubygems.org/gems/syslogstash) and
49
+ [container](https://hub.docker.com/r/discourse/syslogstash/), and go forth
50
+ and sin no more.
@@ -0,0 +1,183 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rack'
4
+ require 'prometheus/middleware/exporter'
5
+ require 'socket'
6
+ require 'rack/handler/webrick'
7
+ require 'logger'
8
+
9
# Every metric this exporter publishes is registered on the default client
# registry.
prometheus = Prometheus::Client.registry

# Record the exporter's own start time (epoch seconds) once, at boot.
start_time = prometheus.gauge(
  :postfix_exporter_start_time_seconds,
  "When this process started up"
)
start_time.set({}, Time.now.to_f)

# Queue- and process-level metrics.
oldest = prometheus.gauge(
  :postfix_oldest_message_timestamp_seconds,
  "Queue time of the oldest message"
)
mailq = prometheus.gauge(
  :postfix_queue_size,
  "Number of messages in the mail queue"
)
q_err = prometheus.counter(
  :postfix_queue_processing_error_total,
  "Exceptions raised whilst scanning the Postfix queue"
)
up = prometheus.gauge(
  :postfix_up,
  "Whether the master process is running or not"
)

# Make an unhandled exception in any thread abort the whole process.
Thread.abort_on_exception = true
19
+
20
# Background poller: every five seconds, publish the size of each Postfix
# queue directory and whether the Postfix master process appears to be alive.
Thread.new do
  spool = "/var/spool/postfix"

  loop do
    # --- queue sizes -------------------------------------------------------
    begin
      %w{incoming active corrupt hold}.each do |queue_name|
        entries = Dir.glob("#{spool}/#{queue_name}/*")
        mailq.set({ queue: queue_name }, entries.size)
      end

      # The deferred queue is often huge, so Postfix shards it into
      # subdirectories; count one level deeper for it.
      deferred_entries = Dir.glob("#{spool}/deferred/*/*")
      mailq.set({ queue: 'deferred' }, deferred_entries.size)
    rescue StandardError => ex
      $stderr.puts "Error while monitoring queue sizes: #{ex.message} (#{ex.class})"
      $stderr.puts ex.backtrace.map { |l| " #{l}" }.join("\n")
      q_err.increment(class: ex.class.to_s, phase: "scan")
    end

    # --- master process liveness ------------------------------------------
    begin
      master_pid = File.read("#{spool}/pid/master.pid").to_i

      # Signal 0 delivers nothing; it only checks that the PID exists.
      # PIDs of 1 or less are never probed and count as "down".
      alive = master_pid > 1 && Process.kill(0, master_pid)
      up.set({}, alive ? 1 : 0)
    rescue Errno::ENOENT, Errno::ESRCH, Errno::EACCES
      # No PID file, no such process, or the PID file is unreadable.
      up.set({}, 0)
    rescue Errno::EPERM
      # Not being allowed to signal the process still proves it exists,
      # which is all this metric needs to know.
      up.set({}, 1)
    rescue StandardError => ex
      $stderr.puts "Error while checking master process: #{ex.message} (#{ex.class})"
      $stderr.puts ex.backtrace.map { |l| " #{l}" }.join("\n")
      q_err.increment(class: ex.class.to_s, phase: "up")
    end

    sleep 5
  end
end
65
+
66
# Background sampler: once a minute, publish the ctime of the oldest file in
# each Postfix queue directory.
Thread.new do
  # Returns the smallest ctime (epoch seconds) among the files matching
  # +pattern+. The Ruby Prometheus client seemingly offers no way to unset a
  # gauge, so when there is nothing to sample the current time is returned
  # instead.
  earliest_ctime = lambda do |pattern|
    sampled_at = Time.now.to_i

    ctimes = Dir.glob(pattern).lazy.map do |path|
      begin
        File.stat(path).ctime.to_i
      rescue Errno::ENOENT
        # The file left the queue between the glob and the stat.
        sampled_at
      end
    end

    ctimes.min || sampled_at
  end

  loop do
    begin
      %w{incoming active corrupt hold}.each do |queue_name|
        oldest.set({ queue: queue_name }, earliest_ctime.call("/var/spool/postfix/#{queue_name}/*"))
      end
      # The deferred queue keeps its files one directory level deeper.
      oldest.set({ queue: 'deferred' }, earliest_ctime.call("/var/spool/postfix/deferred/*/*"))
    rescue StandardError => ex
      $stderr.puts "Error while sampling message ages: #{ex.message} (#{ex.class})"
      $stderr.puts ex.backtrace.map { |l| " #{l}" }.join("\n")
      q_err.increment(class: ex.class.to_s, phase: "stat")
    end

    # Stat-ing every file in a large queue can be expensive, so this runs
    # far less often than the queue-size poll.
    sleep 60
  end
end
100
+
101
# Syslog-derived metrics are only collected when SYSLOG_SOCKET names the path
# of a Unix datagram socket for this process to bind and read from.
if ENV["SYSLOG_SOCKET"]
  delays     = prometheus.summary(:postfix_delivery_delays, "Distribution of time taken to deliver (or bounce) messages")
  connects   = prometheus.counter(:postfix_smtpd_connections_total, "Connections to smtpd")
  active     = prometheus.gauge(:postfix_smtpd_active_connections, "Current connections to smtpd")
  incoming   = prometheus.counter(:postfix_incoming_delivery_attempts_total, "Delivery attempts, labelled by dsn and status")
  messages   = prometheus.counter(:postfix_log_messages_total, "Syslog messages received, labelled by how it was handled")
  log_errors = prometheus.counter(:postfix_log_processing_error_total, "Exceptions raised whilst processing log messages")

  Thread.new do
    # Create the socket once; only the bind is retried, so a retry does not
    # allocate (and abandon) a fresh socket each time round.
    s = Socket.new(Socket::AF_UNIX, Socket::SOCK_DGRAM, 0)

    begin
      s.bind(Socket.pack_sockaddr_un(ENV["SYSLOG_SOCKET"]))
    rescue Errno::EEXIST, Errno::EADDRINUSE
      # Something already exists at the socket path; remove it and bind again.
      File.unlink ENV["SYSLOG_SOCKET"]
      retry
    end

    loop do
      begin
        msg = s.recvmsg.first

        if msg =~ %r{postfix/.* delay=(\d+(\.\d+)?), .* dsn=(\d+\.\d+\.\d+), status=(\w+)}
          # Delivery result line: capture 1 is the delay, 3 the DSN and 4 the
          # status (capture 2 is the optional fractional part of the delay).
          delay  = $1.to_f
          dsn    = $3
          status = $4

          # Only final dispositions are observed in the delay summary.
          if status == "bounced" || status == "sent"
            delays.observe({dsn: dsn, status: status}, delay)
          end

          messages.increment(type: "delay")
        elsif msg =~ %r{postfix/smtpd\[\d+\]: connect from }
          connects.increment({})
          # The parentheses matter: `get || 0 + 1` parses as `get || (0 + 1)`,
          # which would leave the gauge stuck at its first value.
          active.send(:synchronize) { active.set({}, (active.get({}) || 0) + 1) }
          messages.increment(type: "connect")
        elsif msg =~ %r{postfix/smtpd\[\d+\]: disconnect from }
          active.send(:synchronize) do
            remaining = (active.get({}) || 0) - 1
            # If we start running mid-stream, we might see more disconnects
            # than connections; never report a negative count.
            remaining = 0 if remaining < 0
            active.set({}, remaining)
          end
          messages.increment(type: "disconnect")
        elsif msg =~ %r{postfix/smtpd\[\d+\]: [A-F0-9]+: client=}
          # A queue ID was assigned, i.e. the message was accepted.
          incoming.increment(dsn: "2.0.0", status: "queued")
          messages.increment(type: "queued")
        elsif msg =~ %r{postfix/smtpd\[\d+\]: NOQUEUE: reject: RCPT from \S+: \d{3} (\d+\.\d+\.\d+) }
          incoming.increment(dsn: $1, status: "rejected")
          messages.increment(type: "noqueue")
        else
          messages.increment(type: "ignored")
        end
      rescue StandardError => ex
        $stderr.puts "Error while receiving postfix logs: #{ex.message} (#{ex.class})"
        $stderr.puts ex.backtrace.map { |l| " #{l}" }.join("\n")
        log_errors.increment(class: ex.class.to_s)
        # Back off briefly so a persistent failure does not spin the CPU.
        sleep 1
      end
    end
  end
end
164
+
165
# Rack stack: the Prometheus exporter middleware sits in front of a
# catch-all 404 responder, with bodies over 512 bytes being deflated.
compress_large_bodies = ->(_env, _status, _headers, body) { body.any? && body[0].length > 512 }
not_found = ->(_env) { [404, {'Content-Type' => 'text/plain'}, ['NOPE NOPE NOPE NOPE']] }

app = Rack::Builder.new
app.use(Rack::Deflater, if: compress_large_bodies)
app.use(Prometheus::Middleware::Exporter)
app.run(not_found)

# WEBrick's own log lines go to stderr, reduced to a prefixed message.
logger = Logger.new($stderr)
logger.level = Logger::INFO
logger.formatter = proc { |_severity, _time, _progname, message| "WEBrick: #{message}\n" }

# This is the only way to get the Rack-mediated webrick to listen on both
# INADDR_ANY and IN6ADDR_ANY on libcs that don't support getaddrinfo("*")
# (ie musl-libc). Setting `Host: '*'` barfs on those libcs, `Host: '::'`
# fails on newer rubies (because they use setsockopt(V6ONLY) by default), and
# with RACK_ENV at its default of "development" it only listens on localhost.
# Even this only works on Rack 2, because before that the non-development
# default listen address was "0.0.0.0".
ENV['RACK_ENV'] = "none"
Rack::Handler::WEBrick.run(app, Port: 9154, Logger: logger, AccessLog: [])
@@ -0,0 +1,32 @@
1
# git-version-bump is optional: when it cannot be loaded, the version and
# date below fall back to placeholder values.
begin
  require 'git-version-bump'
rescue LoadError
  nil
end

Gem::Specification.new do |spec|
  spec.name = "postfix-exporter"

  spec.version =
    begin
      GVB.version
    rescue StandardError
      "0.0.0.1.NOGVB"
    end
  spec.date =
    begin
      GVB.date
    rescue StandardError
      Time.now.strftime("%Y-%m-%d")
    end

  spec.platform = Gem::Platform::RUBY

  spec.summary = "Export Prometheus statistics for a Postfix server"

  spec.authors  = ["Matt Palmer"]
  spec.email    = ["matt.palmer@discourse.org"]
  spec.homepage = "https://github.com/discourse/postfix-exporter"

  # Package everything git tracks except paths matching the pattern below.
  tracked_files = %x(git ls-files -z).split("\0")
  spec.files = tracked_files.reject { |path| path =~ /^(G|spec|Dockerfile|Rakefile)/ }
  spec.executables = ["postfix-exporter"]

  spec.required_ruby_version = ">= 2.1.0"

  spec.add_runtime_dependency('prometheus-client', '~> 0.7', '< 0.10')
  spec.add_runtime_dependency('rack', '~> 2.0')

  spec.add_development_dependency('bundler')
  spec.add_development_dependency('github-release')
  spec.add_development_dependency('rake')
end
metadata ADDED
@@ -0,0 +1,123 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: postfix-exporter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Matt Palmer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-10-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: prometheus-client
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.7'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '0.10'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '0.7'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '0.10'
33
+ - !ruby/object:Gem::Dependency
34
+ name: rack
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '2.0'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '2.0'
47
+ - !ruby/object:Gem::Dependency
48
+ name: bundler
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ - !ruby/object:Gem::Dependency
62
+ name: github-release
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ type: :development
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ - !ruby/object:Gem::Dependency
76
+ name: rake
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ type: :development
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ description:
90
+ email:
91
+ - matt.palmer@discourse.org
92
+ executables:
93
+ - postfix-exporter
94
+ extensions: []
95
+ extra_rdoc_files: []
96
+ files:
97
+ - ".gitignore"
98
+ - README.md
99
+ - bin/postfix-exporter
100
+ - postfix-exporter.gemspec
101
+ homepage: https://github.com/discourse/postfix-exporter
102
+ licenses: []
103
+ metadata: {}
104
+ post_install_message:
105
+ rdoc_options: []
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: 2.1.0
113
+ required_rubygems_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ requirements: []
119
+ rubygems_version: 3.0.3
120
+ signing_key:
121
+ specification_version: 4
122
+ summary: Export Prometheus statistics for a Postfix server
123
+ test_files: []