mtr_monitor 0.18.2 → 0.19.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/Makefile +43 -0
- data/README.md +27 -62
- data/Rakefile +29 -26
- data/docker-compose.yml +21 -2
- data/docs/provision-new-ec2-machine.md +53 -0
- data/http_endpoint/Dockerfile +3 -0
- data/http_endpoint/nginx.conf +26 -0
- data/lib/mtr_monitor.rb +7 -0
- data/lib/mtr_monitor/version.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aeabbe48387119e1600b78288faf7a8020eabb01
|
4
|
+
data.tar.gz: bfd527edfa5ff5269ef72763d7521e431de43624
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: edbf53bf490b0714e699b6f57cc40fb3f01058822cc5ea1ab0f15dbbf0c5ab142fb14261dd1c61f5cb09f08dd0190a17c4393320af5d589b89ce11bc5887ddfc
|
7
|
+
data.tar.gz: 7b98dd952a32caeadf513029bc9364164c103a6f39140a47b816dc033fedd5b81e742dd1ad749de113fe98130f77d222de757c7cd1ee3dc79063b8a79802b9a1
|
data/Gemfile.lock
CHANGED
data/Makefile
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
.PHONY: publish-gem gem-publish monitor-build monitor-push http-endpoint-build http-endpoint-push deploy-us-east-1 deploy-us-west-1 deploy-us-west-2
|
2
|
+
|
3
|
+
gem-build:
|
4
|
+
gem install bundler
|
5
|
+
bundle install
|
6
|
+
chmod 0600 /home/runner/.gem/credentials
|
7
|
+
gem build mtr_monitor.gemspec
|
8
|
+
|
9
|
+
gem-publish: gem-build
|
10
|
+
gem push mtr_monitor-*
|
11
|
+
|
12
|
+
monitor-build:
|
13
|
+
docker build -t renderedtext/mtr-monitor .
|
14
|
+
|
15
|
+
monitor-push: monitor-build
|
16
|
+
docker push renderedtext/mtr-monitor
|
17
|
+
|
18
|
+
http-endpoint-build:
|
19
|
+
cd http_endpoint && docker build -t renderedtext/mtr-nginx .
|
20
|
+
|
21
|
+
http-endpoint-push: http-endpoint-build
|
22
|
+
docker push renderedtext/mtr-nginx
|
23
|
+
|
24
|
+
deploy-us-east-1:
|
25
|
+
NAME="us-east-1-to-hetzner" \
|
26
|
+
SERVER="ec2.mtr-monitor.us-east-1.semaphoreci.com" \
|
27
|
+
HOSTNAME="mtr-monitor.us-east-1.semaphoreci.com" \
|
28
|
+
MTR_OPTIONS="-z -c 50 -f 12" \
|
29
|
+
bundle exec rake deploy
|
30
|
+
|
31
|
+
deploy-us-west-1:
|
32
|
+
NAME="us-west-1-to-hetzner" \
|
33
|
+
SERVER="ec2.mtr-monitor.us-west-1.semaphoreci.com" \
|
34
|
+
HOSTNAME="mtr-monitor.us-west-1.semaphoreci.com" \
|
35
|
+
MTR_OPTIONS="-z -c 50 -f 12" \
|
36
|
+
bundle exec rake deploy
|
37
|
+
|
38
|
+
deploy-us-west-2:
|
39
|
+
NAME="us-west-2-to-hetzner" \
|
40
|
+
SERVER="ec2.mtr-monitor.us-west-2.semaphoreci.com" \
|
41
|
+
HOSTNAME="mtr-monitor.us-west-2.semaphoreci.com" \
|
42
|
+
MTR_OPTIONS="-z -c 50 -f 12" \
|
43
|
+
bundle exec rake deploy
|
data/README.md
CHANGED
@@ -6,6 +6,9 @@ In December 2017, Hetzner, our hosting provider for the Build Platform, had a
|
|
6
6
|
major network incident that lasted for almost a whole week. Our users were
|
7
7
|
rightly frustrated.
|
8
8
|
|
9
|
+
You can find more information about the incident
|
10
|
+
[in our public Post Mortem](http://semaphoreci.com/blog/2018/01/18/dec-12-17-network-incident-report.html).
|
11
|
+
|
9
12
|
To prevent and monitor these situations in the future, we have set up a
|
10
13
|
transatlantic monitoring system based on MTR reports and Curl-ing important
|
11
14
|
vendors for our platform such as GitHub and DockerHub. This system should
|
@@ -15,34 +18,42 @@ DockerHub).
|
|
15
18
|
This project is part of the effort to have a readily available MTR reports
|
16
19
|
before, during and after incidents, that we can send to Hetzner.
|
17
20
|
|
18
|
-
The
|
19
|
-
|
20
|
-
|
21
|
+
The project consists of two parts. A MTR monitor that continuously tests the
|
22
|
+
quality of the network by running `mtr` from both sides of the Atlantic, and
|
23
|
+
a CURL monitor that continuously tries to establish an HTTPS connection to the
|
24
|
+
other side of the Atlantic.
|
25
|
+
|
26
|
+
MTR reports are generated every 5 minutes and uploaded to an S3 bucket. Results
|
27
|
+
of CURL tests are displayed on the
|
28
|
+
[Platform — Network](https://semaphore.grafana.net/dashboard/db/platform-network?refresh=10s&orgId=1)
|
29
|
+
Grafana dashboard and are connected to PagerDuty based alerts.
|
21
30
|
|
22
31
|
Currently, we have the following routes covered:
|
23
32
|
|
24
33
|
- Germany(Hetzner) -> AWS US East 1 (part of Job Runner)
|
25
34
|
- Germany(Hetzner) -> AWS US West 1 (part of Job Runner)
|
26
35
|
- Germany(Hetzner) -> AWS US West 2 (part of Job Runner)
|
27
|
-
- Germany(Hetzner) -> GitHub (part of Job Runner)
|
28
|
-
- Germany(Hetzner) -> DockerHub (part of Job Runner)
|
29
|
-
- Germany(Hetzner) -> Stripe (part of Job Runner)
|
30
|
-
- Germany(Hetzner) -> SemaphoreCI (part of Job Runner)
|
31
36
|
- AWS US East 1 -> Builder sb1 in Hetzner (standalone AWS instance with Docker container)
|
32
37
|
- AWS US West 1 -> Builder sb1 in Hetzner (standalone AWS instance with Docker container)
|
33
38
|
- AWS US West 2 -> Builder sb1 in Hetzner (standalone AWS instance with Docker container)
|
34
39
|
|
35
|
-
|
36
|
-
|
37
|
-
dashboard on Grafana.
|
40
|
+
The tests from Germany are executed from every Builder machine, where this
|
41
|
+
project is injected as a gem.
|
38
42
|
|
39
|
-
The US based MTR monitors
|
43
|
+
The DNS records of the US based MTR monitors are the following:
|
40
44
|
|
41
45
|
- `mtr-monitor.us-east-1.semaphoreci.com`
|
42
46
|
- `mtr-monitor.us-west-1.semaphoreci.com`
|
43
47
|
- `mtr-monitor.us-west-2.semaphoreci.com`
|
44
48
|
|
45
|
-
|
49
|
+
These records point to the Load Balancer. If you want to SSH into the machines,
|
50
|
+
use the following commands:
|
51
|
+
|
52
|
+
- `ssh ubuntu@ec2.mtr-monitor.us-east-1.semaphoreci.com`
|
53
|
+
- `ssh ubuntu@ec2.mtr-monitor.us-west-1.semaphoreci.com`
|
54
|
+
- `ssh ubuntu@ec2.mtr-monitor.us-west-2.semaphoreci.com`
|
55
|
+
|
56
|
+
To create a new MTR monitor follow [this guide](docs/provision-new-ec2-machine.md).
|
46
57
|
|
47
58
|
## Location of the generated MTR reports
|
48
59
|
|
@@ -163,16 +174,10 @@ To update the version used in Job Runner:
|
|
163
174
|
The MTR monitor can be used as a standalone Docker container. This is our
|
164
175
|
current approach for monitors that are hitting Germany from the United States.
|
165
176
|
|
166
|
-
To run a standalone MTR monitor, run the following command:
|
167
|
-
|
168
|
-
``` bash
|
169
|
-
docker run --name mtr-monitor -d -v /var/log/mtr:/var/log/mtr -e NAME=<> -e DOMAIN=<> -e MTR_OPTIONS=<> -e S3_BUCKET=<> -e AWS_ACCESS_KEY_ID=<> -e AWS_SECRET_ACCESS_KEY=<> -e SLEEP_TIME=<> renderedtext/mtr_monitor
|
170
|
-
```
|
171
|
-
|
172
177
|
By default, the containers running on us-east-1, us-west-1, and us-west-2 are
|
173
178
|
automatically deployed on every merge into master for this repository.
|
174
179
|
|
175
|
-
The
|
180
|
+
The container on the ec2 machines will trigger a MTR report generation every 5
|
176
181
|
minutes. Every time a Report is generated the following is executed:
|
177
182
|
|
178
183
|
- a new MTR report is generated on your local system under the `/var/log/mtr` directory
|
@@ -192,49 +197,9 @@ MTR hops are also submitted to Grafana. Based on these metrics you can observe
|
|
192
197
|
the packet loss, avg, best, and worst latency on the network. For more
|
193
198
|
information read the code in `lib/mtr_monitor/metrics.rb`.
|
194
199
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
14.04 operating system.
|
199
|
-
|
200
|
-
2. SSH into the machine with the newly generated SSH keypair.
|
201
|
-
|
202
|
-
3. Add RT developers to the authorized keys file. For a list of public keys,
|
203
|
-
refer to `s3://renderedtext-secrets/stg1-semaphore/authorized-keys`.
|
204
|
-
|
205
|
-
4. Install docker. Run `curl https://get.docker.com | curl`.
|
206
|
-
|
207
|
-
5. Add the `ubuntu` user to docker group. `sudo usermod -aG docker ubuntu`
|
208
|
-
|
209
|
-
6. Re-login into the SSH session.
|
210
|
-
|
211
|
-
7. Pull and Run the MTR monitor:
|
212
|
-
|
213
|
-
``` bash
|
214
|
-
docker run --name mtr-monitor -d -v /var/log/mtr:/var/log/mtr -e NAME=<> -e DOMAIN=<> -e MTR_OPTIONS=<> -e S3_BUCKET=<> -e AWS_ACCESS_KEY_ID=<> -e AWS_SECRET_ACCESS_KEY=<> -e SLEEP_TIME=<> renderedtext/mtr_monitor
|
215
|
-
```
|
216
|
-
|
217
|
-
If you want to keep this machine permanently, add it to the list of continuously
|
218
|
-
deployed servers.
|
219
|
-
|
220
|
-
#### Continuously deploying MTR monitor to a EC2 machine
|
221
|
-
|
222
|
-
To deploy MTR monitor as docker container within EC2 machine:
|
223
|
-
1. Make sure Docker is installed:
|
224
|
-
```
|
225
|
-
curl https://get.docker.com | sudo bash
|
226
|
-
```
|
227
|
-
|
228
|
-
2. Run deploy rake task in [Rakefile](https://github.com/renderedtext/mtr-monitor/blob/master/Rakefile#L8) by passing server IP address like:
|
229
|
-
```
|
230
|
-
SERVER=1.2.3.4 bundle exec rake deploy
|
231
|
-
```
|
232
|
-
|
233
|
-
#### Set up Alerts and Monitoring for a MTR monitor
|
234
|
-
|
235
|
-
Each deployed MTR monitor needs to has its own panel on grafana [Platform - Network](https://semaphore.grafana.net/dashboard/db/platform-network?orgId=1) dashboard.
|
236
|
-
|
237
|
-
Make sure to setup BuildServers alert.
|
200
|
+
This is deployed as a docker-compose group of docker images. One docker image
|
201
|
+
generates the MTR reports, while the other one exposes an nginx server that
|
202
|
+
responds yes to incoming requests.
|
238
203
|
|
239
204
|
### MTR incident tracing
|
240
205
|
|
data/Rakefile
CHANGED
@@ -5,35 +5,38 @@ RSpec::Core::RakeTask.new(:spec)
|
|
5
5
|
|
6
6
|
task :default => :spec
|
7
7
|
|
8
|
-
desc "
|
8
|
+
desc "Generic way to deploy mtr-monitor to a server"
|
9
9
|
task :deploy do
|
10
10
|
server = ENV.fetch("SERVER")
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
12
|
+
File.open(".env", "w") do |f|
|
13
|
+
f.write("LOGDNA_INGESTION_KEY=#{ENV.fetch("LOGDNA_INGESTION_KEY")}\n")
|
14
|
+
f.write("DOMAIN=#{ENV.fetch("DOMAIN")}\n")
|
15
|
+
f.write("WATCHMAN_HOST=#{ENV.fetch("WATCHMAN_HOST")}\n")
|
16
|
+
f.write("SLEEP_TIME=#{ENV.fetch("SLEEP_TIME")}\n")
|
17
|
+
f.write("AWS_SECRET_ACCESS_KEY=#{ENV.fetch("AWS_SECRET_ACCESS_KEY")}\n")
|
18
|
+
f.write("AWS_ACCESS_KEY_ID=#{ENV.fetch("AWS_ACCESS_KEY_ID")}\n")
|
19
|
+
f.write("S3_BUCKET=#{ENV.fetch("S3_BUCKET")}\n")
|
20
|
+
f.write("MTR_OPTIONS=#{ENV.fetch("MTR_OPTIONS")}\n")
|
21
|
+
f.write("NAME=#{ENV.fetch("NAME")}\n")
|
22
|
+
f.write("DIG_IP_ADDRESS=#{ENV.fetch("DIG_IP_ADDRESS")}\n")
|
23
|
+
f.write("HOSTNAME=#{ENV.fetch("HOSTNAME")}\n")
|
24
|
+
end
|
25
|
+
|
26
|
+
run "scp -o StrictHostKeyChecking=no .env ubuntu@#{server}:/home/ubuntu/"
|
27
|
+
run "scp -o StrictHostKeyChecking=no docker-compose.yml ubuntu@#{server}:/home/ubuntu/"
|
28
|
+
run "ssh -o StrictHostKeychecking=no ubuntu@#{server} 'docker-compose down --rmi all && docker-compose up -d'"
|
29
|
+
run "rm .env"
|
30
|
+
end
|
31
|
+
|
32
|
+
def run(command)
|
33
|
+
puts "\nExecuting: #{command}"
|
34
|
+
|
35
|
+
output = `#{command}`
|
36
36
|
|
37
37
|
puts output
|
38
|
-
|
38
|
+
|
39
|
+
abort if $?.exitstatus != 0
|
40
|
+
|
41
|
+
output
|
39
42
|
end
|
data/docker-compose.yml
CHANGED
@@ -1,5 +1,24 @@
|
|
1
1
|
version: "2.0"
|
2
2
|
|
3
3
|
services:
|
4
|
-
|
5
|
-
|
4
|
+
mtr-monitor:
|
5
|
+
image: renderedtext/mtr-monitor:latest
|
6
|
+
hostname: ${HOSTNAME}
|
7
|
+
volumes:
|
8
|
+
- /var/log/mtr:/var/log/mtr
|
9
|
+
environment:
|
10
|
+
- LOGDNA_INGESTION_KEY
|
11
|
+
- DOMAIN
|
12
|
+
- WATCHMAN_HOST
|
13
|
+
- SLEEP_TIME
|
14
|
+
- AWS_SECRET_ACCESS_KEY
|
15
|
+
- AWS_ACCESS_KEY_ID
|
16
|
+
- S3_BUCKET
|
17
|
+
- MTR_OPTIONS
|
18
|
+
- NAME
|
19
|
+
- DIG_IP_ADDRESS
|
20
|
+
|
21
|
+
nginx:
|
22
|
+
image: renderedtext/mtr-nginx
|
23
|
+
ports:
|
24
|
+
- 80:80
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# Setting up a new EC2 machine for a MTR monitor
|
2
|
+
|
3
|
+
1. Buy a new EC2 machine on AWS. Choose a `t2-nano` instance type with Ubuntu
|
4
|
+
14.04 operating system. Set up security group that allows access to SSH and
|
5
|
+
HTTP ports. Generate an SSH keypair.
|
6
|
+
|
7
|
+
2. In the ec2 console, name your server by following this scheme
|
8
|
+
`mtr-monitor-<location>`. For example, in Oregon, you should call it
|
9
|
+
`mtr-monitor-us-west-2`.
|
10
|
+
|
11
|
+
3. Create a Route 53 entry for the new server. Use the Public DNS of the server
|
12
|
+
and a CNAME rule in Route 53. Use the following scheme to name your server
|
13
|
+
`ec2.mtr-monitor.<location>.semaphoreci.com`. For example, in oregon, you
|
14
|
+
should use `ec2.mtr-monitor.us-west-2.semaphoreci.com`.
|
15
|
+
|
16
|
+
4. SSH into the machine. `chmod 0400 <pemfile> && ssh -i <pemfile> ubuntu@ec2.mtr-monitor.<location>.semaphoreci.com`
|
17
|
+
|
18
|
+
5. Add RT developers to the authorized keys file. For a list of public keys,
|
19
|
+
refer to `s3://renderedtext-secrets/stg1-semaphore/authorized-keys`.
|
20
|
+
|
21
|
+
6. Install docker. Run `curl https://get.docker.com | sh`.
|
22
|
+
Add the `ubuntu` user to docker group. `sudo usermod -aG docker ubuntu`.
|
23
|
+
Re-login into the SSH session.
|
24
|
+
|
25
|
+
7. Install docker-compose. Run:
|
26
|
+
|
27
|
+
``` bash
|
28
|
+
sudo curl -L https://github.com/docker/compose/releases/download/1.18.0/docker-compose-`uname -s`-`uname -m` -o /usr/local/bin/docker-compose
|
29
|
+
sudo chmod +x /usr/local/bin/docker-compose
|
30
|
+
docker-compose -v
|
31
|
+
```
|
32
|
+
|
33
|
+
8. Provide docker credentials for docker pull. `docker login` as rtrobot.
|
34
|
+
|
35
|
+
9. Add a new deployment target to the Makefile, and trigger the deploy from
|
36
|
+
Semaphore.
|
37
|
+
|
38
|
+
10. Set up a load balancer that exposes the HTTP endpoint of the server to the
|
39
|
+
public. Set the name of the load balancer to `mtr-monitor-<location>`. Choose
|
40
|
+
HTTPS to HTTP configuration. Create a new security group with the same name
|
41
|
+
that allows 443 port from all sources. Choose the standard semaphoreci.com
|
42
|
+
certificate from the list, and set up the default health check. Finally, add
|
43
|
+
the ec2 machine to the load balancer
|
44
|
+
|
45
|
+
11. Set up a DNS endpoint in Route 53 for the new load balancer `mtr-monitor.<location>.semaphoreci.com`.
|
46
|
+
|
47
|
+
#### Set up Alerts and Monitoring for a MTR monitor
|
48
|
+
|
49
|
+
Each deployed MTR monitor needs to have its own panel on grafana
|
50
|
+
[Platform - Network](https://semaphore.grafana.net/dashboard/db/platform-network?orgId=1)
|
51
|
+
dashboard.
|
52
|
+
|
53
|
+
Make sure to setup BuildServers alert.
|
@@ -0,0 +1,26 @@
|
|
1
|
+
worker_processes 1;
|
2
|
+
|
3
|
+
events {
|
4
|
+
worker_connections 1024;
|
5
|
+
}
|
6
|
+
|
7
|
+
http {
|
8
|
+
server {
|
9
|
+
listen 80;
|
10
|
+
listen [::]:80;
|
11
|
+
|
12
|
+
client_body_timeout 12;
|
13
|
+
client_header_timeout 12;
|
14
|
+
keepalive_timeout 15;
|
15
|
+
send_timeout 10;
|
16
|
+
|
17
|
+
client_body_buffer_size 10K;
|
18
|
+
client_header_buffer_size 1k;
|
19
|
+
client_max_body_size 1m;
|
20
|
+
large_client_header_buffers 2 1k;
|
21
|
+
|
22
|
+
location / {
|
23
|
+
return 200 'yes';
|
24
|
+
}
|
25
|
+
}
|
26
|
+
}
|
data/lib/mtr_monitor.rb
CHANGED
@@ -89,6 +89,8 @@ module MtrMonitor
|
|
89
89
|
gtt
|
90
90
|
elsif mtr_log.include?("ntt.net")
|
91
91
|
ntt
|
92
|
+
elsif mtr_log.include?("core-backbone.com")
|
93
|
+
core_backbone
|
92
94
|
else
|
93
95
|
unknown
|
94
96
|
end
|
@@ -109,6 +111,11 @@ module MtrMonitor
|
|
109
111
|
@logger.info("Network provider: NTT -- Emitted to LogDNA")
|
110
112
|
end
|
111
113
|
|
114
|
+
def core_backbone
|
115
|
+
@logdna.log("Core Backbone")
|
116
|
+
@logger.info("Network provider: Core Backbone -- Emitted to LogDNA")
|
117
|
+
end
|
118
|
+
|
112
119
|
def unknown
|
113
120
|
@logdna.log("Unknown network provider ")
|
114
121
|
@logger.info("Network provider: Unknown -- Emitted to LogDNA")
|
data/lib/mtr_monitor/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mtr_monitor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.19.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- RenderedText DevOps Team
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-01-
|
11
|
+
date: 2018-01-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: logdna
|
@@ -96,13 +96,17 @@ files:
|
|
96
96
|
- Gemfile
|
97
97
|
- Gemfile.lock
|
98
98
|
- LICENSE.txt
|
99
|
+
- Makefile
|
99
100
|
- README.md
|
100
101
|
- Rakefile
|
101
102
|
- bin/console
|
102
103
|
- bin/setup
|
103
104
|
- docker-compose.yml
|
105
|
+
- docs/provision-new-ec2-machine.md
|
104
106
|
- exe/mtr-incident-trace
|
105
107
|
- exe/mtr_report
|
108
|
+
- http_endpoint/Dockerfile
|
109
|
+
- http_endpoint/nginx.conf
|
106
110
|
- lib/mtr_monitor.rb
|
107
111
|
- lib/mtr_monitor/cleaner.rb
|
108
112
|
- lib/mtr_monitor/hop.rb
|