s3_rotate 1.0.0

checksums.yaml.gz ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA256:
+   metadata.gz: d0a47b24554da5a2107fbb2c2ac19957f2053916c8d1202e6c1dac94358bfa2d
+   data.tar.gz: 50c281cd8b4453e2075570a8e6a1f1a905e865c5d253215fb193b0769b780650
+ SHA512:
+   metadata.gz: 9995b0a95d18a51414526b46c3670bac87a53c6355c25e50f6785787a198b3f61aa72547b68fceaaa650cbf822a75cf2dbf8b17f7d1f0fddf75ed50e1ef0785c
+   data.tar.gz: 725bfa0b4974e2ffca31775954c0c505ec87a2f8ad1560f4e6f045a11d67bab6261d5158e9ea9de3a66d2d8ff4b93663628509a2cee3233d1ee20f5ed30fe688
data/.gitignore ADDED
@@ -0,0 +1,5 @@
+ # gems
+ *.gem
+
+ # tmp
+ tmp
data/.rspec ADDED
@@ -0,0 +1 @@
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,10 @@
+ source 'https://rubygems.org'
+
+ # AWS connection (https://github.com/fog/fog-aws)
+ gem 'fog-aws', '~> 3.5.2'
+
+ group :development, :test do
+   # unit tests
+   gem 'rspec'
+   gem 'rspec-mocks'
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,55 @@
+ GEM
+   remote: https://rubygems.org/
+   specs:
+     builder (3.2.4)
+     diff-lcs (1.3)
+     excon (0.71.1)
+     fog-aws (3.5.2)
+       fog-core (~> 2.1)
+       fog-json (~> 1.1)
+       fog-xml (~> 0.1)
+       ipaddress (~> 0.8)
+     fog-core (2.2.0)
+       builder
+       excon (~> 0.71)
+       formatador (~> 0.2)
+       mime-types
+     fog-json (1.2.0)
+       fog-core
+       multi_json (~> 1.10)
+     fog-xml (0.1.3)
+       fog-core
+       nokogiri (>= 1.5.11, < 2.0.0)
+     formatador (0.2.5)
+     ipaddress (0.8.3)
+     mime-types (3.3.1)
+       mime-types-data (~> 3.2015)
+     mime-types-data (3.2019.1009)
+     mini_portile2 (2.4.0)
+     multi_json (1.14.1)
+     nokogiri (1.10.7)
+       mini_portile2 (~> 2.4.0)
+     rspec (3.9.0)
+       rspec-core (~> 3.9.0)
+       rspec-expectations (~> 3.9.0)
+       rspec-mocks (~> 3.9.0)
+     rspec-core (3.9.1)
+       rspec-support (~> 3.9.1)
+     rspec-expectations (3.9.0)
+       diff-lcs (>= 1.2.0, < 2.0)
+       rspec-support (~> 3.9.0)
+     rspec-mocks (3.9.1)
+       diff-lcs (>= 1.2.0, < 2.0)
+       rspec-support (~> 3.9.0)
+     rspec-support (3.9.2)
+
+ PLATFORMS
+   ruby
+
+ DEPENDENCIES
+   fog-aws (~> 3.5.2)
+   rspec
+   rspec-mocks
+
+ BUNDLED WITH
+    1.17.2
data/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2019 Whova, Inc.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,241 @@
+ # S3 Rotate
+ `S3 Rotate` provides easy backup rotation management on Amazon AWS S3.
+
+ `S3 Rotate` was developed to solve the following issues:
+ - be able to easily and automatically upload all kinds of backup files to S3
+ - be able to safely and automatically clean up old backup files on the local machine, to prevent running out of disk space
+ - be able to safely and automatically rotate & clean up old backup files on S3, to keep only what matters
+
+
+
+ ## Installation
+ Install the gem:
+ ```bash
+ gem install s3_rotate
+ ```
+
+ Then, in your Ruby code:
+ ```ruby
+ require 's3_rotate'
+ ```
+
+
+
+ ## Use Case
+ Let's say you have two services for which you generate daily backups:
+ - A Gitlab server, generating daily backups under `/var/opt/gitlab/backups/` with the following format: `1578804325_2020_01_11_12.6.2_gitlab_backup.tar` (`timestamp_YYYY_MM_dd_version_gitlab_backup.tar`)
+ - A DefectDojo server, generating daily backups under `/data/defect-dojo/backups` with the following format: `dojo-2020_01_25.sql` (`dojo-YYYY_MM_dd.sql`)
+
+ In particular, let's say you have the following on your host machine:
+ ```
+ $> ls -l /var/opt/gitlab/backups/
+ -rw-r--r-- 1 git git 3754987520 Jan 11 20:46 1578804325_2020_01_11_12.6.2_gitlab_backup.tar
+ -rw-r--r-- 1 git git 3754936320 Jan 12 20:45 1578890716_2020_01_12_12.6.2_gitlab_backup.tar
+ -rw-r--r-- 1 git git 3765555200 Jan 13 20:47 1578977175_2020_01_13_12.6.2_gitlab_backup.tar
+ -rw-r--r-- 1 git git 3765207040 Jan 14 20:52 1579063838_2020_01_14_12.6.2_gitlab_backup.tar
+ -rw-r--r-- 1 git git 3803136000 Jan 15 20:53 1579150281_2020_01_15_12.6.2_gitlab_backup.tar
+ -rw-r--r-- 1 git git 3819448320 Jan 16 20:49 1579236516_2020_01_16_12.6.2_gitlab_backup.tar
+ -rw-r--r-- 1 git git 3819223040 Jan 17 20:53 1579323130_2020_01_17_12.6.2_gitlab_backup.tar
+ -rw-r--r-- 1 git git 3819151360 Jan 18 20:50 1579409341_2020_01_18_12.6.2_gitlab_backup.tar
+ -rw-r--r-- 1 git git 3819499520 Jan 19 20:50 1579495780_2020_01_19_12.6.2_gitlab_backup.tar
+ -rw-r--r-- 1 git git 3816120320 Jan 20 20:48 1579582055_2020_01_20_12.6.2_gitlab_backup.tar
+ -rw-r--r-- 1 git git 3818106880 Jan 21 20:54 1579668786_2020_01_21_12.6.2_gitlab_backup.tar
+ -rw-r--r-- 1 git git 3815270400 Jan 22 20:50 1579754971_2020_01_22_12.6.2_gitlab_backup.tar
+ -rw-r--r-- 1 git git 3813365760 Jan 23 20:50 1579841383_2020_01_23_12.6.2_gitlab_backup.tar
+ -rw-r--r-- 1 git git 3814205440 Jan 24 20:48 1579927638_2020_01_24_12.6.2_gitlab_backup.tar
+ -rw-r--r-- 1 git git 3814236160 Jan 25 20:52 1580014249_2020_01_25_12.6.2_gitlab_backup.tar
+ -rw-r--r-- 1 git git 3818567680 Jan 26 20:45 1580100263_2020_01_26_12.6.2_gitlab_backup.tar
+
+
+ $> ls -l /data/defect-dojo/backups
+ -rw-r--r-- 1 root root 226529 Jan 25 20:15 dojo-2020_01_25.sql
+ -rw-r--r-- 1 root root 226529 Jan 26 20:15 dojo-2020_01_26.sql
+ ```
+
+ If left unmanaged, you may run into several issues:
+ - `Number of Backups`: The number of backups will quickly grow, consuming disk space and requiring manual operations to clean up the old backups
+ - `Size of Backups`: The size of each individual backup file will continuously increase, resulting in more frequent manual operations
+ - `Hardware Failures`: The backups are currently located on a single machine, making them vulnerable to hardware failures
+ - `Additional Backups`: Backups from additional services may need to be handled in the long run, each service having a different way to generate backups (different disk location, different naming, ...)
+
+ The upload feature of `S3 Rotate` will upload to S3 any new backup file for Gitlab, DefectDojo, or anything else.
+ The newly uploaded backups are categorized as `daily`.
+
+ The rotate feature of `S3 Rotate` is more interesting and will do the following:
+ - `local rotate`: You can specify how many `local` backups you want to keep. Only the most recent ones within your limit will remain; the others will be deleted.
+ - `daily rotate`: Every week, one of the `daily` backups on AWS S3 is converted into a `weekly` backup on AWS S3. Additionally, you can specify how many `daily` backups you want to keep. Only the most recent ones within your limit will remain; the others will be deleted. A sketch of the promotion rule follows this list.
+ - `weekly rotate`: Every month, one of the `weekly` backups on AWS S3 is converted into a `monthly` backup on AWS S3. Additionally, you can specify how many `weekly` backups you want to keep. Only the most recent ones within your limit will remain; the others will be deleted.
+ - `monthly rotate`: You can specify how many `monthly` backups you want to keep. Only the most recent ones within your limit will remain; the others will be deleted.
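+
+ Here is that sketch: the daily-to-weekly promotion boils down to a date comparison. This is an illustration of the rule only, not the gem's internal code (the helper name is hypothetical):
+ ```ruby
+ require 'date'
+
+ # A daily backup is promoted to weekly once it is at least one
+ # week newer than the most recent weekly backup (illustrative only)
+ def promote_to_weekly?(most_recent_weekly_date, daily_date)
+   most_recent_weekly_date.nil? || daily_date >= most_recent_weekly_date + 7
+ end
+
+ promote_to_weekly?(Date.new(2020, 3, 18), Date.new(2020, 3, 25)) # => true
+ promote_to_weekly?(Date.new(2020, 3, 18), Date.new(2020, 3, 22)) # => false
+ ```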
+
+ For example, if you have the following:
+ - one backup every day from January 1st to March 31st
+ - only want to keep the 14 most recent backups locally
+ - only want to keep the 7 most recent daily backups on AWS S3
+ - only want to keep the 4 most recent weekly backups on AWS S3
+ - only want to keep the 3 most recent monthly backups on AWS S3
+
+ `S3 Rotate` will do the following:
+ - Every day, your new backup will be uploaded as a new daily backup on AWS S3
+ - If you have more than 14 backups locally, the oldest ones will be removed until only 14 remain
+ - If your most recent weekly backup is one week apart from one of your daily backups, that daily backup will be promoted into a weekly backup
+ - If you have more than 7 daily backups on AWS S3, the oldest ones will be removed until only 7 remain
+ - If your most recent monthly backup is one month apart from one of your weekly backups, that weekly backup will be promoted into a monthly backup
+ - If you have more than 4 weekly backups on AWS S3, the oldest ones will be removed until only 4 remain
+ - If you have more than 3 monthly backups on AWS S3, the oldest ones will be removed until only 3 remain
+
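+ Under this configuration, each day's scheduled run maps to one `upload` and one `rotate` call. This is a sketch based on the signatures documented in the Usage section below (the `"gitlab"` backup name and the underscore date regex are illustrative):
+ ```ruby
+ require 's3_rotate'
+
+ backup_manager = S3Rotate::BackupManager.new(aws_access_key_id, aws_secret_access_key, bucket_name, region)
+ # upload today's backup as a new daily backup, then enforce the limits:
+ # 14 local, 7 daily, 4 weekly, 3 monthly
+ backup_manager.upload("gitlab", "/var/opt/gitlab/backups", /\d{4}_\d{2}_\d{2}/)
+ backup_manager.rotate("gitlab", "/var/opt/gitlab/backups", 14, 7, 4, 3)
+ ```
+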
+ With this configuration, on March 31st, your local server will look like:
+ ```
+ /var/opt/gitlab/backups/ # 14 most recent local backups
+ timestamp_2020_03_31_12.6.2_gitlab_backup.tar
+ timestamp_2020_03_30_12.6.2_gitlab_backup.tar
+ timestamp_2020_03_29_12.6.2_gitlab_backup.tar
+ timestamp_2020_03_28_12.6.2_gitlab_backup.tar
+ timestamp_2020_03_27_12.6.2_gitlab_backup.tar
+ timestamp_2020_03_26_12.6.2_gitlab_backup.tar
+ timestamp_2020_03_25_12.6.2_gitlab_backup.tar
+ timestamp_2020_03_24_12.6.2_gitlab_backup.tar
+ timestamp_2020_03_23_12.6.2_gitlab_backup.tar
+ timestamp_2020_03_22_12.6.2_gitlab_backup.tar
+ timestamp_2020_03_21_12.6.2_gitlab_backup.tar
+ timestamp_2020_03_20_12.6.2_gitlab_backup.tar
+ timestamp_2020_03_19_12.6.2_gitlab_backup.tar
+ timestamp_2020_03_18_12.6.2_gitlab_backup.tar
+ ```
+
+ With this configuration, on March 31st, your AWS S3 bucket will look like:
+ ```
+ bucket/
+   gitlab/
+     daily/ # 7 most recent daily backups
+       2020-03-31.tar
+       2020-03-30.tar
+       2020-03-29.tar
+       2020-03-28.tar
+       2020-03-27.tar
+       2020-03-26.tar
+       2020-03-25.tar
+
+     weekly/ # 4 most recent weekly backups
+       2020-03-25.tar
+       2020-03-18.tar
+       2020-03-11.tar
+       2020-03-04.tar
+
+     monthly/ # 3 most recent monthly backups
+       2020-03-01.tar
+       2020-02-01.tar
+       2020-01-01.tar
+
+   defect-dojo/
+     [...]
+     [...]
+ ```
+
+ ## Usage
+ ### S3Rotate::BackupManager.upload
+ Prototype:
+ ```ruby
+ #
+ # Upload local backup files to AWS S3
+ # Only uploads new backups
+ # Only uploads backups as daily backups: use `rotate` to generate the weekly & monthly files
+ #
+ # @param backup_name String containing the name of the backup to upload
+ # @param local_backups_path String containing the path to the directory containing the backups
+ # @param date_regex Regex returning the date contained in the filename of each backup
+ #
+ # @return nothing
+ #
+ def upload(backup_name, local_backups_path, date_regex=/\d{4}-\d{2}-\d{2}/)
+ ```
+
+ Example:
+ ```ruby
+ require 's3_rotate'
+
+ backup_manager = S3Rotate::BackupManager.new(aws_access_key_id, aws_secret_access_key, bucket_name, region)
+ backup_manager.upload("defect-dojo-backup", "/var/opt/defect-dojo/backups")
+ ```
+
+ `S3Rotate::BackupManager.upload(backup_name, local_backups_path, date_regex)` uploads the new backups from a local directory to your AWS S3 bucket.
+ - `backup_name`: This is how you want to name your group of backups. It will be used to create a directory on the AWS S3 bucket under which your backups will be stored. It can be anything you want
+ - `local_backups_path`: This is the path to the local directory containing your backups to be uploaded
+ - `date_regex`: To rotate backups from daily to weekly, and from weekly to monthly, `S3 Rotate` needs to determine which date is related to each backup file. This is done by extracting the date information from the filename, using the regex specified in `date_regex`.
+
+ `date_regex` is the most important part here: without it, `S3 Rotate` does not know when your backup was generated and cannot rotate your backups properly. Here are some examples of regex:
+ - if your backups look like `1578804325_2020_01_11_12.6.2_gitlab_backup.tar`, you can use `date_regex=/\d{4}_\d{2}_\d{2}/` (this will match `2020_01_11`)
+ - if your backups look like `1578804325_gitlab_backup.tar`, you can use `date_regex=/(\d+)_gitlab_backup.tar/` (this will match `1578804325`)
+
+ As of now, the date matched by `date_regex` can be:
+ - any string that can be parsed by `Date.parse`
+ - a timestamp
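+
+ A quick way to check a regex before wiring it up: the snippet below extracts and parses the date from both filename styles. This is a standalone sketch, not the gem's internal parsing code (the underscore-to-hyphen conversion is just one way to make the match `Date.parse`-friendly):
+ ```ruby
+ require 'date'
+
+ # date-style filename: the regex matches "2020_01_11"
+ match = '1578804325_2020_01_11_12.6.2_gitlab_backup.tar'[/\d{4}_\d{2}_\d{2}/]
+ Date.parse(match.tr('_', '-'))   # => 2020-01-11
+
+ # timestamp-style filename: the capture group matches "1578804325"
+ ts = '1578804325_gitlab_backup.tar'[/(\d+)_gitlab_backup.tar/, 1]
+ Time.at(ts.to_i).utc.to_date     # => 2020-01-12 (UTC)
+ ```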
+
+ ### S3Rotate::BackupManager.rotate
+ Prototype:
+ ```ruby
+ #
+ # Rotate files (local, daily, weekly, monthly) and apply maximum limits for each type
+ #
+ # @param backup_name String containing the name of the backup to rotate
+ # @param local_backups_dir String containing the path to the directory containing the backups
+ # @param max_local Integer specifying the maximum number of local backups to keep
+ # @param max_daily Integer specifying the maximum number of daily backups to keep
+ # @param max_weekly Integer specifying the maximum number of weekly backups to keep
+ # @param max_monthly Integer specifying the maximum number of monthly backups to keep
+ #
+ # @return nothing
+ #
+ def rotate(backup_name, local_backups_dir, max_local=3, max_daily=7, max_weekly=4, max_monthly=3)
+ ```
+
+ Example:
+ ```ruby
+ require 's3_rotate'
+
+ backup_manager = S3Rotate::BackupManager.new(aws_access_key_id, aws_secret_access_key, bucket_name, region)
+ backup_manager.rotate("defect-dojo-backup", "/var/opt/defect-dojo/backups")
+ ```
+
+ `S3Rotate::BackupManager.rotate` handles the backup rotation detailed previously in `Use Case`.
+ - `backup_name`: This is how you named your group of backups. It must match the `backup_name` used in `upload`, and identifies the directory on the AWS S3 bucket under which your backups are stored
+ - `local_backups_dir`: This is the path to the local directory containing your backups to be rotated
+ - `max_local`: This is the maximum number of local backups you want to keep.
+ - `max_daily`: This is the maximum number of AWS S3 daily backups you want to keep.
+ - `max_weekly`: This is the maximum number of AWS S3 weekly backups you want to keep.
+ - `max_monthly`: This is the maximum number of AWS S3 monthly backups you want to keep.
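+
+ In practice, you would typically run `upload` followed by `rotate` from a daily scheduled job. A hypothetical crontab entry (the script path is an assumption; a matching script is shown in the gem's executable further below in this diff) could look like:
+ ```bash
+ # every day at 2am: upload the new backups, then rotate the old ones
+ 0 2 * * * /usr/local/bin/s3_rotate_backups
+ ```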
+
+
+
+ ## Tests
+ This gem has 100% test coverage.
+
+ You can run all the tests with:
+ ```bash
+ $> bundle exec rspec .
+ ```
+
+
+ ## Areas of improvement
+ - Days are currently the smallest unit of time, but it could be interesting to provide hourly rotation as well.
+ - Backups stored on S3 only carry 3 pieces of information: the name of the backup, the type of backup (daily, weekly, monthly), and the date of the backup. It could be interesting to retain more information (for example, the version of the service that generated the backup). It is currently possible to encode such information in the backup name, but that is not optimal in all situations.
+ - There is currently no way to disable rotation & cleanup for one unit of time (days, weeks, months), except by setting an arbitrarily high maximum number of files to retain.
+ - `S3 Rotate` only supports AWS S3, while it could be interesting to support additional providers.
+
+
+
+ ## Author
+ [Simon Ninon](https://github.com/Cylix), for [Whova](https://whova.com)
+
+
+
+ ## License
+ [MIT License](LICENSE)
+
+
+
+ ## Contribute
+ 1. Fork
+ 2. Create your branch (`git checkout -b my-branch`)
+ 3. Commit your new features (`git commit -am 'New features'`)
+ 4. Push (`git push origin my-branch`)
+ 5. Make a `Pull request`
@@ -0,0 +1,7 @@
+ #!/usr/bin/env ruby
+
+ require 's3_rotate'
+
+ backup_manager = S3Rotate::BackupManager.new('aws_access_key_id', 'aws_secret_access_key', 'bucket_name', 'region')
+ backup_manager.upload("defect-dojo-backup", "/var/opt/defect-dojo/backups")
+ backup_manager.rotate("defect-dojo-backup", "/var/opt/defect-dojo/backups")
data/lib/s3_rotate.rb ADDED
@@ -0,0 +1,4 @@
+ require 's3_rotate/aws/s3_client'
+ require 's3_rotate/core/backup_manager'
+ require 's3_rotate/core/backup_rotator'
+ require 's3_rotate/core/backup_uploader'
data/lib/s3_rotate/aws/s3_client.rb ADDED
@@ -0,0 +1,93 @@
+ require 'fog-aws'
+
+ module S3Rotate
+
+   class S3Client
+
+     # attributes
+     attr_accessor :access_key
+     attr_accessor :access_secret
+     attr_accessor :bucket_name
+     attr_accessor :region
+     attr_accessor :connection
+
+     #
+     # Initialize a new S3Client instance.
+     #
+     # @param key String representing the AWS ACCESS KEY ID.
+     # @param secret String representing the AWS ACCESS KEY SECRET.
+     # @param bucket String representing the name of the bucket to use.
+     # @param region String representing the region to connect to.
+     #
+     # @return the newly instantiated object.
+     #
+     def initialize(key, secret, bucket, region)
+       @access_key = key
+       @access_secret = secret
+       @bucket_name = bucket
+       @region = region
+     end
+
+     #
+     # Get the S3 bucket.
+     #
+     # @return Fog::Storage::AWS::Directory instance.
+     #
+     def bucket
+       @bucket ||= connection.directories.get(bucket_name)
+     end
+
+     #
+     # Get the S3 connection.
+     #
+     # @return Fog::Storage instance.
+     #
+     def connection
+       @connection ||= Fog::Storage.new(provider: 'AWS', aws_access_key_id: access_key, aws_secret_access_key: access_secret, region: region)
+     end
+
+     #
+     # Get the list of remote backups for a specific `backup_name` and `type`
+     #
+     # @param backup_name String containing the name of the backup to retrieve
+     # @param type String, one of `daily`, `weekly` or `monthly`
+     #
+     # @return Fog::Storage::AWS::Directory instance.
+     #
+     def remote_backups(backup_name, type)
+       connection.directories.get(bucket_name, prefix: "/#{backup_name}/#{type}")
+     end
+
+     #
+     # Check if a remote backup exists
+     #
+     # @param backup_name String containing the name of the backup to retrieve
+     # @param backup_date Date representing the date of the backup
+     # @param type String, one of `daily`, `weekly` or `monthly`
+     # @param extension Optional, String containing the file extension of the backup
+     #
+     # @return Boolean, True or False, whether the remote backup exists
+     #
+     def exists?(backup_name, backup_date, type, extension=nil)
+       connection.directories.get(bucket_name, prefix: "/#{backup_name}/#{type}/#{backup_date.to_s}#{extension}").files.any?
+     end
+
+     #
+     # Upload raw data to AWS S3
+     #
+     # @param backup_name String containing the name of the backup to upload
+     # @param backup_date Date representing the date of the backup
+     # @param type String representing the type of backup being uploaded, one of "daily", "weekly" or "monthly"
+     # @param extension String containing the file extension of the backup (nil if not needed)
+     # @param data String containing the data to be uploaded
+     #
+     # @return created S3 Bucket File
+     #
+     def upload(backup_name, backup_date, type, extension, data)
+       # 104857600 bytes => 100 megabytes
+       bucket.files.create(key: "/#{backup_name}/#{type}/#{backup_date.to_s}#{extension}", body: data, multipart_chunk_size: 104857600)
+     end
+
+   end
+
+ end