s3snapshot 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +7 -0
- data/.loadpath +5 -0
- data/.project +17 -0
- data/Gemfile +4 -0
- data/Gemfile.lock.bak +34 -0
- data/Rakefile +11 -0
- data/Readme.markdown +44 -0
- data/bin/s3snapshot +3 -0
- data/lib/s3snapshot/backup_manager.rb +280 -0
- data/lib/s3snapshot/cli.rb +155 -0
- data/lib/s3snapshot/dir_download.rb +81 -0
- data/lib/s3snapshot/dir_upload.rb +65 -0
- data/lib/s3snapshot/sync_op.rb +63 -0
- data/lib/s3snapshot/test_loader.rb +52 -0
- data/lib/s3snapshot/time_factory.rb +27 -0
- data/lib/s3snapshot/version.rb +3 -0
- data/lib/s3snapshot.rb +4 -0
- data/s3snapshot.gemspec +24 -0
- metadata +130 -0
data/.gitignore
ADDED
data/.loadpath
ADDED
data/.project
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<projectDescription>
|
3
|
+
<name>s3snapshot</name>
|
4
|
+
<comment></comment>
|
5
|
+
<projects>
|
6
|
+
</projects>
|
7
|
+
<buildSpec>
|
8
|
+
<buildCommand>
|
9
|
+
<name>org.rubypeople.rdt.core.rubybuilder</name>
|
10
|
+
<arguments>
|
11
|
+
</arguments>
|
12
|
+
</buildCommand>
|
13
|
+
</buildSpec>
|
14
|
+
<natures>
|
15
|
+
<nature>org.rubypeople.rdt.core.rubynature</nature>
|
16
|
+
</natures>
|
17
|
+
</projectDescription>
|
data/Gemfile
ADDED
data/Gemfile.lock.bak
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
s3snapshot (0.0.1)
|
5
|
+
fog (~> 0.8.1)
|
6
|
+
thor (~> 0.14.6)
|
7
|
+
|
8
|
+
GEM
|
9
|
+
remote: http://rubygems.org/
|
10
|
+
specs:
|
11
|
+
builder (3.0.0)
|
12
|
+
excon (0.6.3)
|
13
|
+
fog (0.8.1)
|
14
|
+
builder
|
15
|
+
excon (~> 0.6.1)
|
16
|
+
formatador (>= 0.1.3)
|
17
|
+
json
|
18
|
+
mime-types
|
19
|
+
net-ssh (>= 2.1.3)
|
20
|
+
nokogiri (>= 1.4.4)
|
21
|
+
ruby-hmac
|
22
|
+
formatador (0.1.3)
|
23
|
+
json (1.5.1)
|
24
|
+
mime-types (1.16)
|
25
|
+
net-ssh (2.1.4)
|
26
|
+
nokogiri (1.4.4)
|
27
|
+
ruby-hmac (0.4.0)
|
28
|
+
thor (0.14.6)
|
29
|
+
|
30
|
+
PLATFORMS
|
31
|
+
ruby
|
32
|
+
|
33
|
+
DEPENDENCIES
|
34
|
+
s3snapshot!
|
data/Rakefile
ADDED
data/Readme.markdown
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# Overview
|
2
|
+
|
3
|
+
This gem is designed to sync an immutable directory to a timestamped prefix on Amazon S3. In principle it is similar to Time Machine for Mac. We use this utility to create snapshots of our Cassandra data.
|
4
|
+
|
5
|
+
# Installing
|
6
|
+
|
7
|
+
gem install s3snapshot
|
8
|
+
|
9
|
+
# Operations
|
10
|
+
|
11
|
+
* Backup a directory
|
12
|
+
* Restore a specific snapshot time to a directory
|
13
|
+
* Restore latest complete snapshot to a directory
|
14
|
+
* List all prefixes
|
15
|
+
* List all times for prefixes
|
16
|
+
* Clean incomplete uploads (Use wisely, can delete a backup in progress)
|
17
|
+
* Perform rolling cleanup. Can keep a user defined number of daily and weekly backups with a user specified day. All days and weeks are deltas calculated from the timestamp of the last successful backup to s3.
|
18
|
+
|
19
|
+
# Algorithm
|
20
|
+
|
21
|
+
Below is a general outline on how the plugin was designed to work. No meta data is stored on S3. Every time this plugin is launched it performs an analysis of S3 to ensure it is always using a correct state of backups. 2 instances should never access the same prefix concurrently, this could cause issues with data consistency.
|
22
|
+
|
23
|
+
## Snapshot path
|
24
|
+
|
25
|
+
All snapshot paths are of the format [prefix]/[snapshot utc time]. In our usage, we typically use the prefix of node+directory, such as test-cass-west-1_snapshot
|
26
|
+
|
27
|
+
## Rolling cleanup
|
28
|
+
|
29
|
+
Capture the current time UTC, and truncate to 00:00 hours
|
30
|
+
|
31
|
+
Remove all incomplete backups before start time
|
32
|
+
|
33
|
+
Analyze all time stamps, if more than one backup is present per day, only keep the latest complete backup for that day
|
34
|
+
|
35
|
+
For all backups, if older than the max weekly, remove it; if between the oldest weekly and the oldest daily, only keep it if it falls on the day specified
|
36
|
+
|
37
|
+
|
38
|
+
# Notes
|
39
|
+
|
40
|
+
Occasionally a cleanup operation will miss a time stamp. From my testing this appears to be due to the amazon eventual consistency, and the timestamp not being returned on a delim search. On the next run it is usually deleted
|
41
|
+
|
42
|
+
#Future features
|
43
|
+
|
44
|
+
* Use native s3 copying for unmodified files (similar to s3sync). If a file exists in the previous completed backup and has not changed, copy it on the new path in s3, this will avoid the need to upload unmodified files.
|
data/bin/s3snapshot
ADDED
@@ -0,0 +1,280 @@
|
|
1
|
+
require 's3snapshot/time_factory'
require 's3snapshot/sync_op'
require 'dictionary'

module S3snapshot
  ##
  # Handles retrieving all current backups and performing operations on them.
  # This object is stateful: once snapshots are loaded they are cached.
  # Create a new instance or call clear_snapshots to force a reload from aws.
  class BackupManager < SyncOp

    # Number of seconds in a day / week, used by the rolling-cleanup math
    SECONDS_DAY = 60 * 60 * 24
    SECONDS_WEEK = SECONDS_DAY * 7

    def initialize(aws_id, aws_key, bucket_name)
      super(aws_id, aws_key, bucket_name)
    end

    ##
    # All top-level prefixes in the bucket (each value ends with "/")
    def prefixes
      bucket.files.all(:delimiter => "/").common_prefixes
    end

    ##
    # Ordered map of snapshots for a prefix: key = Time, value = true when the
    # snapshot's completion marker was found. Cached per prefix after the
    # first (expensive) load from S3.
    def snapshots(prefix)
      cached = get_snapshot(prefix)
      return cached unless cached.nil?

      # Dictionary preserves insertion order; S3 lists keys lexicographically,
      # so ISO-8601 timestamps come back oldest-first.
      prefix_snap = Dictionary.new

      timestamps(prefix).each do |timestamp|
        time = Time.parse(timestamp)
        prefix_snap[time] = read_complete?(prefix, time)
      end

      set_snapshot(prefix, prefix_snap)

      prefix_snap
    end

    ##
    # Time of the newest completed backup for the prefix, or nil if none.
    # BUGFIX: entries are ordered oldest-first, so returning the first
    # complete entry (as before) yielded the OLDEST complete backup. Scan all
    # entries and keep the last complete one instead.
    def latest(prefix)
      newest = nil

      snapshots(prefix).each do |time, complete|
        newest = time if complete
      end

      newest
    end

    ##
    # Drop the cached snapshot map for a prefix so the next read hits aws.
    # Guarded so it is a no-op before anything has been cached (previously
    # raised NoMethodError when @snapshots was still nil).
    def clear_snapshots(prefix)
      @snapshots[prefix] = nil unless @snapshots.nil?
    end

    # True when the backup at +time+ exists and its completion marker was seen
    def complete?(prefix, time)
      value = snapshots(prefix)[time]
      value.nil? ? false : value
    end

    ##
    # True when any snapshot (complete or not) exists at +time+
    def exists?(prefix, time)
      !snapshots(prefix)[time].nil?
    end

    ##
    # Removes all incomplete backups. Use wisely: will blitz a backup in
    # progress. Removal targets are collected first so the snapshot map is
    # never mutated while it is being iterated.
    def clean(prefix)
      incomplete = []

      snapshots(prefix).each do |time, complete|
        incomplete << time unless complete
      end

      incomplete.each { |time| remove(prefix, time) }
    end

    ##
    # Delete all files from a snapshot. The completion marker is destroyed
    # first so other clients never see a partially deleted backup as complete.
    def remove(prefix, timestamp)
      complete_marker = bucket.files.get(complete_path(prefix, timestamp))

      # Destroying the marker first prevents other clients from treating this
      # backup as complete while its files are being deleted.
      complete_marker.destroy unless complete_marker.nil?

      list_files(prefix, timestamp).each do |file|
        file.destroy
      end

      # Keep the local cache in sync (no-op if nothing is cached yet)
      cached = get_snapshot(prefix)
      cached.delete(timestamp) unless cached.nil?
    end

    ##
    # All files stored under this prefix + time
    def list_files(prefix, time)
      bucket.files.all(:prefix => timepath(prefix, time))
    end

    ##
    # Rolling cleanup for a prefix. Keeps the newest daily backup for each of
    # the last +num_days+ days and one weekly backup (on +day_of_week+, cron
    # style: 0 = sunday .. 6 = saturday) for the last +num_weeks+ weeks.
    def roll(prefix, num_days, num_weeks, day_of_week)
      # Current UTC time truncated to midnight
      start = TimeFactory.utc_time
      start = Time.utc(start.year, start.month, start.day)

      # Incomplete backups are never kept
      clean(prefix)

      # Collapse multiple backups on one day down to the newest
      merge_days(prefix, start)

      snaps = snapshots(prefix)

      # Nothing to do
      return if snaps.nil? || snaps.empty?

      # Deltas are measured from the newest completed backup, not "now"
      newest_time = snaps.keys.last

      # Truncate the window edges to 00:00 UTC
      oldest_daily = newest_time - SECONDS_DAY * num_days
      oldest_daily = Time.utc(oldest_daily.year, oldest_daily.month, oldest_daily.day)

      oldest_weekly = newest_time - SECONDS_WEEK * num_weeks
      oldest_weekly = Time.utc(oldest_weekly.year, oldest_weekly.month, oldest_weekly.day)

      doomed = []

      snaps.each do |time, complete|
        # Entries are oldest-first; once inside the daily window we're done
        break if time >= oldest_daily

        # Older than the weekly window, or not on the weekly retention day
        doomed << time if time < oldest_weekly || !same_day(time, day_of_week)
      end

      # Remove after iterating so the snapshot map isn't mutated mid-walk
      doomed.each { |time| remove(prefix, time) }
    end

    private

    # True if the backup's completion marker exists on S3. Performs a listing,
    # so this can be slow.
    def read_complete?(prefix, time)
      found_prefixes = bucket.files.all(:prefix => timepath(prefix, time), :delimiter => COMPLETE_EXTENSION).common_prefixes

      !found_prefixes.nil? && found_prefixes.length > 0
    end

    ##
    # Returns true if the time occurs on the same day_of_week. day_of_week
    # follows cron style syntax: 0 = sunday and 6 = saturday.
    def same_day(time, day_of_week)
      unless day_of_week > -1 && day_of_week < 7
        raise "Invalid day of week. Expected 0-6 but received #{day_of_week}"
      end

      time.wday == day_of_week
    end

    ##
    # Iterates over all snapshots and removes duplicates within a single day,
    # keeping only the newest complete backup for that day.
    def merge_days(prefix, start)
      doomed = []
      previous = nil

      snapshots(prefix).each do |time, complete|
        # Skip anything after the "start" cutoff, and incomplete backups
        next if time > start || !complete

        if samedate?(previous, time)
          # Two complete backups on the same day: keep the newest
          if previous.to_i > time.to_i
            doomed << time
          else
            doomed << previous
            previous = time
          end

          next
        end

        previous = time
      end

      # Remove after the walk so the map is never mutated while iterating
      doomed.each { |time| remove(prefix, time) }
    end

    # True when both times fall on the same calendar date (assumes UTC).
    # BUGFIX: also compares the year — comparing yday alone made dates exactly
    # a year apart look identical.
    def samedate?(first, second)
      !first.nil? && !second.nil? && first.year == second.year && first.yday == second.yday
    end

    # Timestamp prefixes ("<prefix>/<iso time>/") found under the prefix
    def timestamps(prefix)
      bucket.files.all(:prefix => prefix_string(prefix), :delimiter => "/").common_prefixes
    end

    def prefix_string(prefix)
      "#{prefix}/"
    end

    ##
    # Cached snapshot map for a prefix, or nil when not loaded yet
    def get_snapshot(prefix)
      @snapshots.nil? ? nil : @snapshots[prefix]
    end

    ##
    # Store a snapshot map in the per-instance cache
    def set_snapshot(prefix, snaphash)
      @snapshots ||= Hash.new
      @snapshots[prefix] = snaphash
    end

  end
end
|
@@ -0,0 +1,155 @@
|
|
1
|
+
require 'thor'
require 's3snapshot/dir_upload'
require 's3snapshot/dir_download'
require 's3snapshot/backup_manager'

module S3snapshot
  ##
  # Thor command-line interface for s3snapshot: backup, restore, listing and
  # cleanup commands against a timestamped-prefix layout in an S3 bucket.
  class CLI < Thor

    desc "backup", "upload a directory as a snapshot backup to s3"

    method_option :awsid, :aliases => "-i", :desc => "The aws id", :type => :string, :required => true
    method_option :awskey,:aliases => "-k", :desc => "The aws secret key", :type => :string, :required => true
    method_option :bucket, :aliases => "-b", :desc => "The aws bucket to use", :type => :string, :required => true
    method_option :directory, :aliases => "-d", :desc => "The directory to upload", :type => :string, :required => true
    method_option :prefix, :aliases => "-p", :desc => "A prefix to prepend to the path before the timestamp. Useful in cluster to specifiy a node name, or a node+directory scheme. Prefix strategies can be mixed in a bucket, they must just be unique." , :type => :string, :required => true

    ##
    # Uploads the directory to the s3 bucket under the prefix + current time
    def backup
      dir = options[:directory]
      puts "You are uploading directory #{dir}"

      uploader = DirUpload.new(options[:awsid], options[:awskey], options[:bucket], options[:prefix], dir)
      uploader.upload
    end

    desc "restore", "restore all files from a snapshot to a directory"

    method_option :awsid, :aliases => "-i", :desc => "The aws id", :type => :string, :required => true
    method_option :awskey,:aliases => "-k", :desc => "The aws secret key", :type => :string, :required => true
    method_option :bucket, :aliases => "-b", :desc => "The aws bucket to use", :type => :string, :required => true
    method_option :prefix, :aliases => "-p", :desc => "The prefix to prepend to before searching for snapshots" , :type => :string, :required => true
    method_option :time, :aliases => "-t", :desc => "The timestamp to restore" , :type => :string, :required => true
    method_option :dest, :aliases => "-d", :desc => "The destination directory for downloaded files" , :type => :string, :required => true

    ##
    # Downloads a specific snapshot (by timestamp) into the destination dir
    def restore
      snapshot_time = Time.parse(options[:time])

      downloader = DirDownload.new(options[:awsid], options[:awskey], options[:bucket], options[:prefix], snapshot_time, options[:dest])
      downloader.download
    end

    desc "restorelatest", "restore the latest snapshot to a directory"

    method_option :awsid, :aliases => "-i", :desc => "The aws id", :type => :string, :required => true
    method_option :awskey,:aliases => "-k", :desc => "The aws secret key", :type => :string, :required => true
    method_option :bucket, :aliases => "-b", :desc => "The aws bucket to use", :type => :string, :required => true
    method_option :prefix, :aliases => "-p", :desc => "The prefix to prepend to before searching for snapshots" , :type => :string, :required => true
    method_option :dest, :aliases => "-d", :desc => "The destination directory for downloaded files" , :type => :string, :required => true

    ##
    # Finds the newest complete snapshot for the prefix and downloads it.
    # Exits with status 1 when no complete snapshot exists.
    def restorelatest
      manager = BackupManager.new(options[:awsid], options[:awskey], options[:bucket])
      latest_complete = manager.latest(options[:prefix])

      if latest_complete.nil?
        puts "Cannot find a complete snapshot with prefix #{options[:prefix]}"
        exit 1
      end

      downloader = DirDownload.new(options[:awsid], options[:awskey], options[:bucket], options[:prefix], latest_complete, options[:dest])
      downloader.download
    end

    desc "prefixes", "list all prefixes in an s3 bucket"

    method_option :awsid, :aliases => "-i", :desc => "The aws id", :type => :string, :required => true
    method_option :awskey,:aliases => "-k", :desc => "The aws secret key", :type => :string, :required => true
    method_option :bucket, :aliases => "-b", :desc => "The aws bucket to use", :type => :string, :required => true

    ##
    # Prints every top-level prefix in the bucket (trailing "/" stripped)
    def prefixes
      manager = BackupManager.new(options[:awsid], options[:awskey], options[:bucket])

      puts "Found the following prefixes\n\n"

      manager.prefixes.each { |prefix| puts prefix[0..-2] }

      puts "\n"
    end

    desc "snapshots", "list all snapshots for a prefix in an s3 bucket"

    method_option :awsid, :aliases => "-i", :desc => "The aws id", :type => :string, :required => true
    method_option :awskey,:aliases => "-k", :desc => "The aws secret key", :type => :string, :required => true
    method_option :bucket, :aliases => "-b", :desc => "The aws bucket to use", :type => :string, :required => true
    method_option :prefix, :aliases => "-p", :desc => "The prefix to prepend to before searching for snapshots" , :type => :string, :required => true

    ##
    # Prints every snapshot timestamp under the prefix with its status
    def snapshots
      manager = BackupManager.new(options[:awsid], options[:awskey], options[:bucket])

      snap_map = manager.snapshots(options[:prefix])

      puts "Found the following timestamps from prefix #{options[:prefix]}\n\n"

      snap_map.each do |key, value|
        result = value ? "complete" : "unknown"

        puts "Time: #{key.iso8601}, Status: #{result}"
      end

      puts "\n"
    end

    desc "clean", "Remove all snapshots in the prefix that do not have a complete status. Use wisely, could remove backups in progress for a prefix causing corruption"

    method_option :awsid, :aliases => "-i", :desc => "The aws id", :type => :string, :required => true
    method_option :awskey,:aliases => "-k", :desc => "The aws secret key", :type => :string, :required => true
    method_option :bucket, :aliases => "-b", :desc => "The aws bucket to use", :type => :string, :required => true
    method_option :prefix, :aliases => "-p", :desc => "The prefix to prepend to before searching for snapshots" , :type => :string, :required => true

    ##
    # Deletes every incomplete snapshot under the prefix
    def clean
      manager = BackupManager.new(options[:awsid], options[:awskey], options[:bucket])

      manager.clean(options[:prefix])
    end

    desc "roll", "Analyze all snapshots and keep the latest daily for each day. Then keep n days specified from the command line, and n weeks for the specified day of week"

    method_option :awsid, :aliases => "-i", :desc => "The aws id", :type => :string, :required => true
    method_option :awskey,:aliases => "-k", :desc => "The aws secret key", :type => :string, :required => true
    method_option :bucket, :aliases => "-b", :desc => "The aws bucket to use", :type => :string, :required => true
    method_option :prefix, :aliases => "-p", :desc => "The prefix to prepend to before searching for snapshots" , :type => :string, :required => true
    method_option :numdays, :aliases => "-n", :desc => "The number of days to keep" , :type => :numeric, :required => true
    method_option :numweeks, :aliases => "-w", :desc => "The number of weeks to keep" , :type => :numeric, :required => true
    method_option :dayofweek, :aliases => "-o", :desc => "The day of week to keep. 0 based like cron 0 = sunday 6 = saturday" , :type => :numeric, :required => true

    ##
    # Performs the rolling daily/weekly cleanup for the prefix
    def roll
      manager = BackupManager.new(options[:awsid], options[:awskey], options[:bucket])

      manager.roll(options[:prefix], options[:numdays], options[:numweeks], options[:dayofweek])
    end

  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'fog'
require 's3snapshot/sync_op'
require 's3snapshot/backup_manager'
require 'time'
require 'fileutils'

module S3snapshot
  ##
  # Downloads every file of a single snapshot ([prefix]/[timestamp]) from S3
  # into a local directory, recreating the snapshot's directory layout.
  class DirDownload < SyncOp

    def initialize(aws_id, aws_key, bucket_name, prefix, time, local_dir)
      super(aws_id, aws_key, bucket_name)
      @prefix = prefix
      @time = time
      @local_dir = local_dir
    end

    ##
    # Validates that the snapshot exists and is complete, then downloads all
    # of its files. Prints to stderr and returns without downloading when
    # validation fails.
    def download
      # Check the backup's state before touching the local filesystem
      manager = BackupManager.new(@aws_id, @aws_key, @bucket_name)

      unless manager.exists?(@prefix, @time)
        $stderr.puts "Backup with prefix '#{@prefix}' and time #{@time.iso8601} does not exist. Please check the prefix and time"
        return
      end

      unless manager.complete?(@prefix, @time)
        $stderr.puts "Backup with prefix '#{@prefix}' and time #{@time.iso8601} is not complete. The backup is either in progress or never finished. This snapshot is not safe to restore!"
        return
      end

      # Get all files from this backup
      files = manager.list_files(@prefix, @time)

      # BUGFIX: mkdir_p instead of mkdir so a destination path whose ancestors
      # don't exist yet can be created in one shot
      FileUtils.mkdir_p(@local_dir) unless File.directory?(@local_dir)

      prefix_path = timepath(@prefix, @time)

      files.each do |file|
        # Strip "<prefix>/<timestamp>/" off the key to get the relative path
        destination_path = "#{@local_dir}/#{file.key[prefix_path.length + 1..-1]}"

        directory = destination_path[0..-File.basename(destination_path).length - 1]

        # BUGFIX: was FileUtils.mkdir, which raises when a key is nested more
        # than one level below an existing directory. mkdir_p creates all
        # missing ancestors.
        FileUtils.mkdir_p(directory) unless File.directory?(directory)

        puts "downloading '#{file.key}' to '#{destination_path}'"

        # Open read/write, creating the file if needed, and write the s3 body
        File.open(destination_path, File::RDWR | File::CREAT) { |local| local.write(file.body) }
      end

      # BUGFIX: previously printed "Writing complete marker" / "backup
      # complete!" — messages copy-pasted from the upload path
      puts "restore complete!"
    end

  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'fog'
require 's3snapshot/sync_op'
require 's3snapshot/time_factory'
require 'time'
require 'fileutils'

module S3snapshot
  ##
  # Uploads every file under a local directory to S3 under the path
  # [prefix]/[start timestamp]/..., then writes a completion marker so other
  # clients can tell the snapshot finished successfully.
  #
  # FIX: removed the class-level @tmpdir/@local_dir/@prefix assignments — at
  # class scope those were class-instance variables, never visible to
  # instances, i.e. dead and misleading code.
  class DirUpload < SyncOp

    def initialize(aws_id, aws_key, bucket_name, prefix, local_dir)
      super(aws_id, aws_key, bucket_name)
      @local_dir = local_dir
      @prefix = prefix
    end

    ##
    # Uploads all files, then writes the completion marker whose body records
    # when the upload finished.
    def upload
      # Capture the snapshot timestamp once, up front, so every file lands
      # under the same [prefix]/[time] path
      start_time = TimeFactory.utc_time

      prefix_path = timepath(@prefix, start_time)

      get_local_files.each do |file|
        # Key = snapshot path + the file's path relative to @local_dir
        path = "#{prefix_path}/#{file[@local_dir.length + 1..-1]}"

        puts "uploading '#{file}' to '#{@bucket_name}/#{path}'"
        # NOTE(review): File.read slurps the whole file into memory; large
        # files may warrant a streaming body — confirm fog's IO-body support.
        bucket.files.create(:key => path, :body => File.read(file))
      end

      puts "Writing complete marker"

      # Upload the complete marker; other clients treat its presence as
      # "this snapshot is safe to restore"
      bucket.files.create(:key => complete_path(@prefix, start_time), :body => TimeFactory.utc_time.iso8601)

      puts "backup complete!"
    end

    private

    ##
    # Recursively collects all regular (non-directory) file paths under
    # @local_dir.
    def get_local_files
      Dir.glob(File.join(@local_dir, '**', '*')).reject { |file| File.directory?(file) }
    end

  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'fog'

module S3snapshot
  ##
  # Base class for S3 operations. Holds AWS credentials and the bucket name,
  # and provides the path helpers shared by the upload/download/management
  # subclasses.
  #
  # FIX: removed the bare class-level @bucket_name/@aws_id/@aws_key/@aws/
  # @bucket expressions — at class scope those referenced (nil) class-instance
  # variables and were no-op dead code.
  class SyncOp

    # File written once a snapshot upload finishes; its presence marks the
    # snapshot as complete
    COMPLETE_FILE = "s3snapshot"
    COMPLETE_EXTENSION = "complete_marker"
    COMPLETE_MARKER = "#{COMPLETE_FILE}.#{COMPLETE_EXTENSION}"

    def initialize(aws_id, aws_key, bucket_name)
      @bucket_name = bucket_name
      @aws_id = aws_id
      @aws_key = aws_key
    end

    ##
    # The cached fog AWS storage connection, created on first use
    def aws
      @aws ||= Fog::Storage.new(:provider => 'AWS', :aws_access_key_id => @aws_id, :aws_secret_access_key => @aws_key)
    end

    ##
    # The fog directory for @bucket_name. Deliberately not cached (the
    # memoized form is commented out upstream): a fresh lookup per call keeps
    # listings consistent with S3's current state.
    def bucket
      aws.directories.get(@bucket_name)
    end

    # Time path for a snapshot. With a prefix the format is
    # <prefix>/<timestamp>; timestamps are ISO-8601 in UTC.
    def timepath(prefix, time)
      "#{prefix}/#{time.utc.iso8601}"
    end

    #
    # Full key of the completion marker for the given prefix and time
    #
    def complete_path(prefix, time)
      "#{timepath(prefix, time)}/#{COMPLETE_MARKER}"
    end

    ##
    # Key prefix matching the completion marker without its extension:
    # [prefix]/[iso time]/s3snapshot
    def complete_prefix(prefix, time)
      "#{timepath(prefix, time)}/#{COMPLETE_FILE}"
    end

  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'thor'
require 's3snapshot/dir_upload'
require 's3snapshot/time_factory'

module S3snapshot
  ##
  # Test-data generator: uploads the same directory repeatedly, back-dating
  # each snapshot via TimeFactory so a bucket can be seeded with a history of
  # backups spaced 'hours' apart.
  class TestLoader < Thor

    desc "gendata", "upload a directory as a snapshot backup to s3"

    method_option :awsid, :aliases => "-i", :desc => "The aws id", :type => :string, :required => true
    method_option :awskey,:aliases => "-k", :desc => "The aws secret key", :type => :string, :required => true
    method_option :bucket, :aliases => "-b", :desc => "The aws bucket to use", :type => :string, :required => true
    method_option :directory, :aliases => "-d", :desc => "The directory to upload", :type => :string, :required => true
    method_option :prefix, :aliases => "-p", :desc => "A prefix to prepend to the path before the timestamp. Useful in cluster to specifiy a node name, or a node+directory scheme. Prefix strategies can be mixed in a bucket, they must just be unique." , :type => :string, :required => true
    # BUGFIX: hours/numbackups were optional but used without nil checks,
    # which crashed with NoMethodError when omitted; required => clean error.
    # NOTE(review): "-h" collides with Thor's help shortcut — consider renaming.
    method_option :hours, :aliases => "-h", :desc => "The number of hours between backups to generate a timestamp for", :type => :numeric, :required => true
    method_option :numbackups, :aliases => "-n", :desc=> "The maximum number of iterations to run at 'hours' interval", :type => :numeric, :required => true

    ##
    # Uploads the directory 'numbackups' times, advancing the faked clock by
    # 'hours' between uploads so the final snapshot lands near "now".
    def gendata
      directory = options[:directory]
      puts "You are uploading directory #{directory}"

      hours = options[:hours]
      backups = options[:numbackups]

      # Subtract hours * backups (in seconds) so iteration starts in the past
      time = Time.now.utc - hours * backups * 3600

      backups.times do
        # BUGFIX: was "TimeFactory.set_time (time)" — space before the paren
        # triggers a Ruby parser warning
        TimeFactory.set_time(time)
        s3upload = DirUpload.new(options[:awsid], options[:awskey], options[:bucket], options[:prefix], directory)
        s3upload.upload

        time += hours * 3600
      end
    ensure
      # Restore the real clock in case anything else runs in this process
      TimeFactory.unset_time
    end
  end
end

S3snapshot::TestLoader.start
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module S3snapshot
  ##
  # Simple time factory: returns a fixed, injected time when one has been set
  # (used by test-data generation), otherwise the current time in UTC.
  class TimeFactory
    # FIX: class-instance variable instead of a @@class variable — @@ vars are
    # shared across the whole inheritance tree and are a well-known footgun.
    @set_time = nil

    class << self
      ##
      # The fixed time if one was injected via set_time, else Time.now.utc
      def utc_time
        @set_time || Time.now.utc
      end

      ##
      # Freeze the factory: utc_time returns +time+ until unset_time is called
      def set_time(time)
        @set_time = time
      end

      ##
      # Clear any fixed time; utc_time returns the real clock again
      def unset_time
        @set_time = nil
      end
    end

  end
end
|
data/lib/s3snapshot.rb
ADDED
data/s3snapshot.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
$:.push File.expand_path("../lib", __FILE__)
require "s3snapshot/version"

# Gem packaging definition for s3snapshot
Gem::Specification.new do |s|
  s.name        = "s3snapshot"
  s.version     = S3snapshot::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Todd"]
  s.email       = ["foo@bar.com"]
  s.homepage    = ""
  s.summary     = %q{Uploads to s3}
  s.description = %q{see summary}

  s.rubyforge_project = "s3snapshot"

  # File lists are derived from git so packaging always tracks the repository
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_dependency "fog", "~>0.8.1"
  s.add_dependency "thor", "~>0.14.6"
  s.add_dependency "dictionary", "~>1.0.0"
end
|
metadata
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: s3snapshot
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Todd
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-05-23 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: fog
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ~>
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 61
|
29
|
+
segments:
|
30
|
+
- 0
|
31
|
+
- 8
|
32
|
+
- 1
|
33
|
+
version: 0.8.1
|
34
|
+
type: :runtime
|
35
|
+
version_requirements: *id001
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: thor
|
38
|
+
prerelease: false
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ~>
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
hash: 43
|
45
|
+
segments:
|
46
|
+
- 0
|
47
|
+
- 14
|
48
|
+
- 6
|
49
|
+
version: 0.14.6
|
50
|
+
type: :runtime
|
51
|
+
version_requirements: *id002
|
52
|
+
- !ruby/object:Gem::Dependency
|
53
|
+
name: dictionary
|
54
|
+
prerelease: false
|
55
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ~>
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
hash: 23
|
61
|
+
segments:
|
62
|
+
- 1
|
63
|
+
- 0
|
64
|
+
- 0
|
65
|
+
version: 1.0.0
|
66
|
+
type: :runtime
|
67
|
+
version_requirements: *id003
|
68
|
+
description: see summary
|
69
|
+
email:
|
70
|
+
- foo@bar.com
|
71
|
+
executables:
|
72
|
+
- s3snapshot
|
73
|
+
extensions: []
|
74
|
+
|
75
|
+
extra_rdoc_files: []
|
76
|
+
|
77
|
+
files:
|
78
|
+
- .gitignore
|
79
|
+
- .loadpath
|
80
|
+
- .project
|
81
|
+
- Gemfile
|
82
|
+
- Gemfile.lock.bak
|
83
|
+
- Rakefile
|
84
|
+
- Readme.markdown
|
85
|
+
- bin/s3snapshot
|
86
|
+
- lib/s3snapshot.rb
|
87
|
+
- lib/s3snapshot/backup_manager.rb
|
88
|
+
- lib/s3snapshot/cli.rb
|
89
|
+
- lib/s3snapshot/dir_download.rb
|
90
|
+
- lib/s3snapshot/dir_upload.rb
|
91
|
+
- lib/s3snapshot/sync_op.rb
|
92
|
+
- lib/s3snapshot/test_loader.rb
|
93
|
+
- lib/s3snapshot/time_factory.rb
|
94
|
+
- lib/s3snapshot/version.rb
|
95
|
+
- s3snapshot.gemspec
|
96
|
+
homepage: ""
|
97
|
+
licenses: []
|
98
|
+
|
99
|
+
post_install_message:
|
100
|
+
rdoc_options: []
|
101
|
+
|
102
|
+
require_paths:
|
103
|
+
- lib
|
104
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
hash: 3
|
110
|
+
segments:
|
111
|
+
- 0
|
112
|
+
version: "0"
|
113
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
114
|
+
none: false
|
115
|
+
requirements:
|
116
|
+
- - ">="
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
hash: 3
|
119
|
+
segments:
|
120
|
+
- 0
|
121
|
+
version: "0"
|
122
|
+
requirements: []
|
123
|
+
|
124
|
+
rubyforge_project: s3snapshot
|
125
|
+
rubygems_version: 1.8.2
|
126
|
+
signing_key:
|
127
|
+
specification_version: 3
|
128
|
+
summary: Uploads to s3
|
129
|
+
test_files: []
|
130
|
+
|