mir 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +5 -0
- data/README.md +57 -0
- data/db/migrate/002_create_resources.rb +2 -0
- data/lib/mir/application.rb +41 -5
- data/lib/mir/disk/amazon.rb +207 -19
- data/lib/mir/disk.rb +3 -0
- data/lib/mir/index.rb +44 -8
- data/lib/mir/models/app_setting.rb +20 -5
- data/lib/mir/models/resource.rb +23 -6
- data/lib/mir/options.rb +9 -1
- data/lib/mir/utils.rb +42 -0
- data/lib/mir/version.rb +1 -1
- data/spec/disk/amazon_spec.rb +20 -0
- data/spec/index_spec.rb +33 -0
- data/spec/spec_helper.rb +4 -1
- data/spec/utils_spec.rb +43 -0
- metadata +10 -2
data/LICENSE
ADDED
@@ -0,0 +1,5 @@
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,57 @@
+# Mir
+
+Mir is a synchronization tool to help clone a directory to a remote storage
+provider. Currently only S3 is provided. A couple of the features that differentiate
+Mir from other Ruby S3 synchronization tools currently available are:
+
+* Solves S3 connection problems associated with storage and retrieval of large files (>2GB) by transparently splitting and recombining files
+* Only updates and sends resources that have fallen out of synchronization
+* Creates checksum comparisons on reads and writes to ensure complete end-to-end transmission
+* Provides a simple command line interface for pushing and pulling files
+
+The inspiration for this tool is to provide similar functionality to the classic Rsync command, but to utilize cloud-based storage providers.
+
+## Configuration
+
+Mir uses a YAML file for configuration settings. Unless you specify otherwise, Mir will look for the file 'mir_settings.yml' in the HOME and /etc/mir directories.
+
+    settings:
+      max_upload_retries: 5
+      max_threads: 5
+      cloud_provider:
+        type: s3
+        bucket_name: gotham_backup
+        access_key_id: YOUR_ACCESS_KEY
+        secret_access_key: YOUR_SECRET_ACCESS_KEY
+        chunk_size: 5242880
+      database:
+        adapter: sqlite3
+        database: foobar.db
+
+Configuration keys:
+
+* *max_upload_retries*: This is the maximum number of attempts that Mir will try to upload your file
+* *max_threads*: The maximum number of threads that will run at once
+* *cloud_provider*: Currently only S3 is provided
+* *chunk_size*: This is the maximum number of bytes that will be written to S3 per PUT request. This is useful for sending large files to S3 and avoiding connection errors.
+* *database*: Connection information for your local database. This is delegated to ActiveRecord. See [ActiveRecord#Base::establish_connection](http://api.rubyonrails.org/classes/ActiveRecord/Base.html#method-c-establish_connection) for more details.
+
+## Usage
+
+Install the gem:
+
+    gem install mir
+
+Create mir_settings.yml in the HOME or /etc/mir directories. Adjust to taste
+
+Push your local directory to S3
+
+    mir ~/mydirectory
+
+To retrieve your remote directory
+
+    mir -c ~/mydirectory
+
+## Notes
+
+This project is considered in an alpha state and is not ready for use in any sort of production environment. Additionally, this has an embarrassingly small number of specs which should encourage you not to use this for your critical storage needs.
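For illustration, the cloud_provider block in the README above maps onto the Mir::Disk::Amazon constructor added in this version: the hash keys mirror the YAML keys, and chunk_size falls back to the 5 MB default when omitted. A minimal sketch, assuming the gem is loaded with require "mir" and using placeholder credentials:

    require "mir"

    # Keys mirror the cloud_provider section of mir_settings.yml; values are placeholders.
    settings = {
      :bucket_name       => "gotham_backup",
      :access_key_id     => "YOUR_ACCESS_KEY",
      :secret_access_key => "YOUR_SECRET_ACCESS_KEY",
      :chunk_size        => 5_242_880   # omit to fall back to DEFAULT_CHUNK_SIZE (5 MB)
    }

    disk = Mir::Disk::Amazon.new(settings)
    disk.chunk_size   # => 5242880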
data/db/migrate/002_create_resources.rb
CHANGED
@@ -7,6 +7,7 @@ class CreateResources < ActiveRecord::Migration
       t.datetime :last_modified
       t.datetime :add_date, :default => DateTime.now
       t.datetime :last_synchronized
+      t.datetime :last_indexed_at
       t.boolean :is_directory, :default => false, :null => false
       t.boolean :in_progress, :default => false
       t.boolean :queued, :default => false
@@ -15,6 +16,7 @@ class CreateResources < ActiveRecord::Migration
 
     add_index :resources, :filename, :unique => true
     add_index :resources, :in_progress
+    add_index :resources, :last_indexed_at
     add_index :resources, :queued
   end
 
data/lib/mir/application.rb
CHANGED
@@ -19,8 +19,16 @@ module Mir
     def initialize
       @options = Mir::Options.parse(ARGV)
       Mir.logger = Logger.new(options.log_destination)
+      Mir.logger.level = if options.debug
+        Logger::DEBUG
+      else
+        Logger::ERROR
+      end
     end
 
+    ##
+    # Begins the synchronization operation after initializing the file index and remote storage
+    # container
     def start
       if options.copy && options.flush
         Mir.logger.error "Conflicting options: Cannot copy from remote source with an empty file index"
@@ -41,13 +49,19 @@ module Mir
 
       options.copy ? pull(param_path) : push(param_path)
     end
-
+
+    ##
+    # Returns a global configuration instance
+    #
+    # @return [Mir::Config]
     def self.config
       @@config
     end
 
+    ##
+    # Alias for +config
     def config
-
+      self.class.config
     end
 
     private
@@ -67,7 +81,10 @@ module Mir
       nil
     end
 
+    ##
     # Synchronize the local files to the remote disk
+    #
+    # @param [String] the absolute path of the folder that will be synchronized remotely
     def push(target)
       Mir.logger.info "Starting push operation"
 
@@ -87,26 +104,36 @@ module Mir
           end
         end
       end
+
+      # If any assets have been deleted locally, also remove them from remote disk
+      index.orphans.each { |orphan| disk.delete(orphan.abs_path) }
+      index.clean! # Remove orphans from index
+      puts "Completed push operation #{time}"
       Mir.logger.info time
     end
 
-
+    ##
     # Uploads a collection of resouces. Blocks until all items in queue have been processed
+    #
     # @param [WorkQueue] a submission queue to manage resource uploads
     # @param [Array] an array of Models::Resource objects that need to be uploaded
     def push_group(work_queue, resources)
       resources.each do |resource|
+        Mir.logger.debug "Enqueueing #{resource.filename}"
        work_queue.enqueue_b do
           resource.start_progress
           disk.write resource.abs_path
           resource.update_success
+          puts "Pushed #{resource.abs_path}"
         end
       end
       work_queue.join
     end
 
+    #
     # Scans a collection of resources for jobs that did no complete successfully and flags them
     # for resubmission
+    #
     # @param [Array] an array of Models::Resources
     def handle_push_failures(resources)
       resources.each do |resource|
@@ -130,17 +157,26 @@ module Mir
         # otherwise download the file
         Models::Resource.ordered_groups(DEFAULT_BATCH_SIZE) do |resources|
           resources.each do |resource|
-            dest = File.join(write_dir.path, resource.filename)
+            dest = File.join(write_dir.path, resource.filename)
             if resource.is_directory?
               Utils.try_create_dir(dest)
             elsif !resource.synchronized?(dest)
-              queue.enqueue_b
+              queue.enqueue_b do
+                disk.copy(resource.abs_path, dest)
+                if resource.synchronized?(dest)
+                  Mir.logger.info "Successful download #{dest}"
+                  puts "Pulled #{dest}"
+                else
+                  Mir.logger.error "Incomplete download #{dest}"
+                end
+              end
             end
           end
           queue.join
         end
       end
       Mir.logger.info time
+      puts "Completed pull operation #{time}"
     end
 
   end
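Taken together, the push path added above now ends each run by reconciling deletions: anything still present in the index but missing from the local tree is removed from S3 and then dropped from the index. A rough sketch of that flow, assuming index and disk are the Index and Disk::Amazon instances the application builds internally:

    # After all pending uploads have been pushed:
    index.orphans.each do |orphan|   # resources not stamped in the latest index pass
      disk.delete(orphan.abs_path)   # remove the stale copy from S3
    end
    index.clean!                     # drop the orphaned rows from the local index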
data/lib/mir/disk/amazon.rb
CHANGED
@@ -1,12 +1,24 @@
 require "right_aws"
+require "tempfile"
+require "digest/md5"
 
 module Mir
   module Disk
     class Amazon
 
-
+      # This is the default size in bytes at which files will be split and stored
+      # on S3. From trial and error, 5MB seems to be a good default size for chunking
+      # large files.
+      DEFAULT_CHUNK_SIZE = 5*(2**20)
 
-
+      attr_reader :bucket_name, :connection
+
+      #
+      # Converts a path name to a key that can be stored on s3
+      #
+      # @param [String] the path to the file
+      # @return [String] an S3-safe key with leading slashes removed
+      def self.s3_key(path)
         if path[0] == File::SEPARATOR
           path[1..-1]
         else
@@ -18,6 +30,7 @@ module Mir
         @bucket_name = settings[:bucket_name]
         @access_key_id = settings[:access_key_id]
         @secret_access_key = settings[:secret_access_key]
+        @chunk_size = settings[:chunk_size] || DEFAULT_CHUNK_SIZE
         @connection = try_connect
       end
 
@@ -26,14 +39,43 @@ module Mir
         @connection.list_bucket.select(:key)
       end
 
+      def chunk_size=(n)
+        raise ArgumentError unless n > 0
+        @chunk_size = n
+      end
+
+      def chunk_size
+        @chunk_size
+      end
+
+      # Whether the key exists in S3
+      #
+      # @param [String] the S3 key name
+      # @return [Boolean]
+      def key_exists?(key)
+        begin
+          connection.head(bucket_name, key)
+        rescue RightAws::AwsError => e
+          return false
+        end
+
+        true
+      end
+
       # Copies the remote resource to the local filesystem
       # @param [String] the remote name of the resource to copy
      # @param [String] the local name of the destination
-      def copy(from,
-        open(
-
+      def copy(from, dest)
+        open(dest, 'w') do |file|
+          key = self.class.s3_key(from)
+          remote_file = MultiPartFile.new(self, key)
+          remote_file.get(dest)
         end
-
+      end
+
+      # Retrieves the complete object from S3 without streaming
+      def read(key)
+        connection.get_object(bucket_name, key)
       end
 
       def connected?
@@ -41,29 +83,175 @@ module Mir
       end
 
       def volume
-
+        connection.bucket(bucket_name, true)
       end
 
+      # Deletes the remote version of the file
+      # @return [Boolean] true if operation succeeded
+      def delete(file_path)
+        key = self.class.s3_key(file_path)
+        Mir.logger.info "Deleting remote object #{file_path}"
+
+        begin
+          remote_file = MultiPartFile.new(self, key)
+        rescue Disk::RemoteFileNotFound => e
+          Mir.logger.warn "Could not find remote resource '#{key}'"
+          return false
+        end
+
+        if remote_file.multipart?
+          delete_parts(key)
+        else
+          connection.delete(bucket_name, key)
+        end
+      end
+
+      # Writes a file to Amazon S3. If the file size exceeds the chunk size, the file will
+      # be written in chunks
+      #
+      # @param [String] the absolute path of the file to be written
+      # @raise [Disk::IncompleteTransmission] raised when remote resource is different from local file
       def write(file_path)
-
+        key = self.class.s3_key(file_path)
+
+        if File.size(file_path) <= chunk_size
+          connection.put(bucket_name, key, open(file_path))
+          raise Disk::IncompleteTransmission unless equals?(file_path, key)
+        else
+          delete_parts(file_path) # clean up remaining part files if any exist
+
+          open(file_path, "rb") do |source|
+            part_id = 1
+            while part = source.read(chunk_size) do
+              part_name = Mir::Utils.filename_with_sequence(key, part_id)
+              Mir.logger.debug "Writing part #{part_name}"
+
+              temp_file(part_name) do |tmp|
+                tmp.binmode
+                tmp.write(part)
+                tmp.rewind
+                connection.put(bucket_name, part_name, open(tmp.path))
+                raise Disk::IncompleteTransmission unless equals?(tmp.path, part_name)
+              end
+
+              part_id += 1
+            end
+          end
+        end
        Mir.logger.info "Completed upload #{file_path}"
       end
 
       private
-
-
-
-
-
-
-
-
-
-
-
+
+      # Determines whether a local file matches the remote file
+      #
+      # @param [String] the complete path name to the file
+      # @param [String] the S3 key name for the object
+      # @return [Boolean] whether the MD5 hash of the local file matches the remote value
+      def equals?(filename, key)
+        meta_ob = connection.retrieve_object(:bucket => bucket_name, :key => key)
+        remote_md5 = meta_ob[:headers]["etag"].slice(4..-5)
+        Digest::MD5.file(filename).to_s == remote_md5
+      end
+
+      def try_connect
+        begin
+          conn = RightAws::S3Interface.new(@access_key_id, @secret_access_key, {
+            :multi_thread => true,
+            :logger => Mir.logger
+          })
+          @connection_success = true
+          return conn
+        rescue Exception => e
+          @connection_success = false
+          Mir.logger.error "Could not establish connection with S3: '#{e.message}'"
+        end
+      end
+
+      # Yields a temp file object that is immediately discarded after use
+      #
+      # @param [String] the filename
+      # @yields [Tempfile]
+      def temp_file(name, &block)
+        file = Tempfile.new(File.basename(name))
+        begin
+          yield file
+        ensure
+          file.close
+          file.unlink
+        end
+      end
+
+      # Used to delete a file that has been broken into chunks
+      #
+      # @return [Boolean] true if succeeded
+      def delete_parts(file_path)
+        flag = true
+        connection.incrementally_list_bucket(bucket_name,
+          { :prefix => self.class.s3_key(file_path),
+            :max_keys => 100 }) do |group|
+
+          group[:contents].each do |item|
+            if connection.delete(bucket_name, item[:key])
+              Mir.logger.debug("Deleted '#{item[:key]}'")
+            else
+              flag = false
+            end
           end
         end
+        flag
+      end
+    end
+
+    # Used to hide the inner details of multipart file uploads and downloads. It is important
+    # that this class does not throw any exceptions as these exceptions may be swallowed further
+    # up the stack by worker threads
+    class MultiPartFile
+
+      # @param [Disk] the remote disk
+      # @param [String] the name of the resource
+      def initialize(disk, name)
+        @disk, @name = disk, name
+        multiname = Utils.filename_with_sequence(name, 1)
+
+        if disk.key_exists?(name)
+          @multipart = false
+        elsif disk.key_exists?(multiname)
+          @multipart = true
+        else
+          raise Disk::RemoteFileNotFound
+        end
+      end
+
+      attr_reader :disk, :name
 
+      # Whether the resource is broken into chunks on the remote store
+      def multipart?
+        @multipart
+      end
+
+      # Downloads the resource to the destination. If the file is stored in parts it is download
+      # sequentially in pieces
+      def get(dest)
+        output = File.new(dest, "wb")
+        begin
+          if multipart?
+            seq = 1
+            while part = Utils.filename_with_sequence(name, seq) do
+              break unless disk.key_exists? part
+              output.write disk.read(part)
+              seq += 1
+            end
+          else
+            output.write disk.read(name)
+          end
+        rescue Exception => e
+          Mir.logger.error e
+        ensure
+          output.close
+        end
+      end
     end
+
   end
 end
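The new write path splits anything larger than chunk_size into sequentially numbered part objects, and MultiPartFile#get later stitches them back together by probing those keys in order. A sketch of the resulting key layout, using a hypothetical file path and the 5 MB default chunk size:

    key = Mir::Disk::Amazon.s3_key("/backups/video.mov")   # => "backups/video.mov"

    # Part keys written by Amazon#write when the file exceeds chunk_size:
    Mir::Utils.filename_with_sequence(key, 1)   # => "backups/video.mov.00000001"
    Mir::Utils.filename_with_sequence(key, 2)   # => "backups/video.mov.00000002"
    Mir::Utils.filename_with_sequence(key, 3)   # => "backups/video.mov.00000003"

    # MultiPartFile#get reads part 1, 2, 3, ... until a key no longer exists,
    # appending each chunk to the destination file in order.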
data/lib/mir/disk.rb
CHANGED
data/lib/mir/index.rb
CHANGED
@@ -1,13 +1,19 @@
 require 'active_record'
 require "active_support/inflector"
 
-#
+# The index class is responsible for maintaining knowledge of files uploaded
+# onto the remote file system. The index does this by scanning the directory to
+# be synchronized and evaluating whether a file needs to be uploaded or has changed
+# since the last indexing date
+
 module Mir
   class Index
 
     MIGRATIONS_PATH = File.join(File.dirname(__FILE__), "..", "..", "db", "migrate")
 
     # Returns a databse object used to connect to the indexing database
+    #
+    # @param [String] the absolute path of the directory to be synchronized
     # @param [Hash] database configuration settings. See ActiveRecord#Base::establish_connection
     def initialize(sync_path, connection_params)
       @sync_path = sync_path
@@ -16,7 +22,9 @@ module Mir
 
     attr_reader :sync_path
 
+    #
     # Creates necessary database and tables if this is the first time connecting
+    #
     # @option opts [Boolean] :verbose Enable on ActiveRecord reporting
     # @option opts [Boolean] :force_flush Rebuild index no matter what
     def setup(options = {})
@@ -34,24 +42,50 @@ module Mir
       rebuild if !tables_created? or options[:force_flush]
     end
 
-
+    ##
+    # Scans the synchronization path and evaluates whether a resource has changed
+    # since the last index or is new and needs to be added to the index.
     def update
       Mir.logger.info "Updating backup index for '#{sync_path}'"
+      Models::AppSetting.last_indexed_at = @last_indexed_at = DateTime.now
+
       Dir.glob(File.join(sync_path, "**", "*")) do |f|
         fname = relative_path(f)
         file = File.new(f)
         resource = Models::Resource.find_by_filename(fname)
 
         if resource.nil?
-          Mir.logger.
+          Mir.logger.debug "Adding file to index #{fname}"
           resource = Models::Resource.create_from_file_and_name(file, fname)
-
-          resource.flag_for_update
-        end
+        elsif !resource.synchronized?(file)
+          resource.flag_for_update
+        end
+        resource.update_attribute(:last_indexed_at, last_indexed_at)
       end
+
       Mir.logger.info "Index updated"
     end
 
+    ##
+    # Returns any files not present since the last re-indexing. This is useful
+    # for finding files that have been deleted post-index.
+    #
+    # @return [Mir::Models::Resource]
+    def orphans
+      Models::Resource.not_indexed_on(last_indexed_at)
+    end
+
+    def last_indexed_at
+      @last_indexed_at ||= Models::AppSetting.last_indexed_at
+    end
+
+
+    ##
+    # Removes any files from the index that are no longer present locally
+    def clean!
+      Models::Resource.delete_all_except(last_indexed_at)
+    end
+
     private
     # Returns the path of a file relative to the backup directory
     def relative_path(file)
@@ -63,10 +97,12 @@ module Mir
       tables.each { |t| ActiveRecord::Migration.drop_table(t.table_name) if t.table_exists? }
       ActiveRecord::Migration.drop_table(:schema_migrations) if @connection.table_exists? :schema_migrations
       ActiveRecord::Migrator.migrate MIGRATIONS_PATH
-      Models::AppSetting.
-      Models::AppSetting.create(:name => Models::AppSetting::INSTALL_DATE, :value => DateTime.now.to_s)
+      Models::AppSetting.initialize_table(sync_path)
     end
 
+
+    ##
+    # TODO: no reason to lazy load these activemodels
     def load_tables
       @tables = []
       models = File.join(File.dirname(__FILE__), "models", "*.rb")
data/lib/mir/models/app_setting.rb
CHANGED
@@ -1,13 +1,28 @@
 module Mir
   module Models
     class AppSetting < ActiveRecord::Base
-
-
-
+
+      ##
+      # Builds entries for the variables that will be used by this application
+      #
+      # @param [String] the path to be synchronized with S3
+      def self.initialize_table(sync_path)
+        create(:name => :sync_path, :value => sync_path)
+        create(:name => :install_date, :value => DateTime.now)
+        create(:name => :last_indexed_at, :value => nil)
+      end
 
       def self.backup_path
-
-
+        where(:name => :sync_path).first.value
+      end
+
+      def self.last_indexed_at=(val)
+        record = where(:name => :last_indexed_at).first
+        record.update_attribute(:value, val)
+      end
+
+      def self.last_indexed_at
+        where(:name => :last_indexed_at).first.value
       end
 
     end
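AppSetting now acts as a small name/value table seeded by initialize_table (sync_path, install_date, last_indexed_at), with class-level accessors that read and write the last_indexed_at row. A minimal sketch of the intended use, assuming the tables have been migrated and using a placeholder path:

    Mir::Models::AppSetting.initialize_table("/home/user/backup")

    Mir::Models::AppSetting.backup_path       # => "/home/user/backup"
    Mir::Models::AppSetting.last_indexed_at   # => nil until the first index pass

    Mir::Models::AppSetting.last_indexed_at = DateTime.now
    Mir::Models::AppSetting.last_indexed_at   # => the value stored for last_indexed_at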
data/lib/mir/models/resource.rb
CHANGED
@@ -5,6 +5,8 @@ module Mir
   module Models
     class Resource < ActiveRecord::Base
 
+      scope :not_indexed_on, lambda { |date| where("last_indexed_at != ?", date) }
+
       # Builds a resource for the backup index from a file
       # @param [File] a file object
       # @param [String] the name of the file on the remote disk
@@ -20,6 +22,14 @@ module Mir
                :is_directory => is_dir)
       end
 
+      ##
+      # Removes all resources not that were not indexed on the specified date
+      #
+      # @param [DateTime]
+      def self.delete_all_except(index_date)
+        not_indexed_on(index_date).delete_all
+      end
+
       # Returns true when jobs are still queued
       def self.pending_jobs?
         self.where(:queued => true).size > 0
@@ -49,15 +59,20 @@ module Mir
         end
       end
 
+      #
       # Compares a file asset to the index to deterimine whether the file needs to be updated
+      #
       # @param [String] a path to a file or directory
-      # @
+      # @return [Boolean] returns true when the file's checksum is equal to the value stored in
+      # the index, or when the file is a directory
       def synchronized?(file)
-        if File.
-          return true
-
+        if File.directory?(file)
+          return true
+        elsif !File.exist?(file) or in_progress? or queued?
+          return false
+        else
+          Digest::MD5.file(file).to_s == self.checksum
         end
-        false
       end
 
       # Whether the item can be synchronized to a remote disk
@@ -68,7 +83,9 @@ module Mir
 
       # Places the resource into a queueble state
       def flag_for_update
-        update_attributes :queued => true,
+        update_attributes :queued => true,
+          :checksum => Digest::MD5.file(abs_path).to_s,
+          :last_modified => File.new(abs_path).ctime
       end
 
       def start_progress
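The reworked Resource#synchronized? treats directories as always in sync, treats missing, queued, or in-progress files as out of sync, and otherwise falls back to an MD5 comparison against the checksum captured by flag_for_update. A sketch of that decision, assuming resource is an indexed Resource row and path the corresponding local file:

    resource.synchronized?(path)
    # roughly:
    #   true                                               if File.directory?(path)
    #   false                                              if the file is gone, queued, or in progress
    #   Digest::MD5.file(path).to_s == resource.checksum   otherwise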
data/lib/mir/options.rb
CHANGED
@@ -5,9 +5,13 @@ require 'ostruct'
 module Mir
   class Options
 
-    USAGE_BANNER = "Usage: mir [options] [
+    USAGE_BANNER = "Usage: mir [options] [target]"
 
     def self.parse(args)
+      if args.size < 1
+        puts USAGE_BANNER
+        exit
+      end
       options = OpenStruct.new
       options.debug = false
       options.verbose = false
@@ -30,6 +34,10 @@ module Mir
         options.copy = true
       end
 
+      opts.on("-d", "--debug", "Enable debug logging") do
+        options.debug = true
+      end
+
       opts.on("--settings", String, "The YAML settings file") do |path|
         options.settings_path = path
       end
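With the new -d/--debug switch, Options.parse flips the debug flag that Application#initialize (see the application.rb hunks above) uses to choose between Logger::DEBUG and Logger::ERROR. A quick sketch of the parser's behaviour, assuming it is driven directly and returns the populated OpenStruct:

    opts = Mir::Options.parse(["-d", "~/mydirectory"])
    opts.debug   # => true

    opts = Mir::Options.parse(["~/mydirectory"])
    opts.debug   # => false (the default)

    Mir::Options.parse([])   # prints the usage banner and exits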
data/lib/mir/utils.rb
CHANGED
@@ -1,7 +1,49 @@
 module Mir
   module Utils
+
+    # Generates filename for a split file
+    # filename_with_sequence("foobar.txt", 23) => foobar.txt.00000023
+    # @param [String] filename
+    # @param [Integer] the sequence number
+    def self.filename_with_sequence(name, seq)
+      [name, ".", "%08d" % seq].join
+    end
+
     def self.try_create_dir(path)
       Dir.mkdir(path) unless Dir.exist?(path)
     end
+
+    # Splits a file into pieces that may be reassembled later
+    # @param [File or String] File to be split
+    # @param [Integer] the number of bytes per each chunked file
+    # @param [String] where the split files should be stored
+    def self.split_file(file, chunk_size, dest)
+      try_create_dir(dest) unless Dir.exist?(dest)
+
+      fname = File.join(dest, File.basename(file))
+      seq = 1
+
+      open(file, "rb") do |f|
+        while split_data = f.read(chunk_size) do
+          split_name = self.filename_with_sequence(fname, seq)
+          open(split_name, "wb") { |dest| dest.write(split_data) }
+          seq += 1
+        end
+      end
+    end
+
+
+    # Recombines a file from pieces
+    # @param [String] the directory that holds the split files
+    # @param [String] the path to the assembled file
+    def self.recombine(source_dir, dest)
+      parts = Dir.glob(File.join(source_dir, "*"))
+      open(File.expand_path(dest), "wb") do |file|
+        parts.each do |part|
+          p_io = File.new(part)
+          file.write p_io.read
+        end
+      end
+    end
   end
 end
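split_file and recombine are the round-trip helpers behind the chunked uploads: one shreds a file into fixed-size, sequence-numbered pieces, the other concatenates a directory of pieces back into a single file. A small usage sketch, assuming the gem is loaded with require "mir" and using placeholder paths under a writable /tmp:

    require "mir"

    # Split a file into 5 MB pieces:
    Mir::Utils.split_file("/tmp/big.bin", 5 * 2**20, "/tmp/big-parts")
    # => writes /tmp/big-parts/big.bin.00000001, .00000002, ...

    # Reassemble the pieces (glob order matches the zero-padded sequence numbers):
    Mir::Utils.recombine("/tmp/big-parts", "/tmp/big-restored.bin")
    # /tmp/big-restored.bin should now be byte-identical to /tmp/big.bin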
data/lib/mir/version.rb
CHANGED
data/spec/disk/amazon_spec.rb
ADDED
@@ -0,0 +1,20 @@
+require "spec_helper"
+
+describe Mir::Disk::Amazon do
+
+  let(:settings) do
+    {
+      :bucket_name => "bucket",
+      :access_key_id => "xxx",
+      :secret_access_key => "xxx"
+    }
+  end
+
+  let(:disk) { Mir::Disk::Amazon.new(settings) }
+
+  it "should default to 5mb chunk size for uploads" do
+    disk = Mir::Disk::Amazon.new(settings)
+    disk.chunk_size.should == 5*2**20
+  end
+
+end
data/spec/index_spec.rb
ADDED
@@ -0,0 +1,33 @@
+require "spec_helper"
+
+describe Mir::Index do
+
+  describe "#update" do
+    let(:index) { Mir::Index.new("/tmp", {}) }
+    let(:fake_file) { mock("file", :directory? => false) }
+    let(:resource) { mock("resource") }
+
+    before(:each) do
+      Mir::Models::AppSetting.should_receive(:last_indexed_at=)
+      Dir.should_receive(:glob).and_yield("filename")
+      File.should_receive(:new).and_return(fake_file)
+      resource.should_receive(:update_attribute).and_return(true)
+      index.stub!(:last_indexed_at)
+    end
+
+    it "adds an asset to the index if it has not yet been added" do
+      Mir::Models::Resource.should_receive(:find_by_filename).and_return(nil)
+      Mir::Models::Resource.should_receive(:create_from_file_and_name).and_return(resource)
+      index.update
+    end
+
+    it "should flag a resource if the local copy is out of sync with the index" do
+      Mir::Models::Resource.should_receive(:find_by_filename).and_return(resource)
+      resource.should_receive(:synchronized?).and_return(false)
+      resource.should_receive(:flag_for_update).and_return(true)
+      index.update
+    end
+  end
+
+
+end
data/spec/spec_helper.rb
CHANGED
data/spec/utils_spec.rb
ADDED
@@ -0,0 +1,43 @@
+require "spec_helper"
+require "tempfile"
+require "fileutils"
+require "digest/md5"
+
+describe Mir::Utils do
+
+  context "Splitting files" do
+    before(:all) do
+      @tmp_dir = "/tmp/splitter-tests"
+      FileUtils.mkdir(@tmp_dir) unless File.exist? @tmp_dir
+      @fake_file_path = "/tmp/512byteFile"
+      @file = open(@fake_file_path, "w") do |f|
+        f.write <<-EOS
+Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud
+exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute
+irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla
+pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia
+deserunt mollit anim id est laborum.
+        EOS
+      end
+    end
+
+    after(:all) do
+      FileUtils.rm(Dir.glob(File.join(@tmp_dir, "*")))
+      FileUtils.rm(@fake_file_path)
+    end
+
+    it "should create smaller files from one large file" do
+      path = Mir::Utils.split_file(@fake_file_path, 8, @tmp_dir)
+      split_files = Dir.glob(File.join(@tmp_dir, "*"))
+      split_files.size.should == 64
+    end
+
+    it "should recombine smaller files into one large file" do
+      dest = "/tmp/recombined.txt"
+      path = Mir::Utils.recombine(@tmp_dir, dest)
+      Digest::MD5.file(dest).should == Digest::MD5.file(@fake_file_path)
+    end
+
+  end
+end
metadata
CHANGED
@@ -2,7 +2,7 @@
 name: mir
 version: !ruby/object:Gem::Version
   prerelease:
-  version: 0.1.2
+  version: 0.1.3
 platform: ruby
 authors:
 - Nate Miller
@@ -10,7 +10,7 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2011-09-
+date: 2011-09-22 00:00:00 -07:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -91,6 +91,8 @@ extra_rdoc_files: []
 files:
 - .gitignore
 - Gemfile
+- LICENSE
+- README.md
 - Rakefile
 - bin/mir
 - db/migrate/001_create_app_settings.rb
@@ -108,7 +110,10 @@ files:
 - lib/mir/utils.rb
 - lib/mir/version.rb
 - mir.gemspec
+- spec/disk/amazon_spec.rb
+- spec/index_spec.rb
 - spec/spec_helper.rb
+- spec/utils_spec.rb
 has_rdoc: true
 homepage: ""
 licenses: []
@@ -138,4 +143,7 @@ signing_key:
 specification_version: 3
 summary: A utility for backing up resources
 test_files:
+- spec/disk/amazon_spec.rb
+- spec/index_spec.rb
 - spec/spec_helper.rb
+- spec/utils_spec.rb