nearline 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/nearline.rb +3 -0
- data/lib/nearline/archived_file.rb +32 -95
- data/lib/nearline/block.rb +14 -29
- data/lib/nearline/file_content.rb +13 -32
- data/lib/nearline/file_sequencer.rb +130 -0
- data/lib/nearline/manifest.rb +201 -18
- data/lib/nearline/module_methods.rb +29 -12
- data/lib/nearline/schema.rb +2 -2
- data/lib/nearline/system.rb +7 -7
- data/tasks/gemspec.rake +1 -1
- metadata +57 -49
data/lib/nearline.rb
CHANGED
@@ -4,106 +4,43 @@ module Nearline
|
|
4
4
|
# Represents file metadata and possible related FileContent
|
5
5
|
# for a single file on a single system
|
6
6
|
class ArchivedFile < ActiveRecord::Base
|
7
|
+
require 'digest/sha1'
|
7
8
|
require 'fileutils'
|
8
9
|
|
9
10
|
belongs_to :file_content
|
10
11
|
belongs_to :system
|
11
12
|
has_and_belongs_to_many :manifests
|
12
13
|
|
13
|
-
|
14
|
-
def self.create_for(
|
15
|
-
|
16
|
-
|
17
|
-
# The path doesn't actually exist and fails a File.stat
|
14
|
+
|
15
|
+
def self.create_for(file_information)
|
16
|
+
# The path doesn't actually exist and fails a File.lstat
|
18
17
|
return nil if file_information.path_hash.nil?
|
19
|
-
|
20
|
-
# If we find an exising entry, use it
|
21
|
-
hash = manifest.system.archived_file_lookup_hash
|
22
|
-
hit = hash[file_information.path_hash]
|
23
|
-
|
24
|
-
unless hit.nil?
|
25
|
-
af = ArchivedFile.find(hit)
|
26
|
-
manifest.archived_files << af
|
27
|
-
return af
|
28
|
-
end
|
29
18
|
|
30
19
|
# We need to create a record for either a directory or file
|
31
20
|
archived_file = ArchivedFile.new(
|
32
21
|
file_information.archived_file_parameters
|
33
22
|
)
|
34
|
-
|
23
|
+
|
35
24
|
# Find a new directory
|
36
25
|
if (file_information.is_directory)
|
37
26
|
archived_file.save!
|
38
|
-
manifest.archived_files << archived_file
|
27
|
+
file_information.manifest.archived_files << archived_file
|
39
28
|
return archived_file
|
40
29
|
end
|
41
30
|
|
42
|
-
# Find a new file that needs persisted
|
31
|
+
# Find a new file that needs to be persisted
|
43
32
|
archived_file.file_content.file_size =
|
44
33
|
[file_information.stat.size].pack('Q').unpack('L').first # HACK for Windows
|
45
|
-
archived_file = archived_file.persist(manifest)
|
46
|
-
archived_file.
|
47
|
-
|
34
|
+
archived_file = archived_file.persist(file_information.manifest)
|
35
|
+
unless archived_file.nil? || archived_file.frozen?
|
36
|
+
archived_file.save!
|
37
|
+
file_information.manifest.archived_files << archived_file
|
38
|
+
end
|
48
39
|
archived_file
|
49
40
|
|
50
41
|
# TODO: Symbolic links, block devices, ...?
|
51
42
|
end
|
52
|
-
|
53
|
-
class FileInformation
|
54
|
-
attr_reader :path_hash, :stat, :is_directory, :archived_file_parameters
|
55
|
-
def initialize(file_path, manifest)
|
56
|
-
@manifest = manifest
|
57
|
-
@file_path = file_path
|
58
|
-
@stat = read_stat
|
59
|
-
@is_directory = File.directory?(file_path)
|
60
|
-
@path_hash = generate_path_hash
|
61
|
-
@archived_file_parameters = build_parameters
|
62
|
-
end
|
63
|
-
|
64
|
-
def read_stat
|
65
|
-
stat = nil
|
66
|
-
begin
|
67
|
-
stat = File.stat(@file_path)
|
68
|
-
rescue
|
69
|
-
@manifest.add_log("File not found on stat: #{@file_path}")
|
70
|
-
end
|
71
|
-
stat
|
72
|
-
end
|
73
|
-
|
74
|
-
def generate_path_hash
|
75
|
-
return nil if @stat.nil?
|
76
|
-
target = [@manifest.system.name,
|
77
|
-
@file_path,
|
78
|
-
@stat.uid,
|
79
|
-
@stat.gid,
|
80
|
-
@stat.mtime.to_i,
|
81
|
-
@stat.mode].join(':')
|
82
|
-
Digest::SHA1.hexdigest(target)
|
83
|
-
end
|
84
|
-
|
85
|
-
def file_content_entry_for_files_only
|
86
|
-
return FileContent.new unless @is_directory
|
87
|
-
return nil
|
88
|
-
end
|
89
|
-
|
90
|
-
def build_parameters
|
91
|
-
return nil if @stat.nil?
|
92
|
-
{
|
93
|
-
:system => @manifest.system,
|
94
|
-
:path => @file_path,
|
95
|
-
:path_hash => @path_hash,
|
96
|
-
:file_content => file_content_entry_for_files_only,
|
97
|
-
:uid => @stat.uid,
|
98
|
-
:gid => @stat.gid,
|
99
|
-
:mtime => @stat.mtime.to_i,
|
100
|
-
:mode => @stat.mode,
|
101
|
-
:is_directory => @is_directory
|
102
|
-
}
|
103
|
-
end
|
104
|
-
|
105
|
-
end
|
106
|
-
|
43
|
+
|
107
44
|
def restore(*args)
|
108
45
|
@options = args.extract_options!
|
109
46
|
if (self.is_directory)
|
@@ -158,20 +95,20 @@ module Nearline
|
|
158
95
|
# won't know that until we complete the process and have to
|
159
96
|
# clean up our mess.
|
160
97
|
def persist(manifest)
|
161
|
-
|
162
|
-
file_size = 0
|
98
|
+
seq = nil
|
163
99
|
begin
|
164
|
-
|
100
|
+
seq = read_file
|
165
101
|
rescue
|
166
|
-
|
102
|
+
error = "Got error '#{$!}' on path: #{self.path}"
|
103
|
+
manifest.add_log error
|
167
104
|
self.orphan_check
|
168
105
|
return nil
|
169
106
|
end
|
170
|
-
|
171
|
-
size_check(file_size, manifest)
|
107
|
+
|
108
|
+
size_check(seq.file_size, manifest)
|
172
109
|
|
173
110
|
# Do we have a unique sequence?
|
174
|
-
key =
|
111
|
+
key = seq.fingerprint
|
175
112
|
return self if unique_sequence_processed?(key, manifest)
|
176
113
|
|
177
114
|
# Handle the case where the sequence is not unique...
|
@@ -180,19 +117,14 @@ module Nearline
|
|
180
117
|
self
|
181
118
|
end
|
182
119
|
|
183
|
-
def
|
184
|
-
sequencer = FileSequencer.new(self.file_content)
|
185
|
-
file_size = 0
|
186
|
-
buffer = ""
|
120
|
+
def read_file
|
187
121
|
File.open(self.path, "rb") do |io|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
whole_file_hash.update(buffer)
|
192
|
-
sequencer.preserve_content(buffer)
|
122
|
+
seq = FileSequencer.new(io, self.file_content)
|
123
|
+
while (!io.eof)
|
124
|
+
seq.persist_segment
|
193
125
|
end
|
126
|
+
return seq
|
194
127
|
end
|
195
|
-
return file_size
|
196
128
|
end
|
197
129
|
|
198
130
|
def size_check(file_size, manifest)
|
@@ -222,10 +154,15 @@ module Nearline
|
|
222
154
|
false
|
223
155
|
end
|
224
156
|
|
157
|
+
# In the special case of an identical sequence existing,
|
158
|
+
# we can safely delete all related sequences and then destroy
|
159
|
+
# the file content object without the (far slower) orphan checking
|
160
|
+
# process
|
225
161
|
def clean_up_duplicate_content
|
226
|
-
self.file_content.
|
162
|
+
Sequence.delete_all "file_content_id = #{self.file_content.id}"
|
163
|
+
self.file_content.destroy
|
227
164
|
end
|
228
|
-
|
165
|
+
|
229
166
|
def replace_content(key)
|
230
167
|
self.file_content = FileContent.find_by_fingerprint(key)
|
231
168
|
self.save!
|
data/lib/nearline/block.rb
CHANGED
@@ -12,12 +12,21 @@ module Nearline
|
|
12
12
|
|
13
13
|
has_many :sequences
|
14
14
|
|
15
|
-
|
15
|
+
# Maximum block size in bytes
|
16
|
+
@@max_block_size = (64 * 1024)-1
|
17
|
+
cattr_accessor :max_block_size
|
18
|
+
|
19
|
+
# Level of block compression attempted
|
20
|
+
# 0 = skip compression entirely
|
21
|
+
@@block_compression_level = 5
|
22
|
+
cattr_accessor :block_compression_level
|
16
23
|
|
17
24
|
def attempt_compression
|
18
|
-
return if (self.is_compressed)
|
19
|
-
|
20
|
-
|
25
|
+
return if (self.is_compressed || @@block_compression_level == 0)
|
26
|
+
candidate_content = Zlib::Deflate.deflate(
|
27
|
+
self.bulk_content,
|
28
|
+
@@block_compression_level
|
29
|
+
)
|
21
30
|
if candidate_content.length < self.bulk_content.length
|
22
31
|
self.is_compressed = true
|
23
32
|
self.bulk_content = candidate_content
|
@@ -37,31 +46,7 @@ module Nearline
|
|
37
46
|
end
|
38
47
|
@content = self.bulk_content
|
39
48
|
end
|
40
|
-
|
41
|
-
def self.id_for_content(x)
|
42
|
-
block = Block.new(:bulk_content => x)
|
43
|
-
block.calculate_fingerprint
|
44
|
-
hit = Block.connection.select_one(
|
45
|
-
"select id from blocks where fingerprint='#{block.fingerprint}'"
|
46
|
-
)
|
47
|
-
unless hit.nil?
|
48
|
-
return hit['id']
|
49
|
-
end
|
50
|
-
block.attempt_compression
|
51
|
-
block.save!
|
52
|
-
block.id
|
53
|
-
end
|
54
|
-
|
55
|
-
def self.for_content(x)
|
56
|
-
block = Models::Block.new(:bulk_content => x)
|
57
|
-
block.calculate_fingerprint
|
58
|
-
found = find_by_fingerprint(block.fingerprint)
|
59
|
-
return found if !found.nil?
|
60
|
-
block.attempt_compression
|
61
|
-
block.save!
|
62
|
-
block
|
63
|
-
end
|
64
|
-
|
49
|
+
|
65
50
|
def orphan_check
|
66
51
|
if self.sequences.size == 0
|
67
52
|
self.destroy
|
@@ -4,7 +4,7 @@ module Nearline
|
|
4
4
|
# Has the responsibility of identifying, restoring and
|
5
5
|
# verifying content
|
6
6
|
class FileContent < ActiveRecord::Base
|
7
|
-
has_many :sequences
|
7
|
+
has_many :sequences, :order => "sequence"
|
8
8
|
has_many :archived_files
|
9
9
|
|
10
10
|
def orphan_check
|
@@ -23,22 +23,25 @@ module Nearline
|
|
23
23
|
return hit.nil?
|
24
24
|
end
|
25
25
|
|
26
|
-
|
26
|
+
private
|
27
|
+
|
28
|
+
def each_sequence
|
27
29
|
sequences.each do |seq|
|
28
30
|
block = Block.find(seq.block_id)
|
29
|
-
|
31
|
+
yield block
|
30
32
|
end
|
31
33
|
end
|
34
|
+
|
35
|
+
public
|
36
|
+
|
37
|
+
def restore_to(io)
|
38
|
+
each_sequence { |block| io.write(block.content) }
|
39
|
+
end
|
32
40
|
|
33
41
|
def verified?
|
34
|
-
if (!self.verified_at.nil?)
|
35
|
-
return true
|
36
|
-
end
|
42
|
+
return true if (!self.verified_at.nil?)
|
37
43
|
whole_file_hash = Digest::SHA1.new
|
38
|
-
|
39
|
-
block = Block.find(seq.block_id)
|
40
|
-
whole_file_hash.update(block.content)
|
41
|
-
end
|
44
|
+
each_sequence { |block| whole_file_hash.update(block.content) }
|
42
45
|
if fingerprint == whole_file_hash.hexdigest
|
43
46
|
self.verified_at = Time.now
|
44
47
|
self.save!
|
@@ -47,7 +50,6 @@ module Nearline
|
|
47
50
|
false
|
48
51
|
end
|
49
52
|
|
50
|
-
|
51
53
|
end
|
52
54
|
|
53
55
|
# Has the responsibility of preserving
|
@@ -61,26 +63,5 @@ module Nearline
|
|
61
63
|
end
|
62
64
|
end
|
63
65
|
|
64
|
-
class FileSequencer
|
65
|
-
def initialize(file_content)
|
66
|
-
@inc = 0
|
67
|
-
@file_content = file_content
|
68
|
-
@file_content.save!
|
69
|
-
end
|
70
|
-
|
71
|
-
def preserve_content(content)
|
72
|
-
@inc += 1
|
73
|
-
block_id = Block.id_for_content(content)
|
74
|
-
sequence = Sequence.new(
|
75
|
-
:sequence => @inc,
|
76
|
-
:file_content_id => @file_content.id,
|
77
|
-
:block_id => block_id
|
78
|
-
)
|
79
|
-
sequence.save!
|
80
|
-
sequence
|
81
|
-
end
|
82
|
-
|
83
|
-
end
|
84
|
-
|
85
66
|
end
|
86
67
|
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
module Nearline
  module Models

    # Used for mass block entry and sequencing.
    #
    # Reads an IO in batches of at most +max_blocks+ blocks.  Within a batch,
    # blocks whose fingerprint is already present in the blocks table are
    # sequenced against the existing row; the remaining blocks are
    # compressed (when worthwhile), saved, and then sequenced as well.
    # A SHA1 over the whole stream and the total number of bytes read are
    # accumulated along the way.
    class FileSequencer
      # Total size of all buffers read from the IO so far.
      attr_reader :file_size

      # Number of blocks to serialize in a batch
      @@max_blocks = 500
      cattr_accessor :max_blocks

      # io           - readable IO positioned at the start of the content.
      # file_content - the FileContent the sequences will belong to; it is
      #                saved here if it has no id yet, because every Sequence
      #                row needs its id.
      def initialize(io, file_content)
        @io = io
        @file_content = file_content
        @file_content.save! if @file_content.id.nil?
        @s = []        # Sequence records waiting to be saved for this batch
        @b = []        # Blocks of this batch; a slot becomes nil once sequenced
        @file_size = 0
        @offset = 0    # number of blocks handled by previous batches
        @whole_file_hash = Digest::SHA1.new
      end

      # Hex SHA1 digest of everything read from the IO so far.
      def fingerprint
        @whole_file_hash.hexdigest
      end

      # Read and persist one batch of blocks and their sequence entries.
      def persist_segment
        pull_blocks
        sequence_known_blocks
        attempt_compression_of_remaining_blocks
        insert_new_blocks
        # Second pass: the blocks saved just above are now "known" too.
        sequence_known_blocks
        insert_sequences
        clear_for_next_persist
      end

      private

      # Reset the per-batch state and advance the sequence offset.
      def clear_for_next_persist
        # Advance by the number of blocks actually pulled (slots are nil'ed,
        # never removed, so @b.size is that count).  This keeps sequence
        # numbers contiguous even if max_blocks is changed between batches.
        @offset += @b.size
        @s = []
        @b = []
      end

      # Create sequence entries for every pending block that already has a
      # row in the blocks table.
      def sequence_known_blocks
        add_sequence_entries_clearing_blocks(found_fingerprint_map)
      end

      # Returns a Hash of fingerprint => block id for those pending blocks
      # whose fingerprint is already stored.
      def found_fingerprint_map
        found = {}
        # uniq: repeated content within a batch only needs to be asked once.
        wanted = @b.compact.map { |block| block.fingerprint }.uniq
        return found if wanted.empty?

        quoted = wanted.map { |fp| "'#{fp}'" }.join(', ')
        query = "select distinct id, fingerprint from blocks " +
                "where fingerprint in (#{quoted})"
        rows = Nearline::Models::Block.connection.select_all(query)
        rows.each { |row| found[row["fingerprint"]] = row["id"] }
        found
      end

      # For each pending block found in +f+ (fingerprint => block id), queue
      # a Sequence and clear the block's slot so it is not handled again.
      def add_sequence_entries_clearing_blocks(f)
        # Slots are overwritten in place; the array length never changes,
        # so iterating while assigning is safe.
        @b.each_with_index do |block, i|
          next if block.nil?
          block_id = f[block.fingerprint]
          next unless block_id

          @s.push(Sequence.new(
            :sequence => i + @offset + 1,   # sequence numbers are 1-based
            :block_id => block_id,
            :file_content_id => @file_content.id
          ))
          @b[i] = nil
        end
      end

      # Save every queued Sequence of this batch.
      def insert_sequences
        @s.each { |sequence| sequence.save! }
      end

      # Try to compress the first pending block of each distinct fingerprint;
      # later duplicates are never saved, so compressing them would be wasted.
      def attempt_compression_of_remaining_blocks
        seen = {}
        @b.each do |block|
          next if block.nil? || seen[block.fingerprint]
          block.attempt_compression
          seen[block.fingerprint] = true
        end
      end

      # Save the first pending block of each distinct fingerprint.
      def insert_new_blocks
        seen = {}
        @b.each do |block|
          next if block.nil? || seen[block.fingerprint]
          block.save!
          seen[block.fingerprint] = true
        end
      end

      # Read up to max_blocks buffers of Block.max_block_size from the IO,
      # wrapping each in a fingerprinted (unsaved) Block.
      def pull_blocks
        count = 0
        while !@io.eof && count < @@max_blocks
          count += 1

          # Move to Block
          buffer = @io.read(Block.max_block_size)

          @file_size += buffer.size

          # Move to Block
          blk = Block.new(:bulk_content => buffer)
          @whole_file_hash.update(buffer)

          # Move to Block
          blk.calculate_fingerprint
          @b << blk
        end
      end

    end

  end
end
|
data/lib/nearline/manifest.rb
CHANGED
@@ -5,10 +5,7 @@ module Nearline
|
|
5
5
|
class FileFinder
|
6
6
|
require 'find'
|
7
7
|
def self.recurse(paths, exclusions)
|
8
|
-
regex_exclusions =
|
9
|
-
for exclusion in exclusions
|
10
|
-
regex_exclusions << /#{exclusion}/
|
11
|
-
end
|
8
|
+
regex_exclusions = exclusion_regexes(exclusions)
|
12
9
|
paths.each do |path|
|
13
10
|
Find.find(path) do |f|
|
14
11
|
regex_exclusions.each do |ex|
|
@@ -18,6 +15,72 @@ module Nearline
|
|
18
15
|
end
|
19
16
|
end
|
20
17
|
end
|
18
|
+
|
19
|
+
# Compile each exclusion pattern into a Regexp.
#
# exclusions - collection of patterns; each one is interpolated into a
#              regular expression literal.
#
# Returns a new Array of Regexp objects in the same order as +exclusions+.
def self.exclusion_regexes(exclusions)
  # map replaces the manual for/<< accumulation and does not leak the
  # loop variable into the method scope.
  exclusions.map { |exclusion| /#{exclusion}/ }
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# Handles file paths and metadata for a file in a manifest
|
29
|
+
# Captures the path and stat metadata of one file system entry on behalf of
# a manifest.
#
# Everything is computed once in the constructor.  When the path cannot be
# stat'ed, a message is added to the manifest log and +stat+, +path_hash+
# and +archived_file_parameters+ are all nil.
class FileInformation
  attr_reader :path_hash, :stat, :is_directory,
              :archived_file_parameters, :manifest, :file_path

  # file_path - path of the entry to describe.
  # manifest  - owning manifest; +system+ and +add_log+ are called on it.
  def initialize(file_path, manifest)
    @manifest = manifest
    @file_path = file_path
    @stat = read_stat
    # Take the directory flag from the stat result already in hand rather
    # than querying the file system a second time: the flag then always
    # agrees with the recorded mode, and a path that failed to stat is
    # simply "not a directory" instead of being probed again outside the
    # rescue.
    @is_directory = @stat.nil? ? false : @stat.directory?
    @path_hash = generate_path_hash
    @archived_file_parameters = build_parameters
  end

  # Returns the File::Stat for the path, or nil (after logging to the
  # manifest) when the stat call raises.
  def read_stat
    # TODO: change to lstat when we handle links
    File.stat(@file_path)
  rescue
    @manifest.add_log("File not found on stat: #{@file_path}")
    nil
  end

  # Returns the SHA1 hex digest of system name, path, uid, gid, mtime and
  # mode joined with ':', or nil when there is no stat.
  def generate_path_hash
    return nil if @stat.nil?
    target = [@manifest.system.name,
              @file_path,
              @stat.uid,
              @stat.gid,
              @stat.mtime.to_i,
              @stat.mode].join(':')
    Digest::SHA1.hexdigest(target)
  end

  # Returns a new FileContent for anything that is not a directory;
  # directories get nil.
  def file_content_entry_for_files_only
    @is_directory ? nil : FileContent.new
  end

  # Returns the attribute Hash handed to ArchivedFile.new, or nil when
  # there is no stat.
  def build_parameters
    return nil if @stat.nil?
    {
      :system => @manifest.system,
      :path => @file_path,
      :path_hash => @path_hash,
      :file_content => file_content_entry_for_files_only,
      :uid => @stat.uid,
      :gid => @stat.gid,
      :mtime => @stat.mtime.to_i,
      :mode => @stat.mode,
      :is_directory => @is_directory
    }
  end

end
|
22
85
|
|
23
86
|
# A Manifest represents the corpus of ArchivedFiles and
|
@@ -32,7 +95,11 @@ module Nearline
|
|
32
95
|
attr_accessor :backup_paths
|
33
96
|
# Just needed when you create a manifest
|
34
97
|
attr_accessor :backup_exclusions
|
35
|
-
|
98
|
+
|
99
|
+
# Maximum number of files to stat and process in a batch
|
100
|
+
@@max_files_cached = 10000
|
101
|
+
cattr_accessor :max_files_cached
|
102
|
+
|
36
103
|
def self.new_for_name(system_name)
|
37
104
|
system = System.for_name(system_name)
|
38
105
|
system.manifests << m = Nearline::Models::Manifest.new
|
@@ -43,10 +110,44 @@ module Nearline
|
|
43
110
|
def self.backup(system, backup_paths, backup_exclusions)
|
44
111
|
manifest = self.new(:system => system)
|
45
112
|
manifest.save!
|
113
|
+
manifest.backup(backup_paths, backup_exclusions)
|
114
|
+
end
|
115
|
+
|
116
|
+
def backup(backup_paths, backup_exclusions)
|
117
|
+
FileFinder.recurse(backup_paths, backup_exclusions) do |file_path|
|
118
|
+
handle_file_path(file_path)
|
119
|
+
end
|
120
|
+
finish_remaining_file_infos
|
46
121
|
|
47
|
-
|
48
|
-
|
49
|
-
|
122
|
+
self.completed_at = Time.now
|
123
|
+
self.save!
|
124
|
+
self
|
125
|
+
end
|
126
|
+
|
127
|
+
# Queue the metadata for one discovered path.  Once the queue holds more
# than max_files_cached entries it is flushed through process_file_infos.
def handle_file_path(file_path)
  # The queue is created lazily on the first path of a backup run.
  @file_infos ||= []
  @file_infos << FileInformation.new(file_path, self)

  process_file_infos if @file_infos.size > @@max_files_cached
end
|
135
|
+
|
136
|
+
# Flush whatever file infos are still queued once the directory walk is done.
#
# The queue is only created by handle_file_path, so when the walk yielded no
# paths at all @file_infos is still nil; in that case (or when the queue is
# empty) there is nothing to flush and process_file_infos is not called.
def finish_remaining_file_infos
  return if @file_infos.nil? || @file_infos.empty?
  process_file_infos
end
|
139
|
+
|
140
|
+
def process_file_infos
|
141
|
+
return if @file_infos.size == 0
|
142
|
+
|
143
|
+
lookup = existing_archived_file_lookup
|
144
|
+
@file_infos.each do |file_info|
|
145
|
+
$stdout.write file_info.file_path + " "
|
146
|
+
if (af = lookup[file_info.path_hash])
|
147
|
+
self.archived_files << af
|
148
|
+
else
|
149
|
+
af = ArchivedFile.create_for(file_info)
|
150
|
+
end
|
50
151
|
if (!af.nil?)
|
51
152
|
$stdout.write "#{Time.at(af.mtime).asctime}"
|
52
153
|
if (!af.file_content.nil?)
|
@@ -55,12 +156,18 @@ module Nearline
|
|
55
156
|
$stdout.write("\n")
|
56
157
|
end
|
57
158
|
end
|
58
|
-
|
59
|
-
manifest.completed_at = Time.now
|
60
|
-
manifest.save!
|
61
|
-
manifest
|
159
|
+
@file_infos = []
|
62
160
|
end
|
63
161
|
|
162
|
+
# Look up the ArchivedFile records that already exist for the queued file
# infos.
#
# Returns a Hash of path_hash => ArchivedFile.  The Hash is empty, and no
# query is issued, when nothing is queued or no queued entry has a path
# hash.
def existing_archived_file_lookup
  # A queued entry has a nil path_hash when its stat failed; such entries
  # cannot match any row, so they are left out of the query.  uniq keeps
  # the IN list short when the same entry was queued more than once.
  hashes = (@file_infos || []).map { |info| info.path_hash }.compact.uniq
  return {} if hashes.empty?

  path_hashes = hashes.map { |hash| "'#{hash}'" }.join(", ")
  conditions = "path_hash in (#{path_hashes})"
  hits = ArchivedFile.find(:all, :conditions => conditions)

  existing_files = {}
  hits.each { |hit| existing_files[hit.path_hash] = hit }
  existing_files
end
|
64
171
|
|
65
172
|
# Find all Manifest entries (across all Systems) which have never finished.
|
66
173
|
#
|
@@ -73,17 +180,24 @@ module Nearline
|
|
73
180
|
|
74
181
|
def self.restore_all_missing(system, latest_date_time = Time.now)
|
75
182
|
manifest = system.latest_manifest_as_of(latest_date_time)
|
76
|
-
manifest.
|
183
|
+
manifest.iterate_all_missing do |af|
|
184
|
+
af.restore
|
185
|
+
end
|
77
186
|
end
|
78
187
|
|
79
|
-
|
80
|
-
|
188
|
+
def self.what_would_restore(system, latest_date_time = Time.now)
|
189
|
+
manifest = system.latest_manifest_as_of(latest_date_time)
|
190
|
+
manifest.iterate_all_missing {}
|
191
|
+
end
|
192
|
+
|
193
|
+
# Iterate all missing files from this manifest, yielding each
|
194
|
+
def iterate_all_missing
|
81
195
|
files_restored = []
|
82
196
|
self.archived_files.each do |af|
|
83
197
|
begin
|
84
198
|
File.stat(af.path)
|
85
199
|
rescue
|
86
|
-
af
|
200
|
+
yield af
|
87
201
|
files_restored << af.path
|
88
202
|
end
|
89
203
|
end
|
@@ -97,14 +211,83 @@ module Nearline
|
|
97
211
|
end
|
98
212
|
|
99
213
|
def before_destroy
|
100
|
-
|
214
|
+
destroy_archived_files_with_content
|
215
|
+
destroy_archived_files_without_content
|
216
|
+
destroy_archived_files_manifests
|
217
|
+
destroy_logs
|
218
|
+
self.destroy_without_habtm_shim_for_archived_files
|
219
|
+
end
|
220
|
+
|
221
|
+
private
|
222
|
+
|
223
|
+
def archived_file_content_query(op)
|
224
|
+
<<-END_SQL
|
225
|
+
select distinct fc.id
|
226
|
+
from archived_files af,
|
227
|
+
archived_files_manifests afm, file_contents fc
|
228
|
+
where
|
229
|
+
afm.manifest_id #{op} #{self.id} and
|
230
|
+
afm.archived_file_id = af.id and
|
231
|
+
af.file_content_id = fc.id
|
232
|
+
END_SQL
|
233
|
+
end
|
234
|
+
|
235
|
+
def destroy_archived_files_with_content
|
236
|
+
|
237
|
+
fc_in = self.connection.select_all(archived_file_content_query("=")).collect{|e| e["id"]}
|
238
|
+
fc_out = self.connection.select_all(archived_file_content_query("!=")).collect{|e| e["id"]}
|
239
|
+
fc_to_destroy = (fc_in - fc_out).join ", "
|
240
|
+
|
241
|
+
if (fc_to_destroy.size > 0)
|
242
|
+
af_to_destroy = self.connection.select_all(<<-END_QUERY
|
243
|
+
select af.id from archived_files af, archived_files_manifests afm
|
244
|
+
where afm.manifest_id=#{self.id} and afm.archived_file_id = af.id and
|
245
|
+
af.file_content_id in (#{fc_to_destroy})
|
246
|
+
END_QUERY
|
247
|
+
).collect{|e| e["id"]}
|
248
|
+
else
|
249
|
+
af_to_destroy = []
|
250
|
+
end
|
251
|
+
|
252
|
+
Nearline::Models::ArchivedFile.find(af_to_destroy).each do |af|
|
253
|
+
af.orphan_check
|
254
|
+
end
|
255
|
+
end
|
256
|
+
|
257
|
+
def archived_files_query(op)
|
258
|
+
<<-END_QUERY
|
259
|
+
select distinct af.id
|
260
|
+
from archived_files af,
|
261
|
+
archived_files_manifests afm
|
262
|
+
where af.file_content_id is null and
|
263
|
+
af.id = afm.archived_file_id and
|
264
|
+
afm.manifest_id #{op} #{self.id}
|
265
|
+
END_QUERY
|
266
|
+
end
|
267
|
+
|
268
|
+
def destroy_archived_files_without_content
|
269
|
+
af_in = self.connection.select_all(archived_files_query("=")).collect{|e| e["id"]}
|
270
|
+
af_out = self.connection.select_all(archived_files_query("!=")).collect{|e| e["id"]}
|
271
|
+
|
272
|
+
af_to_destroy = Nearline::Models::ArchivedFile.find(af_in - af_out)
|
273
|
+
af_to_destroy.each do |af|
|
101
274
|
af.orphan_check
|
102
275
|
end
|
276
|
+
end
|
277
|
+
|
278
|
+
|
279
|
+
def destroy_archived_files_manifests
|
280
|
+
self.connection.delete("delete from archived_files_manifests where manifest_id=#{self.id}")
|
281
|
+
end
|
282
|
+
|
283
|
+
def destroy_logs
|
103
284
|
logs.each do |log|
|
104
285
|
log.destroy
|
105
|
-
end
|
286
|
+
end
|
106
287
|
end
|
107
288
|
|
289
|
+
public
|
290
|
+
|
108
291
|
def total_size
|
109
292
|
size = 0
|
110
293
|
archived_files.each do |af|
|
@@ -1,8 +1,10 @@
|
|
1
1
|
module Nearline
|
2
2
|
module_function
|
3
3
|
|
4
|
-
#
|
5
|
-
VERSION = "0.0.
|
4
|
+
# Version of the software
|
5
|
+
VERSION = "0.0.5"
|
6
|
+
# Last version that changed the database structure
|
7
|
+
DB_VERSION = "0.0.4"
|
6
8
|
|
7
9
|
# Array of every Nearline Model using an ActiveRecord connection
|
8
10
|
AR_MODELS = Nearline::Models.constants.map do |m|
|
@@ -21,6 +23,8 @@ module Nearline
|
|
21
23
|
# Stomps on any ActiveRecord::Base.establish_connection you might
|
22
24
|
# have already established.
|
23
25
|
#
|
26
|
+
# ***NOTE: MySQL is the only recommended database at this time.***
|
27
|
+
#
|
24
28
|
# === Examples
|
25
29
|
# Nearline.connect!({:adapter => 'sqlite3', :database => 'data/sqlite.db'})
|
26
30
|
#
|
@@ -49,6 +53,9 @@ module Nearline
|
|
49
53
|
#
|
50
54
|
# Accepts a Hash to establish the connection or
|
51
55
|
# a String referring to an entry in config/database.yml.
|
56
|
+
#
|
57
|
+
# ***NOTE: MySQL is the only recommended database at this time.***
|
58
|
+
#
|
52
59
|
# === Examples
|
53
60
|
# Nearline.connect({:adapter => 'sqlite3', :database => 'data/sqlite.db'})
|
54
61
|
#
|
@@ -90,9 +97,7 @@ module Nearline
|
|
90
97
|
# Nearline.backup('my_laptop', ['/home/me', '/var/svn']
|
91
98
|
#
|
92
99
|
def backup(system_name, backup_paths,backup_exclusions= [])
|
93
|
-
|
94
|
-
raise SchemaVersionException.for_version(schema_version)
|
95
|
-
end
|
100
|
+
raise_failing_version_check
|
96
101
|
Nearline::Models::System.backup(
|
97
102
|
system_name,
|
98
103
|
Utilities.string_to_array(backup_paths),
|
@@ -120,13 +125,17 @@ module Nearline
|
|
120
125
|
#
|
121
126
|
# Returns an Array of paths restored
|
122
127
|
def restore(system_name, latest_date_time = Time.now)
|
123
|
-
|
124
|
-
raise SchemaVersionException.for_version(schema_version)
|
125
|
-
end
|
128
|
+
raise_failing_version_check
|
126
129
|
Nearline::Models::System.restore_all_missing(system_name, latest_date_time)
|
127
130
|
end
|
128
131
|
|
129
|
-
|
132
|
+
# Returns an array of paths that would be restored given the provided
|
133
|
+
# parameters
|
134
|
+
def what_would_restore(system_name, latest_date_time = Time.now)
|
135
|
+
raise_failing_version_check
|
136
|
+
Nearline::Models::System.what_would_restore(system_name, latest_date_time)
|
137
|
+
end
|
138
|
+
|
130
139
|
# Returns the nearline version of the database
|
131
140
|
def schema_version
|
132
141
|
begin
|
@@ -138,15 +147,23 @@ module Nearline
|
|
138
147
|
end
|
139
148
|
end
|
140
149
|
|
150
|
+
# Guard used before operations that need a compatible database: raises the
# SchemaVersionException built for the current schema_version when
# version_check? is false, and does nothing otherwise.
def raise_failing_version_check
  raise SchemaVersionException.for_version(schema_version) unless version_check?
end
|
155
|
+
|
141
156
|
# Returns true only if the Nearline version matches the schema
|
142
157
|
def version_check?
|
143
|
-
Nearline::
|
158
|
+
Nearline::DB_VERSION == schema_version()
|
144
159
|
end
|
145
160
|
|
146
161
|
class SchemaVersionException < Exception
|
147
162
|
def self.for_version(v)
|
148
|
-
SchemaVersionException.new(
|
149
|
-
|
163
|
+
SchemaVersionException.new(<<-END_ERROR)
|
164
|
+
Schema #{v} is not the same version as nearline database
|
165
|
+
version #{Nearline::DB_VERSION} used in Nearline #{Nearline::VERSION}!
|
166
|
+
END_ERROR
|
150
167
|
end
|
151
168
|
end
|
152
169
|
|
data/lib/nearline/schema.rb
CHANGED
@@ -18,7 +18,7 @@ module Nearline
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def empty_schema
|
21
|
-
Nearline::Models::
|
21
|
+
Nearline::Models::System.destroy_all
|
22
22
|
end
|
23
23
|
|
24
24
|
def generate_schema
|
@@ -99,7 +99,7 @@ module Nearline
|
|
99
99
|
t.column :version, :string
|
100
100
|
end
|
101
101
|
|
102
|
-
execute "insert into nearline_version (version) values ('#{Nearline::
|
102
|
+
execute "insert into nearline_version (version) values ('#{Nearline::DB_VERSION}')"
|
103
103
|
end
|
104
104
|
end
|
105
105
|
|
data/lib/nearline/system.rb
CHANGED
@@ -52,15 +52,15 @@ module Nearline
|
|
52
52
|
Manifest.restore_all_missing(self, latest_date_time)
|
53
53
|
end
|
54
54
|
|
55
|
-
def
|
56
|
-
|
57
|
-
|
58
|
-
for af in self.archived_files
|
59
|
-
@lookup_hash[af.path_hash] = af.id
|
60
|
-
end
|
61
|
-
@lookup_hash
|
55
|
+
def self.what_would_restore(system_name, latest_date_time)
|
56
|
+
system = self.for_name(system_name)
|
57
|
+
system.what_would_restore(latest_date_time)
|
62
58
|
end
|
63
59
|
|
60
|
+
def what_would_restore(latest_date_time = Time.now)
|
61
|
+
Manifest.what_would_restore(self, latest_date_time)
|
62
|
+
end
|
63
|
+
|
64
64
|
def before_destroy
|
65
65
|
for manifest in self.manifests
|
66
66
|
manifest.destroy
|
data/tasks/gemspec.rake
CHANGED
@@ -9,6 +9,7 @@ SPEC = Gem::Specification.new do |s|
|
|
9
9
|
s.version = Nearline::VERSION
|
10
10
|
s.author = "Robert J. Osborne"
|
11
11
|
s.email = "rjo1970@gmail.com"
|
12
|
+
s.homepage = "http://rubyforge.org/projects/nearline"
|
12
13
|
s.summary = "Nearline is a near-line backup and recovery solution"
|
13
14
|
s.description = %{
|
14
15
|
Nearline is a library to make managing near-line file repositories
|
@@ -18,7 +19,6 @@ SPEC = Gem::Specification.new do |s|
|
|
18
19
|
s.files = FileList["{tests,lib,doc,tasks}/**/*"].exclude("rdoc").to_a
|
19
20
|
s.add_dependency("activerecord", '>= 2.0.2')
|
20
21
|
s.require_path = "lib"
|
21
|
-
s.autorequire = "nearline"
|
22
22
|
s.test_file = "test/nearline_test.rb"
|
23
23
|
s.has_rdoc = true
|
24
24
|
end
|
metadata
CHANGED
@@ -1,67 +1,75 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.4
|
3
|
-
specification_version: 1
|
4
2
|
name: nearline
|
5
3
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.0.
|
7
|
-
date: 2008-04-18 00:00:00 -04:00
|
8
|
-
summary: Nearline is a near-line backup and recovery solution
|
9
|
-
require_paths:
|
10
|
-
- lib
|
11
|
-
email: rjo1970@gmail.com
|
12
|
-
homepage:
|
13
|
-
rubyforge_project: nearline
|
14
|
-
description: Nearline is a library to make managing near-line file repositories simple and elegant in pure Ruby.
|
15
|
-
autorequire: nearline
|
16
|
-
default_executable:
|
17
|
-
bindir: bin
|
18
|
-
has_rdoc: true
|
19
|
-
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">"
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.0
|
24
|
-
version:
|
4
|
+
version: 0.0.5
|
25
5
|
platform: ruby
|
26
|
-
signing_key:
|
27
|
-
cert_chain:
|
28
|
-
post_install_message:
|
29
6
|
authors:
|
30
7
|
- Robert J. Osborne
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-05-22 00:00:00 -04:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: activerecord
|
17
|
+
version_requirement:
|
18
|
+
version_requirements: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.0.2
|
23
|
+
version:
|
24
|
+
description: Nearline is a library to make managing near-line file repositories simple and elegant in pure Ruby.
|
25
|
+
email: rjo1970@gmail.com
|
26
|
+
executables: []
|
27
|
+
|
28
|
+
extensions: []
|
29
|
+
|
30
|
+
extra_rdoc_files: []
|
31
|
+
|
31
32
|
files:
|
32
33
|
- lib/nearline
|
33
|
-
- lib/nearline/
|
34
|
+
- lib/nearline/schema.rb
|
35
|
+
- lib/nearline/system.rb
|
36
|
+
- lib/nearline/module_methods.rb
|
37
|
+
- lib/nearline/file_sequencer.rb
|
34
38
|
- lib/nearline/block.rb
|
35
|
-
- lib/nearline/file_content.rb
|
36
39
|
- lib/nearline/log.rb
|
40
|
+
- lib/nearline/archived_file.rb
|
41
|
+
- lib/nearline/file_content.rb
|
37
42
|
- lib/nearline/manifest.rb
|
38
|
-
- lib/nearline/module_methods.rb
|
39
|
-
- lib/nearline/schema.rb
|
40
|
-
- lib/nearline/system.rb
|
41
43
|
- lib/nearline.rb
|
42
|
-
- tasks/clean.rake
|
43
44
|
- tasks/gemspec.rake
|
44
|
-
- tasks/
|
45
|
+
- tasks/clean.rake
|
45
46
|
- tasks/test.rake
|
46
|
-
|
47
|
-
|
47
|
+
- tasks/rcov.rake
|
48
|
+
has_rdoc: true
|
49
|
+
homepage: http://rubyforge.org/projects/nearline
|
50
|
+
post_install_message:
|
48
51
|
rdoc_options: []
|
49
52
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: "0"
|
60
|
+
version:
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: "0"
|
66
|
+
version:
|
56
67
|
requirements: []
|
57
68
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
- !ruby/object:Gem::Version
|
66
|
-
version: 2.0.2
|
67
|
-
version:
|
69
|
+
rubyforge_project: nearline
|
70
|
+
rubygems_version: 1.1.1
|
71
|
+
signing_key:
|
72
|
+
specification_version: 2
|
73
|
+
summary: Nearline is a near-line backup and recovery solution
|
74
|
+
test_files:
|
75
|
+
- test/nearline_test.rb
|