nearline 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/nearline.rb +3 -0
- data/lib/nearline/archived_file.rb +32 -95
- data/lib/nearline/block.rb +14 -29
- data/lib/nearline/file_content.rb +13 -32
- data/lib/nearline/file_sequencer.rb +130 -0
- data/lib/nearline/manifest.rb +201 -18
- data/lib/nearline/module_methods.rb +29 -12
- data/lib/nearline/schema.rb +2 -2
- data/lib/nearline/system.rb +7 -7
- data/tasks/gemspec.rake +1 -1
- metadata +57 -49
data/lib/nearline.rb
CHANGED
@@ -4,106 +4,43 @@ module Nearline
|
|
4
4
|
# Represents file metadata and possible related FileContent
|
5
5
|
# for a single file on a single system
|
6
6
|
class ArchivedFile < ActiveRecord::Base
|
7
|
+
require 'digest/sha1'
|
7
8
|
require 'fileutils'
|
8
9
|
|
9
10
|
belongs_to :file_content
|
10
11
|
belongs_to :system
|
11
12
|
has_and_belongs_to_many :manifests
|
12
13
|
|
13
|
-
|
14
|
-
def self.create_for(
|
15
|
-
|
16
|
-
|
17
|
-
# The path doesn't actually exist and fails a File.stat
|
14
|
+
|
15
|
+
def self.create_for(file_information)
|
16
|
+
# The path doesn't actually exist and fails a File.lstat
|
18
17
|
return nil if file_information.path_hash.nil?
|
19
|
-
|
20
|
-
# If we find an exising entry, use it
|
21
|
-
hash = manifest.system.archived_file_lookup_hash
|
22
|
-
hit = hash[file_information.path_hash]
|
23
|
-
|
24
|
-
unless hit.nil?
|
25
|
-
af = ArchivedFile.find(hit)
|
26
|
-
manifest.archived_files << af
|
27
|
-
return af
|
28
|
-
end
|
29
18
|
|
30
19
|
# We need to create a record for either a directory or file
|
31
20
|
archived_file = ArchivedFile.new(
|
32
21
|
file_information.archived_file_parameters
|
33
22
|
)
|
34
|
-
|
23
|
+
|
35
24
|
# Find a new directory
|
36
25
|
if (file_information.is_directory)
|
37
26
|
archived_file.save!
|
38
|
-
manifest.archived_files << archived_file
|
27
|
+
file_information.manifest.archived_files << archived_file
|
39
28
|
return archived_file
|
40
29
|
end
|
41
30
|
|
42
|
-
# Find a new file that needs persisted
|
31
|
+
# Find a new file that needs persisted
|
43
32
|
archived_file.file_content.file_size =
|
44
33
|
[file_information.stat.size].pack('Q').unpack('L').first # HACK for Windows
|
45
|
-
archived_file = archived_file.persist(manifest)
|
46
|
-
archived_file.
|
47
|
-
|
34
|
+
archived_file = archived_file.persist(file_information.manifest)
|
35
|
+
unless archived_file.nil? || archived_file.frozen?
|
36
|
+
archived_file.save!
|
37
|
+
file_information.manifest.archived_files << archived_file
|
38
|
+
end
|
48
39
|
archived_file
|
49
40
|
|
50
41
|
# TODO: Symbolic links, block devices, ...?
|
51
42
|
end
|
52
|
-
|
53
|
-
class FileInformation
|
54
|
-
attr_reader :path_hash, :stat, :is_directory, :archived_file_parameters
|
55
|
-
def initialize(file_path, manifest)
|
56
|
-
@manifest = manifest
|
57
|
-
@file_path = file_path
|
58
|
-
@stat = read_stat
|
59
|
-
@is_directory = File.directory?(file_path)
|
60
|
-
@path_hash = generate_path_hash
|
61
|
-
@archived_file_parameters = build_parameters
|
62
|
-
end
|
63
|
-
|
64
|
-
def read_stat
|
65
|
-
stat = nil
|
66
|
-
begin
|
67
|
-
stat = File.stat(@file_path)
|
68
|
-
rescue
|
69
|
-
@manifest.add_log("File not found on stat: #{@file_path}")
|
70
|
-
end
|
71
|
-
stat
|
72
|
-
end
|
73
|
-
|
74
|
-
def generate_path_hash
|
75
|
-
return nil if @stat.nil?
|
76
|
-
target = [@manifest.system.name,
|
77
|
-
@file_path,
|
78
|
-
@stat.uid,
|
79
|
-
@stat.gid,
|
80
|
-
@stat.mtime.to_i,
|
81
|
-
@stat.mode].join(':')
|
82
|
-
Digest::SHA1.hexdigest(target)
|
83
|
-
end
|
84
|
-
|
85
|
-
def file_content_entry_for_files_only
|
86
|
-
return FileContent.new unless @is_directory
|
87
|
-
return nil
|
88
|
-
end
|
89
|
-
|
90
|
-
def build_parameters
|
91
|
-
return nil if @stat.nil?
|
92
|
-
{
|
93
|
-
:system => @manifest.system,
|
94
|
-
:path => @file_path,
|
95
|
-
:path_hash => @path_hash,
|
96
|
-
:file_content => file_content_entry_for_files_only,
|
97
|
-
:uid => @stat.uid,
|
98
|
-
:gid => @stat.gid,
|
99
|
-
:mtime => @stat.mtime.to_i,
|
100
|
-
:mode => @stat.mode,
|
101
|
-
:is_directory => @is_directory
|
102
|
-
}
|
103
|
-
end
|
104
|
-
|
105
|
-
end
|
106
|
-
|
43
|
+
|
107
44
|
def restore(*args)
|
108
45
|
@options = args.extract_options!
|
109
46
|
if (self.is_directory)
|
@@ -158,20 +95,20 @@ module Nearline
|
|
158
95
|
# won't know that until we complete the process and have to
|
159
96
|
# clean up our mess.
|
160
97
|
def persist(manifest)
|
161
|
-
|
162
|
-
file_size = 0
|
98
|
+
seq = nil
|
163
99
|
begin
|
164
|
-
|
100
|
+
seq = read_file
|
165
101
|
rescue
|
166
|
-
|
102
|
+
error = "Got error '#{$!}' on path: #{self.path}"
|
103
|
+
manifest.add_log error
|
167
104
|
self.orphan_check
|
168
105
|
return nil
|
169
106
|
end
|
170
|
-
|
171
|
-
size_check(file_size, manifest)
|
107
|
+
|
108
|
+
size_check(seq.file_size, manifest)
|
172
109
|
|
173
110
|
# Do we have a unique sequence?
|
174
|
-
key =
|
111
|
+
key = seq.fingerprint
|
175
112
|
return self if unique_sequence_processed?(key, manifest)
|
176
113
|
|
177
114
|
# Handle the case where the sequence is not unique...
|
@@ -180,19 +117,14 @@ module Nearline
|
|
180
117
|
self
|
181
118
|
end
|
182
119
|
|
183
|
-
def
|
184
|
-
sequencer = FileSequencer.new(self.file_content)
|
185
|
-
file_size = 0
|
186
|
-
buffer = ""
|
120
|
+
def read_file
|
187
121
|
File.open(self.path, "rb") do |io|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
whole_file_hash.update(buffer)
|
192
|
-
sequencer.preserve_content(buffer)
|
122
|
+
seq = FileSequencer.new(io, self.file_content)
|
123
|
+
while (!io.eof)
|
124
|
+
seq.persist_segment
|
193
125
|
end
|
126
|
+
return seq
|
194
127
|
end
|
195
|
-
return file_size
|
196
128
|
end
|
197
129
|
|
198
130
|
def size_check(file_size, manifest)
|
@@ -222,10 +154,15 @@ module Nearline
|
|
222
154
|
false
|
223
155
|
end
|
224
156
|
|
157
|
+
# In the special case of an identical sequence existing,
|
158
|
+
# we can safely delete all related sequences and then destroy
|
159
|
+
# the file content object without the (far slower) orphan checking
|
160
|
+
# process
|
225
161
|
def clean_up_duplicate_content
|
226
|
-
self.file_content.
|
162
|
+
Sequence.delete_all "file_content_id = #{self.file_content.id}"
|
163
|
+
self.file_content.destroy
|
227
164
|
end
|
228
|
-
|
165
|
+
|
229
166
|
def replace_content(key)
|
230
167
|
self.file_content = FileContent.find_by_fingerprint(key)
|
231
168
|
self.save!
|
data/lib/nearline/block.rb
CHANGED
@@ -12,12 +12,21 @@ module Nearline
|
|
12
12
|
|
13
13
|
has_many :sequences
|
14
14
|
|
15
|
-
|
15
|
+
# Maximum block size in bytes
|
16
|
+
@@max_block_size = (64 * 1024)-1
|
17
|
+
cattr_accessor :max_block_size
|
18
|
+
|
19
|
+
# Level of block compression attempted
|
20
|
+
# 0 = skip compression entirely
|
21
|
+
@@block_compression_level = 5
|
22
|
+
cattr_accessor :block_compression_level
|
16
23
|
|
17
24
|
def attempt_compression
|
18
|
-
return if (self.is_compressed)
|
19
|
-
|
20
|
-
|
25
|
+
return if (self.is_compressed || @@block_compression_level == 0)
|
26
|
+
candidate_content = Zlib::Deflate.deflate(
|
27
|
+
self.bulk_content,
|
28
|
+
@@block_compression_level
|
29
|
+
)
|
21
30
|
if candidate_content.length < self.bulk_content.length
|
22
31
|
self.is_compressed = true
|
23
32
|
self.bulk_content = candidate_content
|
@@ -37,31 +46,7 @@ module Nearline
|
|
37
46
|
end
|
38
47
|
@content = self.bulk_content
|
39
48
|
end
|
40
|
-
|
41
|
-
def self.id_for_content(x)
|
42
|
-
block = Block.new(:bulk_content => x)
|
43
|
-
block.calculate_fingerprint
|
44
|
-
hit = Block.connection.select_one(
|
45
|
-
"select id from blocks where fingerprint='#{block.fingerprint}'"
|
46
|
-
)
|
47
|
-
unless hit.nil?
|
48
|
-
return hit['id']
|
49
|
-
end
|
50
|
-
block.attempt_compression
|
51
|
-
block.save!
|
52
|
-
block.id
|
53
|
-
end
|
54
|
-
|
55
|
-
def self.for_content(x)
|
56
|
-
block = Models::Block.new(:bulk_content => x)
|
57
|
-
block.calculate_fingerprint
|
58
|
-
found = find_by_fingerprint(block.fingerprint)
|
59
|
-
return found if !found.nil?
|
60
|
-
block.attempt_compression
|
61
|
-
block.save!
|
62
|
-
block
|
63
|
-
end
|
64
|
-
|
49
|
+
|
65
50
|
def orphan_check
|
66
51
|
if self.sequences.size == 0
|
67
52
|
self.destroy
|
@@ -4,7 +4,7 @@ module Nearline
|
|
4
4
|
# Has the responsibility of identifying, restoring and
|
5
5
|
# verifying content
|
6
6
|
class FileContent < ActiveRecord::Base
|
7
|
-
has_many :sequences
|
7
|
+
has_many :sequences, :order => "sequence"
|
8
8
|
has_many :archived_files
|
9
9
|
|
10
10
|
def orphan_check
|
@@ -23,22 +23,25 @@ module Nearline
|
|
23
23
|
return hit.nil?
|
24
24
|
end
|
25
25
|
|
26
|
-
|
26
|
+
private
|
27
|
+
|
28
|
+
def each_sequence
|
27
29
|
sequences.each do |seq|
|
28
30
|
block = Block.find(seq.block_id)
|
29
|
-
|
31
|
+
yield block
|
30
32
|
end
|
31
33
|
end
|
34
|
+
|
35
|
+
public
|
36
|
+
|
37
|
+
def restore_to(io)
|
38
|
+
each_sequence { |block| io.write(block.content) }
|
39
|
+
end
|
32
40
|
|
33
41
|
def verified?
|
34
|
-
if (!self.verified_at.nil?)
|
35
|
-
return true
|
36
|
-
end
|
42
|
+
return true if (!self.verified_at.nil?)
|
37
43
|
whole_file_hash = Digest::SHA1.new
|
38
|
-
|
39
|
-
block = Block.find(seq.block_id)
|
40
|
-
whole_file_hash.update(block.content)
|
41
|
-
end
|
44
|
+
each_sequence { |block| whole_file_hash.update(block.content) }
|
42
45
|
if fingerprint == whole_file_hash.hexdigest
|
43
46
|
self.verified_at = Time.now
|
44
47
|
self.save!
|
@@ -47,7 +50,6 @@ module Nearline
|
|
47
50
|
false
|
48
51
|
end
|
49
52
|
|
50
|
-
|
51
53
|
end
|
52
54
|
|
53
55
|
# Has the responsibility of preserving
|
@@ -61,26 +63,5 @@ module Nearline
|
|
61
63
|
end
|
62
64
|
end
|
63
65
|
|
64
|
-
class FileSequencer
|
65
|
-
def initialize(file_content)
|
66
|
-
@inc = 0
|
67
|
-
@file_content = file_content
|
68
|
-
@file_content.save!
|
69
|
-
end
|
70
|
-
|
71
|
-
def preserve_content(content)
|
72
|
-
@inc += 1
|
73
|
-
block_id = Block.id_for_content(content)
|
74
|
-
sequence = Sequence.new(
|
75
|
-
:sequence => @inc,
|
76
|
-
:file_content_id => @file_content.id,
|
77
|
-
:block_id => block_id
|
78
|
-
)
|
79
|
-
sequence.save!
|
80
|
-
sequence
|
81
|
-
end
|
82
|
-
|
83
|
-
end
|
84
|
-
|
85
66
|
end
|
86
67
|
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
module Nearline
|
2
|
+
module Models
|
3
|
+
|
4
|
+
# Used for mass block entry and sequencing
|
5
|
+
class FileSequencer
|
6
|
+
attr_reader :file_size
|
7
|
+
|
8
|
+
# Number of blocks to serialize in a batch
|
9
|
+
@@max_blocks = 500;
|
10
|
+
cattr_accessor :max_blocks
|
11
|
+
|
12
|
+
def initialize(io, file_content)
|
13
|
+
@io = io
|
14
|
+
@file_content = file_content
|
15
|
+
if (@file_content.id.nil?)
|
16
|
+
@file_content.save!
|
17
|
+
end
|
18
|
+
@s = []
|
19
|
+
@b = []
|
20
|
+
@file_size = 0
|
21
|
+
@offset = 0
|
22
|
+
@whole_file_hash = Digest::SHA1.new
|
23
|
+
end
|
24
|
+
|
25
|
+
def fingerprint
|
26
|
+
@whole_file_hash.hexdigest
|
27
|
+
end
|
28
|
+
|
29
|
+
def persist_segment
|
30
|
+
pull_blocks
|
31
|
+
sequence_known_blocks
|
32
|
+
attempt_compression_of_remaining_blocks
|
33
|
+
insert_new_blocks
|
34
|
+
sequence_known_blocks
|
35
|
+
insert_sequences
|
36
|
+
clear_for_next_persist
|
37
|
+
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
def clear_for_next_persist
|
42
|
+
@s = []
|
43
|
+
@b = []
|
44
|
+
@offset += @@max_blocks
|
45
|
+
end
|
46
|
+
|
47
|
+
def sequence_known_blocks
|
48
|
+
f = found_fingerprint_map
|
49
|
+
add_sequence_entries_clearing_blocks(f)
|
50
|
+
end
|
51
|
+
|
52
|
+
def found_fingerprint_map
|
53
|
+
f = {}
|
54
|
+
fp_raw = []
|
55
|
+
@b.each {|a| fp_raw << a.fingerprint unless a.nil?}
|
56
|
+
return f if fp_raw.size == 0
|
57
|
+
fingerprints = fp_raw.collect {|fp| "'#{fp}'"}.join(', ')
|
58
|
+
query = "select distinct id, fingerprint from blocks "+
|
59
|
+
"where fingerprint in (#{fingerprints})"
|
60
|
+
r = Nearline::Models::Block.connection.select_all(query)
|
61
|
+
r.each { |e| f[e["fingerprint"]] = e["id"] }
|
62
|
+
f
|
63
|
+
end
|
64
|
+
|
65
|
+
def add_sequence_entries_clearing_blocks(f)
|
66
|
+
@b.size.times do |i|
|
67
|
+
block = @b[i]
|
68
|
+
unless block.nil?
|
69
|
+
if f[block.fingerprint]
|
70
|
+
@s.push(Sequence.new(
|
71
|
+
:sequence => i + @offset + 1,
|
72
|
+
:block_id => f[block.fingerprint],
|
73
|
+
:file_content_id => @file_content.id
|
74
|
+
))
|
75
|
+
@b[i] = nil
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def insert_sequences
|
82
|
+
@s.each do |s|
|
83
|
+
s.save!
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
def attempt_compression_of_remaining_blocks
|
88
|
+
f = {}
|
89
|
+
@b.each do |block|
|
90
|
+
unless block.nil? or f[block.fingerprint]
|
91
|
+
block.attempt_compression
|
92
|
+
f[block.fingerprint] = true
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
def insert_new_blocks
|
98
|
+
f = {}
|
99
|
+
@b.each do |b|
|
100
|
+
unless b.nil? || f[b.fingerprint]
|
101
|
+
b.save!
|
102
|
+
f[b.fingerprint] = true
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def pull_blocks
|
108
|
+
count = 0
|
109
|
+
while (!@io.eof && count < @@max_blocks)
|
110
|
+
count += 1
|
111
|
+
|
112
|
+
# Move to Block
|
113
|
+
buffer = @io.read(Block.max_block_size)
|
114
|
+
|
115
|
+
@file_size += buffer.size
|
116
|
+
|
117
|
+
# Move to Block
|
118
|
+
blk = Block.new(:bulk_content => buffer)
|
119
|
+
@whole_file_hash.update(buffer)
|
120
|
+
|
121
|
+
# Move to Block
|
122
|
+
blk.calculate_fingerprint
|
123
|
+
@b << blk
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
end
|
128
|
+
|
129
|
+
end
|
130
|
+
end
|
data/lib/nearline/manifest.rb
CHANGED
@@ -5,10 +5,7 @@ module Nearline
|
|
5
5
|
class FileFinder
|
6
6
|
require 'find'
|
7
7
|
def self.recurse(paths, exclusions)
|
8
|
-
regex_exclusions =
|
9
|
-
for exclusion in exclusions
|
10
|
-
regex_exclusions << /#{exclusion}/
|
11
|
-
end
|
8
|
+
regex_exclusions = exclusion_regexes(exclusions)
|
12
9
|
paths.each do |path|
|
13
10
|
Find.find(path) do |f|
|
14
11
|
regex_exclusions.each do |ex|
|
@@ -18,6 +15,72 @@ module Nearline
|
|
18
15
|
end
|
19
16
|
end
|
20
17
|
end
|
18
|
+
|
19
|
+
def self.exclusion_regexes(exclusions)
|
20
|
+
regex_exclusions = []
|
21
|
+
for exclusion in exclusions
|
22
|
+
regex_exclusions << /#{exclusion}/
|
23
|
+
end
|
24
|
+
regex_exclusions
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# Handles file paths and metadata for a file in a manifest
|
29
|
+
class FileInformation
|
30
|
+
attr_reader :path_hash, :stat, :is_directory,
|
31
|
+
:archived_file_parameters, :manifest, :file_path
|
32
|
+
|
33
|
+
def initialize(file_path, manifest)
|
34
|
+
@manifest = manifest
|
35
|
+
@file_path = file_path
|
36
|
+
@stat = read_stat
|
37
|
+
@is_directory = File.directory?(file_path)
|
38
|
+
@path_hash = generate_path_hash
|
39
|
+
@archived_file_parameters = build_parameters
|
40
|
+
end
|
41
|
+
|
42
|
+
def read_stat
|
43
|
+
stat = nil
|
44
|
+
begin
|
45
|
+
# TODO: change to lstat when we handle links
|
46
|
+
stat = File.stat(@file_path)
|
47
|
+
rescue
|
48
|
+
@manifest.add_log("File not found on stat: #{@file_path}")
|
49
|
+
end
|
50
|
+
stat
|
51
|
+
end
|
52
|
+
|
53
|
+
def generate_path_hash
|
54
|
+
return nil if @stat.nil?
|
55
|
+
target = [@manifest.system.name,
|
56
|
+
@file_path,
|
57
|
+
@stat.uid,
|
58
|
+
@stat.gid,
|
59
|
+
@stat.mtime.to_i,
|
60
|
+
@stat.mode].join(':')
|
61
|
+
Digest::SHA1.hexdigest(target)
|
62
|
+
end
|
63
|
+
|
64
|
+
def file_content_entry_for_files_only
|
65
|
+
return FileContent.new unless @is_directory
|
66
|
+
return nil
|
67
|
+
end
|
68
|
+
|
69
|
+
def build_parameters
|
70
|
+
return nil if @stat.nil?
|
71
|
+
{
|
72
|
+
:system => @manifest.system,
|
73
|
+
:path => @file_path,
|
74
|
+
:path_hash => @path_hash,
|
75
|
+
:file_content => file_content_entry_for_files_only,
|
76
|
+
:uid => @stat.uid,
|
77
|
+
:gid => @stat.gid,
|
78
|
+
:mtime => @stat.mtime.to_i,
|
79
|
+
:mode => @stat.mode,
|
80
|
+
:is_directory => @is_directory
|
81
|
+
}
|
82
|
+
end
|
83
|
+
|
21
84
|
end
|
22
85
|
|
23
86
|
# A Manifest represents the corpus of ArchivedFiles and
|
@@ -32,7 +95,11 @@ module Nearline
|
|
32
95
|
attr_accessor :backup_paths
|
33
96
|
# Just needed when you create a manifest
|
34
97
|
attr_accessor :backup_exclusions
|
35
|
-
|
98
|
+
|
99
|
+
# Maximum number of files to stat and process in a batch
|
100
|
+
@@max_files_cached = 10000
|
101
|
+
cattr_accessor :max_files_cached
|
102
|
+
|
36
103
|
def self.new_for_name(system_name)
|
37
104
|
system = System.for_name(system_name)
|
38
105
|
system.manifests << m = Nearline::Models::Manifest.new
|
@@ -43,10 +110,44 @@ module Nearline
|
|
43
110
|
def self.backup(system, backup_paths, backup_exclusions)
|
44
111
|
manifest = self.new(:system => system)
|
45
112
|
manifest.save!
|
113
|
+
manifest.backup(backup_paths, backup_exclusions)
|
114
|
+
end
|
115
|
+
|
116
|
+
def backup(backup_paths, backup_exclusions)
|
117
|
+
FileFinder.recurse(backup_paths, backup_exclusions) do |file_path|
|
118
|
+
handle_file_path(file_path)
|
119
|
+
end
|
120
|
+
finish_remaining_file_infos
|
46
121
|
|
47
|
-
|
48
|
-
|
49
|
-
|
122
|
+
self.completed_at = Time.now
|
123
|
+
self.save!
|
124
|
+
self
|
125
|
+
end
|
126
|
+
|
127
|
+
def handle_file_path(file_path)
|
128
|
+
@file_infos = @file_infos || []
|
129
|
+
@file_infos << FileInformation.new(file_path, self)
|
130
|
+
|
131
|
+
if @file_infos.size > @@max_files_cached
|
132
|
+
process_file_infos
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
def finish_remaining_file_infos
|
137
|
+
process_file_infos
|
138
|
+
end
|
139
|
+
|
140
|
+
def process_file_infos
|
141
|
+
return if @file_infos.size == 0
|
142
|
+
|
143
|
+
lookup = existing_archived_file_lookup
|
144
|
+
@file_infos.each do |file_info|
|
145
|
+
$stdout.write file_info.file_path + " "
|
146
|
+
if (af = lookup[file_info.path_hash])
|
147
|
+
self.archived_files << af
|
148
|
+
else
|
149
|
+
af = ArchivedFile.create_for(file_info)
|
150
|
+
end
|
50
151
|
if (!af.nil?)
|
51
152
|
$stdout.write "#{Time.at(af.mtime).asctime}"
|
52
153
|
if (!af.file_content.nil?)
|
@@ -55,12 +156,18 @@ module Nearline
|
|
55
156
|
$stdout.write("\n")
|
56
157
|
end
|
57
158
|
end
|
58
|
-
|
59
|
-
manifest.completed_at = Time.now
|
60
|
-
manifest.save!
|
61
|
-
manifest
|
159
|
+
@file_infos = []
|
62
160
|
end
|
63
161
|
|
162
|
+
def existing_archived_file_lookup
|
163
|
+
return {} if @file_infos.size == 0
|
164
|
+
path_hashes = @file_infos.collect {|e| "'#{e.path_hash}'"}.join(", ")
|
165
|
+
conditions = "path_hash in (#{path_hashes})"
|
166
|
+
hits = ArchivedFile.find(:all, :conditions => conditions)
|
167
|
+
existing_files = {}
|
168
|
+
hits.each { |e| existing_files[e.path_hash] = e }
|
169
|
+
existing_files
|
170
|
+
end
|
64
171
|
|
65
172
|
# Find all Manifest entries (across all Systems) which have never finished.
|
66
173
|
#
|
@@ -73,17 +180,24 @@ module Nearline
|
|
73
180
|
|
74
181
|
def self.restore_all_missing(system, latest_date_time = Time.now)
|
75
182
|
manifest = system.latest_manifest_as_of(latest_date_time)
|
76
|
-
manifest.
|
183
|
+
manifest.iterate_all_missing do |af|
|
184
|
+
af.restore
|
185
|
+
end
|
77
186
|
end
|
78
187
|
|
79
|
-
|
80
|
-
|
188
|
+
def self.what_would_restore(system, latest_date_time = Time.now)
|
189
|
+
manifest = system.latest_manifest_as_of(latest_date_time)
|
190
|
+
manifest.iterate_all_missing {}
|
191
|
+
end
|
192
|
+
|
193
|
+
# Iterate all missing files from this manifest, yielding each
|
194
|
+
def iterate_all_missing
|
81
195
|
files_restored = []
|
82
196
|
self.archived_files.each do |af|
|
83
197
|
begin
|
84
198
|
File.stat(af.path)
|
85
199
|
rescue
|
86
|
-
af
|
200
|
+
yield af
|
87
201
|
files_restored << af.path
|
88
202
|
end
|
89
203
|
end
|
@@ -97,14 +211,83 @@ module Nearline
|
|
97
211
|
end
|
98
212
|
|
99
213
|
def before_destroy
|
100
|
-
|
214
|
+
destroy_archived_files_with_content
|
215
|
+
destroy_archived_files_without_content
|
216
|
+
destroy_archived_files_manifests
|
217
|
+
destroy_logs
|
218
|
+
self.destroy_without_habtm_shim_for_archived_files
|
219
|
+
end
|
220
|
+
|
221
|
+
private
|
222
|
+
|
223
|
+
def archived_file_content_query(op)
|
224
|
+
<<-END_SQL
|
225
|
+
select distinct fc.id
|
226
|
+
from archived_files af,
|
227
|
+
archived_files_manifests afm, file_contents fc
|
228
|
+
where
|
229
|
+
afm.manifest_id #{op} #{self.id} and
|
230
|
+
afm.archived_file_id = af.id and
|
231
|
+
af.file_content_id = fc.id
|
232
|
+
END_SQL
|
233
|
+
end
|
234
|
+
|
235
|
+
def destroy_archived_files_with_content
|
236
|
+
|
237
|
+
fc_in = self.connection.select_all(archived_file_content_query("=")).collect{|e| e["id"]}
|
238
|
+
fc_out = self.connection.select_all(archived_file_content_query("!=")).collect{|e| e["id"]}
|
239
|
+
fc_to_destroy = (fc_in - fc_out).join ", "
|
240
|
+
|
241
|
+
if (fc_to_destroy.size > 0)
|
242
|
+
af_to_destroy = self.connection.select_all(<<-END_QUERY
|
243
|
+
select af.id from archived_files af, archived_files_manifests afm
|
244
|
+
where afm.manifest_id=#{self.id} and afm.archived_file_id = af.id and
|
245
|
+
af.file_content_id in (#{fc_to_destroy})
|
246
|
+
END_QUERY
|
247
|
+
).collect{|e| e["id"]}
|
248
|
+
else
|
249
|
+
af_to_destroy = []
|
250
|
+
end
|
251
|
+
|
252
|
+
Nearline::Models::ArchivedFile.find(af_to_destroy).each do |af|
|
253
|
+
af.orphan_check
|
254
|
+
end
|
255
|
+
end
|
256
|
+
|
257
|
+
def archived_files_query(op)
|
258
|
+
<<-END_QUERY
|
259
|
+
select distinct af.id
|
260
|
+
from archived_files af,
|
261
|
+
archived_files_manifests afm
|
262
|
+
where af.file_content_id is null and
|
263
|
+
af.id = afm.archived_file_id and
|
264
|
+
afm.manifest_id #{op} #{self.id}
|
265
|
+
END_QUERY
|
266
|
+
end
|
267
|
+
|
268
|
+
def destroy_archived_files_without_content
|
269
|
+
af_in = self.connection.select_all(archived_files_query("=")).collect{|e| e["id"]}
|
270
|
+
af_out = self.connection.select_all(archived_files_query("!=")).collect{|e| e["id"]}
|
271
|
+
|
272
|
+
af_to_destroy = Nearline::Models::ArchivedFile.find(af_in - af_out)
|
273
|
+
af_to_destroy.each do |af|
|
101
274
|
af.orphan_check
|
102
275
|
end
|
276
|
+
end
|
277
|
+
|
278
|
+
|
279
|
+
def destroy_archived_files_manifests
|
280
|
+
self.connection.delete("delete from archived_files_manifests where manifest_id=#{self.id}")
|
281
|
+
end
|
282
|
+
|
283
|
+
def destroy_logs
|
103
284
|
logs.each do |log|
|
104
285
|
log.destroy
|
105
|
-
end
|
286
|
+
end
|
106
287
|
end
|
107
288
|
|
289
|
+
public
|
290
|
+
|
108
291
|
def total_size
|
109
292
|
size = 0
|
110
293
|
archived_files.each do |af|
|
@@ -1,8 +1,10 @@
|
|
1
1
|
module Nearline
|
2
2
|
module_function
|
3
3
|
|
4
|
-
#
|
5
|
-
VERSION = "0.0.
|
4
|
+
# Version of the software
|
5
|
+
VERSION = "0.0.5"
|
6
|
+
# Last version that changed the database structure
|
7
|
+
DB_VERSION = "0.0.4"
|
6
8
|
|
7
9
|
# Array of every Nearline Model using an ActiveRecord connection
|
8
10
|
AR_MODELS = Nearline::Models.constants.map do |m|
|
@@ -21,6 +23,8 @@ module Nearline
|
|
21
23
|
# Stomps on any ActiveRecord::Base.establish_connection you might
|
22
24
|
# have already established.
|
23
25
|
#
|
26
|
+
# ***NOTE: MYSQL is the only recommended database at this time.***
|
27
|
+
#
|
24
28
|
# === Examples
|
25
29
|
# Nearline.connect!({:adapter => 'sqlite3', :database => 'data/sqlite.db'})
|
26
30
|
#
|
@@ -49,6 +53,9 @@ module Nearline
|
|
49
53
|
#
|
50
54
|
# Accepts a Hash to establish the connection or
|
51
55
|
# a String referring to an entry in config/database.yml.
|
56
|
+
#
|
57
|
+
# ***NOTE: MYSQL is the only recommended database at this time.***
|
58
|
+
#
|
52
59
|
# === Examples
|
53
60
|
# Nearline.connect({:adapter => 'sqlite3', :database => 'data/sqlite.db'})
|
54
61
|
#
|
@@ -90,9 +97,7 @@ module Nearline
|
|
90
97
|
# Nearline.backup('my_laptop', ['/home/me', '/var/svn']
|
91
98
|
#
|
92
99
|
def backup(system_name, backup_paths,backup_exclusions= [])
|
93
|
-
|
94
|
-
raise SchemaVersionException.for_version(schema_version)
|
95
|
-
end
|
100
|
+
raise_failing_version_check
|
96
101
|
Nearline::Models::System.backup(
|
97
102
|
system_name,
|
98
103
|
Utilities.string_to_array(backup_paths),
|
@@ -120,13 +125,17 @@ module Nearline
|
|
120
125
|
#
|
121
126
|
# Returns an Array of paths restored
|
122
127
|
def restore(system_name, latest_date_time = Time.now)
|
123
|
-
|
124
|
-
raise SchemaVersionException.for_version(schema_version)
|
125
|
-
end
|
128
|
+
raise_failing_version_check
|
126
129
|
Nearline::Models::System.restore_all_missing(system_name, latest_date_time)
|
127
130
|
end
|
128
131
|
|
129
|
-
|
132
|
+
# Returns an array of paths that would be restored given the provided
|
133
|
+
# parameters
|
134
|
+
def what_would_restore(system_name, latest_date_time = Time.now)
|
135
|
+
raise_failing_version_check
|
136
|
+
Nearline::Models::System.what_would_restore(system_name, latest_date_time)
|
137
|
+
end
|
138
|
+
|
130
139
|
# Returns the nearline version of the database
|
131
140
|
def schema_version
|
132
141
|
begin
|
@@ -138,15 +147,23 @@ module Nearline
|
|
138
147
|
end
|
139
148
|
end
|
140
149
|
|
150
|
+
def raise_failing_version_check
|
151
|
+
unless version_check?
|
152
|
+
raise SchemaVersionException.for_version(schema_version)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
141
156
|
# Returns true only if the Nearline version matches the schema
|
142
157
|
def version_check?
|
143
|
-
Nearline::
|
158
|
+
Nearline::DB_VERSION == schema_version()
|
144
159
|
end
|
145
160
|
|
146
161
|
class SchemaVersionException < Exception
|
147
162
|
def self.for_version(v)
|
148
|
-
SchemaVersionException.new(
|
149
|
-
|
163
|
+
SchemaVersionException.new(<<-END_ERROR)
|
164
|
+
Schema #{v} is not the same version as nearline database
|
165
|
+
version #{Nearline::DB_VERSION} used in Nearline #{Nearline::VERSION}!
|
166
|
+
END_ERROR
|
150
167
|
end
|
151
168
|
end
|
152
169
|
|
data/lib/nearline/schema.rb
CHANGED
@@ -18,7 +18,7 @@ module Nearline
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def empty_schema
|
21
|
-
Nearline::Models::
|
21
|
+
Nearline::Models::System.destroy_all
|
22
22
|
end
|
23
23
|
|
24
24
|
def generate_schema
|
@@ -99,7 +99,7 @@ module Nearline
|
|
99
99
|
t.column :version, :string
|
100
100
|
end
|
101
101
|
|
102
|
-
execute "insert into nearline_version (version) values ('#{Nearline::
|
102
|
+
execute "insert into nearline_version (version) values ('#{Nearline::DB_VERSION}')"
|
103
103
|
end
|
104
104
|
end
|
105
105
|
|
data/lib/nearline/system.rb
CHANGED
@@ -52,15 +52,15 @@ module Nearline
|
|
52
52
|
Manifest.restore_all_missing(self, latest_date_time)
|
53
53
|
end
|
54
54
|
|
55
|
-
def
|
56
|
-
|
57
|
-
|
58
|
-
for af in self.archived_files
|
59
|
-
@lookup_hash[af.path_hash] = af.id
|
60
|
-
end
|
61
|
-
@lookup_hash
|
55
|
+
def self.what_would_restore(system_name, latest_date_time)
|
56
|
+
system = self.for_name(system_name)
|
57
|
+
system.what_would_restore(latest_date_time)
|
62
58
|
end
|
63
59
|
|
60
|
+
def what_would_restore(latest_date_time = Time.now)
|
61
|
+
Manifest.what_would_restore(self, latest_date_time)
|
62
|
+
end
|
63
|
+
|
64
64
|
def before_destroy
|
65
65
|
for manifest in self.manifests
|
66
66
|
manifest.destroy
|
data/tasks/gemspec.rake
CHANGED
@@ -9,6 +9,7 @@ SPEC = Gem::Specification.new do |s|
|
|
9
9
|
s.version = Nearline::VERSION
|
10
10
|
s.author = "Robert J. Osborne"
|
11
11
|
s.email = "rjo1970@gmail.com"
|
12
|
+
s.homepage = "http://rubyforge.org/projects/nearline"
|
12
13
|
s.summary = "Nearline is a near-line backup and recovery solution"
|
13
14
|
s.description = %{
|
14
15
|
Nearline is a library to make managing near-line file repositories
|
@@ -18,7 +19,6 @@ SPEC = Gem::Specification.new do |s|
|
|
18
19
|
s.files = FileList["{tests,lib,doc,tasks}/**/*"].exclude("rdoc").to_a
|
19
20
|
s.add_dependency("activerecord", '>= 2.0.2')
|
20
21
|
s.require_path = "lib"
|
21
|
-
s.autorequire = "nearline"
|
22
22
|
s.test_file = "test/nearline_test.rb"
|
23
23
|
s.has_rdoc = true
|
24
24
|
end
|
metadata
CHANGED
@@ -1,67 +1,75 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.4
|
3
|
-
specification_version: 1
|
4
2
|
name: nearline
|
5
3
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.0.
|
7
|
-
date: 2008-04-18 00:00:00 -04:00
|
8
|
-
summary: Nearline is a near-line backup and recovery solution
|
9
|
-
require_paths:
|
10
|
-
- lib
|
11
|
-
email: rjo1970@gmail.com
|
12
|
-
homepage:
|
13
|
-
rubyforge_project: nearline
|
14
|
-
description: Nearline is a library to make managing near-line file repositories simple and elegant in pure Ruby.
|
15
|
-
autorequire: nearline
|
16
|
-
default_executable:
|
17
|
-
bindir: bin
|
18
|
-
has_rdoc: true
|
19
|
-
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">"
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.0
|
24
|
-
version:
|
4
|
+
version: 0.0.5
|
25
5
|
platform: ruby
|
26
|
-
signing_key:
|
27
|
-
cert_chain:
|
28
|
-
post_install_message:
|
29
6
|
authors:
|
30
7
|
- Robert J. Osborne
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-05-22 00:00:00 -04:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: activerecord
|
17
|
+
version_requirement:
|
18
|
+
version_requirements: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.0.2
|
23
|
+
version:
|
24
|
+
description: Nearline is a library to make managing near-line file repositories simple and elegant in pure Ruby.
|
25
|
+
email: rjo1970@gmail.com
|
26
|
+
executables: []
|
27
|
+
|
28
|
+
extensions: []
|
29
|
+
|
30
|
+
extra_rdoc_files: []
|
31
|
+
|
31
32
|
files:
|
32
33
|
- lib/nearline
|
33
|
-
- lib/nearline/
|
34
|
+
- lib/nearline/schema.rb
|
35
|
+
- lib/nearline/system.rb
|
36
|
+
- lib/nearline/module_methods.rb
|
37
|
+
- lib/nearline/file_sequencer.rb
|
34
38
|
- lib/nearline/block.rb
|
35
|
-
- lib/nearline/file_content.rb
|
36
39
|
- lib/nearline/log.rb
|
40
|
+
- lib/nearline/archived_file.rb
|
41
|
+
- lib/nearline/file_content.rb
|
37
42
|
- lib/nearline/manifest.rb
|
38
|
-
- lib/nearline/module_methods.rb
|
39
|
-
- lib/nearline/schema.rb
|
40
|
-
- lib/nearline/system.rb
|
41
43
|
- lib/nearline.rb
|
42
|
-
- tasks/clean.rake
|
43
44
|
- tasks/gemspec.rake
|
44
|
-
- tasks/
|
45
|
+
- tasks/clean.rake
|
45
46
|
- tasks/test.rake
|
46
|
-
|
47
|
-
|
47
|
+
- tasks/rcov.rake
|
48
|
+
has_rdoc: true
|
49
|
+
homepage: http://rubyforge.org/projects/nearline
|
50
|
+
post_install_message:
|
48
51
|
rdoc_options: []
|
49
52
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: "0"
|
60
|
+
version:
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: "0"
|
66
|
+
version:
|
56
67
|
requirements: []
|
57
68
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
- !ruby/object:Gem::Version
|
66
|
-
version: 2.0.2
|
67
|
-
version:
|
69
|
+
rubyforge_project: nearline
|
70
|
+
rubygems_version: 1.1.1
|
71
|
+
signing_key:
|
72
|
+
specification_version: 2
|
73
|
+
summary: Nearline is a near-line backup and recovery solution
|
74
|
+
test_files:
|
75
|
+
- test/nearline_test.rb
|