nearline 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/nearline/archived_file.rb +224 -223
- data/lib/nearline/block.rb +60 -55
- data/lib/nearline/file_content.rb +87 -85
- data/lib/nearline/schema.rb +93 -90
- data/tasks/gemspec.rake +1 -1
- metadata +2 -2
@@ -1,224 +1,225 @@
|
|
1
|
-
module Nearline
|
2
|
-
module Models
|
3
|
-
|
4
|
-
# Represents file metadata and possible related FileContent
|
5
|
-
# for a single file on a single system
|
6
|
-
class ArchivedFile < ActiveRecord::Base
|
7
|
-
belongs_to :file_content
|
8
|
-
has_and_belongs_to_many :manifests
|
9
|
-
|
10
|
-
def self.create_for(system_name, file_path, manifest)
|
11
|
-
|
12
|
-
file_information = FileInformation.new(system_name, file_path, manifest)
|
13
|
-
|
14
|
-
# The path doesn't actually exist and fails a File.stat
|
15
|
-
return nil if file_information.path_hash.nil?
|
16
|
-
|
17
|
-
# If we find an existing entry, use it
|
18
|
-
hit = self.find_by_path_hash(file_information.path_hash)
|
19
|
-
return hit unless hit.nil?
|
20
|
-
|
21
|
-
# We need to create a record for either a directory or file
|
22
|
-
archived_file = ArchivedFile.new(
|
23
|
-
file_information.archived_file_parameters
|
24
|
-
)
|
25
|
-
|
26
|
-
# Find a new directory
|
27
|
-
if (file_information.is_directory)
|
28
|
-
archived_file.save!
|
29
|
-
return archived_file
|
30
|
-
end
|
31
|
-
|
32
|
-
# Find a new file that needs to be persisted
|
33
|
-
archived_file.file_content.file_size =
|
34
|
-
[file_information.stat.size].pack('Q').unpack('L').first # HACK for Windows
|
35
|
-
archived_file.persist(manifest)
|
36
|
-
archived_file.save!
|
37
|
-
archived_file
|
38
|
-
|
39
|
-
# TODO: Symbolic links, block devices, ...?
|
40
|
-
end
|
41
|
-
|
42
|
-
class FileInformation
|
43
|
-
attr_reader :path_hash, :stat, :is_directory, :archived_file_parameters
|
44
|
-
def initialize(system_name, file_path, manifest)
|
45
|
-
@manifest = manifest
|
46
|
-
@stat = read_stat(file_path)
|
47
|
-
@is_directory = File.directory?(file_path)
|
48
|
-
@path_hash = generate_path_hash(system_name, file_path)
|
49
|
-
@archived_file_parameters = build_parameters(system_name, file_path)
|
50
|
-
end
|
51
|
-
|
52
|
-
def read_stat(file_path)
|
53
|
-
stat = nil
|
54
|
-
begin
|
55
|
-
stat = File.stat(file_path)
|
56
|
-
rescue
|
57
|
-
@manifest.add_log("File not found on stat: #{file_path}")
|
58
|
-
end
|
59
|
-
stat
|
60
|
-
end
|
61
|
-
|
62
|
-
def generate_path_hash(system_name, file_path)
|
63
|
-
return nil if @stat.nil?
|
64
|
-
target = [system_name,
|
65
|
-
file_path,
|
66
|
-
@stat.uid,
|
67
|
-
@stat.gid,
|
68
|
-
@stat.mtime.to_i,
|
69
|
-
@stat.mode].join(':')
|
70
|
-
Digest::SHA1.hexdigest(target)
|
71
|
-
end
|
72
|
-
|
73
|
-
def file_content_entry_for_files_only
|
74
|
-
return FileContent.fresh_entry unless @is_directory
|
75
|
-
return nil
|
76
|
-
end
|
77
|
-
|
78
|
-
def build_parameters(system_name, file_path)
|
79
|
-
return nil if @stat.nil?
|
80
|
-
{
|
81
|
-
:system_name => system_name,
|
82
|
-
:path => file_path,
|
83
|
-
:path_hash => @path_hash,
|
84
|
-
:file_content => file_content_entry_for_files_only,
|
85
|
-
:uid => @stat.uid,
|
86
|
-
:gid => @stat.gid,
|
87
|
-
:mtime => @stat.mtime.to_i,
|
88
|
-
:mode => @stat.mode,
|
89
|
-
:is_directory => @is_directory
|
90
|
-
}
|
91
|
-
end
|
92
|
-
|
93
|
-
end
|
94
|
-
|
95
|
-
def restore(*args)
|
96
|
-
@options = args.extract_options!
|
97
|
-
if (self.is_directory)
|
98
|
-
FileUtils.mkdir_p option_override(:path)
|
99
|
-
restore_metadata
|
100
|
-
return
|
101
|
-
end
|
102
|
-
target_path = File.dirname(option_override(:path))
|
103
|
-
if (!File.exist? target_path)
|
104
|
-
FileUtils.mkdir_p target_path
|
105
|
-
end
|
106
|
-
f = File.open(option_override(:path), "wb")
|
107
|
-
self.file_content.restore_to(f)
|
108
|
-
f.close
|
109
|
-
restore_metadata
|
110
|
-
return
|
111
|
-
end
|
112
|
-
|
113
|
-
def option_override(key)
|
114
|
-
if (@options.has_key?(key))
|
115
|
-
return @options[key]
|
116
|
-
end
|
117
|
-
return self.send(key.to_s)
|
118
|
-
end
|
119
|
-
|
120
|
-
def restore_metadata
|
121
|
-
path = option_override(:path)
|
122
|
-
mtime = option_override(:mtime)
|
123
|
-
uid = option_override(:uid)
|
124
|
-
gid = option_override(:gid)
|
125
|
-
mode = option_override(:mode)
|
126
|
-
File.utime(0,Time.at(mtime),path)
|
127
|
-
File.chown(uid, gid, path)
|
128
|
-
File.chmod(mode, path)
|
129
|
-
end
|
130
|
-
|
131
|
-
def before_destroy
|
132
|
-
self.file_content.orphan_check if !self.file_content.nil?
|
133
|
-
end
|
134
|
-
|
135
|
-
def orphan_check
|
136
|
-
if self.manifests.size == 1
|
137
|
-
self.destroy
|
138
|
-
end
|
139
|
-
end
|
140
|
-
|
141
|
-
# Actually persist the file to the repository
|
142
|
-
# It has already been determined that a new ArchivedFile record is
|
143
|
-
# necessary and the file requires persisting
|
144
|
-
#
|
145
|
-
# But, the content may be identical to something else, and we
|
146
|
-
# won't know that until we complete the process and have to
|
147
|
-
# clean up our mess.
|
148
|
-
def persist(manifest)
|
149
|
-
whole_file_hash = Digest::SHA1.new
|
150
|
-
file_size = 0
|
151
|
-
begin
|
152
|
-
file_size = read_file_counting_bytes(whole_file_hash)
|
153
|
-
rescue
|
154
|
-
manifest.add_log "Got error '#{$!}' on path: #{self.path}"
|
155
|
-
self.orphan_check
|
156
|
-
return nil
|
157
|
-
end
|
158
|
-
|
159
|
-
size_check(file_size, manifest)
|
160
|
-
|
161
|
-
# Do we have a unique sequence?
|
162
|
-
key = whole_file_hash.hexdigest
|
163
|
-
return self if unique_sequence_processed?(key, manifest)
|
164
|
-
|
165
|
-
# Handle the case where the sequence is not unique...
|
166
|
-
clean_up_duplicate_content
|
167
|
-
replace_content(key)
|
168
|
-
self
|
169
|
-
end
|
170
|
-
|
171
|
-
def read_file_counting_bytes(whole_file_hash)
|
172
|
-
sequencer = FileSequencer.new(self.file_content)
|
173
|
-
file_size = 0
|
174
|
-
buffer = ""
|
175
|
-
File.open(self.path, "rb") do |io|
|
176
|
-
while (!io.eof) do
|
177
|
-
io.read(Block::MAX_SIZE, buffer)
|
178
|
-
file_size += buffer.size
|
179
|
-
whole_file_hash.update(buffer)
|
180
|
-
block = Block.for_content(buffer)
|
181
|
-
sequencer.preserve_block(block)
|
182
|
-
end
|
183
|
-
end
|
184
|
-
return file_size
|
185
|
-
end
|
186
|
-
|
187
|
-
def size_check(file_size, manifest)
|
188
|
-
if file_size != self.file_content.file_size
|
189
|
-
manifest.add_log "recorded file length #{file_size} " +
|
190
|
-
"does not match #{self.file_content.file_size} " +
|
191
|
-
"reported by the file system on path: #{self.path}"
|
192
|
-
end
|
193
|
-
end
|
194
|
-
|
195
|
-
def verify_content(manifest)
|
196
|
-
unless (self.file_content.verified?)
|
197
|
-
manifest.add_log "failed verification on path: #{self.path}"
|
198
|
-
end
|
199
|
-
end
|
200
|
-
|
201
|
-
def unique_sequence_processed?(key,manifest)
|
202
|
-
if self.file_content.unique_fingerprint?(key)
|
203
|
-
self.file_content.fingerprint = key
|
204
|
-
self.file_content.save!
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
self.file_content.
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
self.
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
1
|
+
module Nearline
|
2
|
+
module Models
|
3
|
+
|
4
|
+
# Represents file metadata and possible related FileContent
|
5
|
+
# for a single file on a single system
|
6
|
+
class ArchivedFile < ActiveRecord::Base
  belongs_to :file_content
  has_and_belongs_to_many :manifests

  # Returns the ArchivedFile describing +file_path+ on +system_name+,
  # creating (and, for regular files, persisting the content of) a new
  # record when no record with the same path hash exists yet.
  #
  # Returns nil when the path could not be stat'ed; FileInformation has
  # already written a log entry to +manifest+ in that case.
  def self.create_for(system_name, file_path, manifest)
    file_information = FileInformation.new(system_name, file_path, manifest)

    # The path doesn't actually exist and fails a File.stat
    return nil if file_information.path_hash.nil?

    # If we find an existing entry, use it
    hit = self.find_by_path_hash(file_information.path_hash)
    return hit unless hit.nil?

    # We need to create a record for either a directory or file
    archived_file = ArchivedFile.new(
      file_information.archived_file_parameters
    )

    # A new directory carries metadata only; there is no content to persist
    if file_information.is_directory
      archived_file.save!
      return archived_file
    end

    # A new file that needs to be persisted.
    # pack('Q') emits the size as a native-endian unsigned 64-bit value and
    # unpack('L') reads back only the first unsigned 32-bit word of it.
    # NOTE(review): that keeps just 32 bits of the size, so very large files
    # will presumably trip size_check -- confirm the intent of the hack.
    archived_file.file_content.file_size =
      [file_information.stat.size].pack('Q').unpack('L').first # HACK for Windows
    # NOTE(review): persist returns nil after a read error, yet the record
    # is still saved below -- confirm whether that is intended.
    archived_file.persist(manifest)
    archived_file.save!
    archived_file

    # TODO: Symbolic links, block devices, ...?
  end

  # Gathers everything create_for needs to know about one path: its stat,
  # whether it is a directory, the identifying path hash and the attribute
  # hash used to build the ArchivedFile record.
  class FileInformation
    attr_reader :path_hash, :stat, :is_directory, :archived_file_parameters

    # manifest is only used to record a log entry when the stat fails.
    def initialize(system_name, file_path, manifest)
      @manifest = manifest
      @stat = read_stat(file_path)
      @is_directory = File.directory?(file_path)
      @path_hash = generate_path_hash(system_name, file_path)
      @archived_file_parameters = build_parameters(system_name, file_path)
    end

    # Returns the File::Stat for file_path, or nil (after logging to the
    # manifest) when File.stat raises.
    def read_stat(file_path)
      File.stat(file_path)
    rescue
      @manifest.add_log("File not found on stat: #{file_path}")
      nil
    end

    # SHA1 hex digest over system name, path, uid, gid, mtime and mode
    # joined with ':'; nil when there is no stat to hash.
    def generate_path_hash(system_name, file_path)
      return nil if @stat.nil?
      target = [system_name,
        file_path,
        @stat.uid,
        @stat.gid,
        @stat.mtime.to_i,
        @stat.mode].join(':')
      Digest::SHA1.hexdigest(target)
    end

    # Only regular files get a FileContent row; directories get nil.
    def file_content_entry_for_files_only
      return FileContent.fresh_entry unless @is_directory
      nil
    end

    # Attribute hash handed to ArchivedFile.new; nil when the stat failed.
    def build_parameters(system_name, file_path)
      return nil if @stat.nil?
      {
        :system_name => system_name,
        :path => file_path,
        :path_hash => @path_hash,
        :file_content => file_content_entry_for_files_only,
        :uid => @stat.uid,
        :gid => @stat.gid,
        :mtime => @stat.mtime.to_i,
        :mode => @stat.mode,
        :is_directory => @is_directory
      }
    end

  end

  # Writes this entry back to the file system and reapplies its metadata.
  # A trailing options hash may override :path, :mtime, :uid, :gid and
  # :mode (see option_override). Always returns nil.
  def restore(*args)
    @options = args.extract_options!
    if self.is_directory
      FileUtils.mkdir_p option_override(:path)
      restore_metadata
      return
    end
    target_path = File.dirname(option_override(:path))
    FileUtils.mkdir_p(target_path) unless File.exist?(target_path)
    # Block form guarantees the handle is closed even if restore_to raises
    File.open(option_override(:path), "wb") do |f|
      self.file_content.restore_to(f)
    end
    restore_metadata
    return
  end

  # Value supplied for key in the options given to restore, falling back
  # to this record's own attribute of the same name.
  def option_override(key)
    return @options[key] if @options.has_key?(key)
    self.send(key.to_s)
  end

  # Reapplies modification time, ownership and mode to the restored path.
  # The access time is set to 0 (the epoch), exactly as File.utime is told.
  def restore_metadata
    path = option_override(:path)
    mtime = option_override(:mtime)
    uid = option_override(:uid)
    gid = option_override(:gid)
    mode = option_override(:mode)
    File.utime(0, Time.at(mtime), path)
    File.chown(uid, gid, path)
    File.chmod(mode, path)
  end

  # Gives the associated FileContent (if any) the chance to remove itself
  # before this record is destroyed.
  def before_destroy
    self.file_content.orphan_check unless self.file_content.nil?
  end

  # Destroys this record when exactly one manifest references it.
  def orphan_check
    self.destroy if self.manifests.size == 1
  end

  # Actually persist the file to the repository
  # It has already been determined that a new ArchivedFile record is
  # necessary and the file requires persisting
  #
  # But, the content may be identical to something else, and we
  # won't know that until we complete the process and have to
  # clean up our mess.
  #
  # Returns self, or nil when reading the file raised (the error is
  # logged to the manifest).
  def persist(manifest)
    whole_file_hash = Digest::SHA1.new
    file_size = 0
    begin
      file_size = read_file_counting_bytes(whole_file_hash)
    rescue => e
      manifest.add_log "Got error '#{e}' on path: #{self.path}"
      self.orphan_check
      return nil
    end

    size_check(file_size, manifest)

    # Do we have a unique sequence?
    key = whole_file_hash.hexdigest
    return self if unique_sequence_processed?(key, manifest)

    # Handle the case where the sequence is not unique...
    clean_up_duplicate_content
    replace_content(key)
    self
  end

  # Reads the file in chunks of at most Block::MAX_SIZE bytes, feeding each
  # chunk into whole_file_hash and storing it as a Block in sequence.
  # Returns the number of bytes read.
  def read_file_counting_bytes(whole_file_hash)
    sequencer = FileSequencer.new(self.file_content)
    file_size = 0
    buffer = ""
    File.open(self.path, "rb") do |io|
      while (!io.eof) do
        # The same buffer string is refilled on every pass
        io.read(Block::MAX_SIZE, buffer)
        file_size += buffer.size
        whole_file_hash.update(buffer)
        block = Block.for_content(buffer)
        sequencer.preserve_block(block)
      end
    end
    file_size
  end

  # Logs to the manifest when the byte count read differs from the size
  # recorded on the FileContent.
  def size_check(file_size, manifest)
    if file_size != self.file_content.file_size
      manifest.add_log "recorded file length #{file_size} " +
        "does not match #{self.file_content.file_size} " +
        "reported by the file system on path: #{self.path}"
    end
  end

  # Logs to the manifest when the stored content fails verification.
  def verify_content(manifest)
    unless self.file_content.verified?
      manifest.add_log "failed verification on path: #{self.path}"
    end
  end

  # When no other FileContent carries fingerprint key, stamps it on ours,
  # saves, verifies and returns true; otherwise returns false.
  def unique_sequence_processed?(key, manifest)
    return false unless self.file_content.unique_fingerprint?(key)
    self.file_content.fingerprint = key
    self.file_content.save!
    self.save!
    verify_content(manifest)
    true
  end

  # Drops the sequences just written for our (duplicate) FileContent and
  # lets that FileContent remove itself if nothing else needs it.
  def clean_up_duplicate_content
    Sequence.delete_all("file_content_id=#{self.file_content.id}")
    self.file_content.orphan_check
  end

  # Points this record at the existing FileContent with fingerprint key.
  def replace_content(key)
    self.file_content = FileContent.find_by_fingerprint(key)
    self.save!
  end

end
|
223
|
+
|
224
|
+
end
|
224
225
|
end
|
data/lib/nearline/block.rb
CHANGED
@@ -1,56 +1,61 @@
|
|
1
|
-
require 'active_record'
|
2
|
-
|
3
|
-
module Nearline
|
4
|
-
module Models
|
5
|
-
|
6
|
-
# Represents a unit of file content which may be
|
7
|
-
# freely shared across the repository
|
8
|
-
# Its sole responsibility is to preserve and provide
|
9
|
-
# content access
|
10
|
-
class Block < ActiveRecord::Base
|
11
|
-
require "zlib"
|
12
|
-
|
13
|
-
has_many :sequences
|
14
|
-
|
15
|
-
MAX_SIZE = (64 * 1024)-1
|
16
|
-
|
17
|
-
def attempt_compression
|
18
|
-
return if (self.is_compressed)
|
19
|
-
# TODO: Have a bump-the-compression option, here?
|
20
|
-
candidate_content = Zlib::Deflate.deflate(self.bulk_content)
|
21
|
-
if candidate_content.length < self.bulk_content.length
|
22
|
-
self.is_compressed = true
|
23
|
-
self.bulk_content = candidate_content
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
def calculate_fingerprint
|
28
|
-
self.fingerprint = Digest::SHA1.hexdigest(content)
|
29
|
-
end
|
30
|
-
|
31
|
-
def content
|
32
|
-
if (self.is_compressed)
|
33
|
-
return Zlib::Inflate.inflate(self.bulk_content)
|
34
|
-
end
|
35
|
-
self.bulk_content
|
36
|
-
end
|
37
|
-
|
38
|
-
def self.for_content(x)
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
block.
|
45
|
-
block
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
1
|
+
require 'active_record'
|
2
|
+
|
3
|
+
module Nearline
|
4
|
+
module Models
|
5
|
+
|
6
|
+
# Represents a unit of file content which may be
|
7
|
+
# freely shared across the repository
|
8
|
+
# Its sole responsibility is to preserve and provide
|
9
|
+
# content access
|
10
|
+
class Block < ActiveRecord::Base
  require "zlib"

  has_many :sequences

  MAX_SIZE = (64 * 1024)-1

  # Replaces bulk_content with its deflated form, but only when that is
  # strictly shorter and the block is not already compressed.
  def attempt_compression
    return if self.is_compressed
    # TODO: Have a bump-the-compression option, here?
    deflated = Zlib::Deflate.deflate(self.bulk_content)
    return unless deflated.length < self.bulk_content.length
    self.is_compressed = true
    self.bulk_content = deflated
  end

  # Sets fingerprint to the SHA1 hex digest of the (uncompressed) content.
  def calculate_fingerprint
    self.fingerprint = Digest::SHA1.hexdigest(content)
  end

  # The stored bytes, inflated first when the block is compressed.
  def content
    self.is_compressed ? Zlib::Inflate.inflate(self.bulk_content) : self.bulk_content
  end

  # Returns a saved Block holding x. old_block is handed back untouched
  # when its content equals x; otherwise a block with the same fingerprint
  # is reused if one exists, and only then is a new block compressed and
  # saved.
  def self.for_content(x, old_block = nil)
    return old_block if !old_block.nil? && x == old_block.content

    candidate = Models::Block.new(:bulk_content => x)
    candidate.calculate_fingerprint
    existing = find_by_fingerprint(candidate.fingerprint)
    return existing unless existing.nil?

    candidate.attempt_compression
    candidate.save!
    candidate
  end

  # Destroys this block once no sequence refers to it.
  def orphan_check
    self.destroy if self.sequences.size == 0
  end

end
|
60
|
+
end
|
56
61
|
end
|
@@ -1,85 +1,87 @@
|
|
1
|
-
module Nearline
|
2
|
-
module Models
|
3
|
-
|
4
|
-
# Has the responsibility of identifying and
|
5
|
-
# verifying content
|
6
|
-
class FileContent < ActiveRecord::Base
|
7
|
-
has_many :sequences
|
8
|
-
has_many :archived_files
|
9
|
-
|
10
|
-
def self.fresh_entry
|
11
|
-
file_content = FileContent.new
|
12
|
-
file_content.save!
|
13
|
-
file_content
|
14
|
-
end
|
15
|
-
|
16
|
-
def
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
end
|
25
|
-
|
26
|
-
def
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
@file_content
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
1
|
+
module Nearline
|
2
|
+
module Models
|
3
|
+
|
4
|
+
# Has the responsibility of identifying and
|
5
|
+
# verifying content
|
6
|
+
class FileContent < ActiveRecord::Base
  has_many :sequences
  has_many :archived_files

  # Creates, saves and returns an empty FileContent row.
  def self.fresh_entry
    FileContent.create!
  end

  # When exactly one archived file references this content, destroys every
  # sequence (giving each sequence's block its own orphan check) and then
  # destroys this record.
  def orphan_check
    if self.archived_files.size == 1
      sequences.each do |s|
        s.destroy
        s.block.orphan_check
      end
      self.destroy
    end
  end

  # True when no FileContent row carries fingerprint key. Uses the
  # dynamic finder rather than hand-built SQL so the value is quoted by
  # ActiveRecord.
  def unique_fingerprint?(key)
    FileContent.find_by_fingerprint(key).nil?
  end

  # Writes the content of every block to io, in sequence order.
  def restore_to(io)
    ordered_sequences.each do |seq|
      block = Block.find(seq.block_id)
      io.write(block.content)
    end
  end

  # True when the content has been verified before (verified_at is set) or
  # when the SHA1 over all blocks, taken in sequence order, equals the
  # stored fingerprint; a fresh success is stamped into verified_at.
  def verified?
    return true unless self.verified_at.nil?

    whole_file_hash = Digest::SHA1.new
    ordered_sequences.each do |seq|
      block = Block.find(seq.block_id)
      whole_file_hash.update(block.content)
    end
    return false unless fingerprint == whole_file_hash.hexdigest

    self.verified_at = Time.now
    self.save!
    true
  end

  private

  # The association declares no ordering, so the order rows come back in
  # is up to the database; sort explicitly by the sequence number so blocks
  # are always replayed in the order they were stored.
  def ordered_sequences
    sequences.sort_by { |seq| seq.sequence }
  end

end
|
59
|
+
|
60
|
+
# Has the responsibility of preserving
|
61
|
+
# cardinality of stored blocks
|
62
|
+
class Sequence < ActiveRecord::Base
  # One row ties a file content to one block; see the schema for the
  # sequence, block_id and file_content_id columns.
  belongs_to :file_content
  belongs_to :block
end
|
66
|
+
|
67
|
+
class FileSequencer
  # file_content is the record whose id every created Sequence refers to.
  def initialize(file_content)
    @file_content = file_content
    @inc = 0
  end

  # Saves and returns a Sequence linking block to the file content under
  # the next sequence number (numbering starts at 1).
  def preserve_block(block)
    @inc += 1
    Sequence.create!(
      :sequence => @inc,
      :file_content_id => @file_content.id,
      :block_id => block.id
    )
  end

end
|
85
|
+
|
86
|
+
end
|
87
|
+
end
|
data/lib/nearline/schema.rb
CHANGED
@@ -1,90 +1,93 @@
|
|
1
|
-
module Nearline
|
2
|
-
module Models
|
3
|
-
|
4
|
-
module_function
|
5
|
-
|
6
|
-
def destroy_schema
|
7
|
-
ActiveRecord::Schema.define do
|
8
|
-
drop_table :blocks
|
9
|
-
drop_table :file_contents
|
10
|
-
drop_table :sequences
|
11
|
-
drop_table :archived_files
|
12
|
-
drop_table :manifests
|
13
|
-
drop_table :archived_files_manifests
|
14
|
-
drop_table :logs
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def empty_schema
|
19
|
-
Nearline::Models::Manifest.destroy_all
|
20
|
-
end
|
21
|
-
|
22
|
-
def generate_schema
|
23
|
-
ActiveRecord::Schema.define do
|
24
|
-
|
25
|
-
create_table :blocks do |t|
|
26
|
-
t.column :fingerprint, :string, :length => 40, :null => false
|
27
|
-
t.column :bulk_content, :binary
|
28
|
-
t.column :is_compressed, :boolean, :default => false
|
29
|
-
end
|
30
|
-
|
31
|
-
add_index :blocks, [:fingerprint], :unique => true
|
32
|
-
|
33
|
-
create_table :file_contents do |t|
|
34
|
-
t.column :fingerprint, :string, :length => 40
|
35
|
-
t.column :file_size, :
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
t.column :
|
41
|
-
t.column :
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
t.column :
|
52
|
-
t.column :
|
53
|
-
t.column :
|
54
|
-
t.column :
|
55
|
-
t.column :
|
56
|
-
t.column :
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
end
|
1
|
+
module Nearline
|
2
|
+
module Models
|
3
|
+
|
4
|
+
module_function
|
5
|
+
|
6
|
+
# Drops every table that generate_schema creates.
def destroy_schema
  doomed_tables = [:blocks, :file_contents, :sequences, :archived_files,
                   :manifests, :archived_files_manifests, :logs]
  ActiveRecord::Schema.define do
    doomed_tables.each { |table_name| drop_table table_name }
  end
end
|
17
|
+
|
18
|
+
# Empties the repository by calling destroy_all on Manifest.
def empty_schema
  Nearline::Models::Manifest.destroy_all
end
|
21
|
+
|
22
|
+
# Creates every table and index the Nearline models rely on.
#
# Fingerprint and path-hash columns hold SHA1 hex digests, hence the
# 40-character :limit (the column option is :limit, not :length).
def generate_schema
  ActiveRecord::Schema.define do

    create_table :blocks do |t|
      t.column :fingerprint, :string, :limit => 40, :null => false
      t.column :bulk_content, :binary
      t.column :is_compressed, :boolean, :default => false
    end

    add_index :blocks, [:fingerprint], :unique => true

    create_table :file_contents do |t|
      t.column :fingerprint, :string, :limit => 40
      # NOTE(review): a string column with an integer default, compared
      # against an integer byte count in ArchivedFile#size_check -- confirm
      # the column type is intentional.
      t.column :file_size, :string, :default => 0
      t.column :verified_at, :datetime
    end

    create_table :sequences do |t|
      t.column :sequence, :integer, :null => false
      t.column :block_id, :integer, :null => false
      t.column :file_content_id, :integer, :null => false
    end

    add_index :sequences, [:sequence, :file_content_id], :unique => true,
      :name => "sequence_jn_index"

    add_index :sequences, [:block_id]

    create_table :archived_files do |t|
      t.column :system_name, :string, :null => false
      t.column :path, :text, :null => false
      t.column :path_hash, :string, :null => false, :limit => 40
      t.column :file_content_id, :integer
      t.column :uid, :integer, :default => -1
      t.column :gid, :integer, :default => -1
      t.column :mtime, :integer, :default => 0
      t.column :mode, :integer, :default => 33206 # "chmod 100666"
      t.column :is_directory, :boolean
    end

    add_index :archived_files, [:path_hash], :unique => true

    # Manifests are the reference to a collection of archived files
    create_table :manifests do |t|
      t.column :system_name, :string
      t.column :created_at, :datetime
      t.column :completed_at, :datetime
    end

    # Joins archived files across manifests so file references may be recycled
    create_table :archived_files_manifests, :id => false do |t|
      t.column :archived_file_id, :integer
      t.column :manifest_id, :integer
    end

    add_index :archived_files_manifests,
      [:archived_file_id, :manifest_id], {
        :unique => true,
        :name => "manifest_jn_index"
      }

    # Keeps a record of problems during backup related to a manifest
    create_table :logs do |t|
      t.column :manifest_id, :integer, :null => false
      t.column :message, :text
      t.column :created_at, :datetime
    end
  end
end
|
91
|
+
|
92
|
+
end
|
93
|
+
end
|
data/tasks/gemspec.rake
CHANGED
@@ -3,7 +3,7 @@ require 'rake/gempackagetask'
|
|
3
3
|
|
4
4
|
SPEC = Gem::Specification.new do |s|
|
5
5
|
s.name = "nearline"
|
6
|
-
s.version = "0.0.1"
|
6
|
+
s.version = "0.0.2"
|
7
7
|
s.author = "Robert J. Osborne"
|
8
8
|
s.email = "rjo1970@gmail.com"
|
9
9
|
s.summary = "Nearline is a near-line backup and recovery solution"
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.4
|
|
3
3
|
specification_version: 1
|
4
4
|
name: nearline
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.0.1
|
7
|
-
date: 2008-04-
|
6
|
+
version: 0.0.2
|
7
|
+
date: 2008-04-05 00:00:00 -04:00
|
8
8
|
summary: Nearline is a near-line backup and recovery solution
|
9
9
|
require_paths:
|
10
10
|
- lib
|