nearline 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/nearline/archived_file.rb +224 -223
- data/lib/nearline/block.rb +60 -55
- data/lib/nearline/file_content.rb +87 -85
- data/lib/nearline/schema.rb +93 -90
- data/tasks/gemspec.rake +1 -1
- metadata +2 -2
@@ -1,224 +1,225 @@
|
|
1
|
-
module Nearline
|
2
|
-
module Models
|
3
|
-
|
4
|
-
# Represents file metadata and possible related FileContent
|
5
|
-
# for a single file on a single system
|
6
|
-
class ArchivedFile < ActiveRecord::Base
|
7
|
-
belongs_to :file_content
|
8
|
-
has_and_belongs_to_many :manifests
|
9
|
-
|
10
|
-
def self.create_for(system_name, file_path, manifest)
|
11
|
-
|
12
|
-
file_information = FileInformation.new(system_name, file_path, manifest)
|
13
|
-
|
14
|
-
# The path doesn't actually exist and fails a File.stat
|
15
|
-
return nil if file_information.path_hash.nil?
|
16
|
-
|
17
|
-
# If we find an exising entry, use it
|
18
|
-
hit = self.find_by_path_hash(file_information.path_hash)
|
19
|
-
return hit unless hit.nil?
|
20
|
-
|
21
|
-
# We need to create a record for either a directory or file
|
22
|
-
archived_file = ArchivedFile.new(
|
23
|
-
file_information.archived_file_parameters
|
24
|
-
)
|
25
|
-
|
26
|
-
# Find a new directory
|
27
|
-
if (file_information.is_directory)
|
28
|
-
archived_file.save!
|
29
|
-
return archived_file
|
30
|
-
end
|
31
|
-
|
32
|
-
# Find a new file that needs persisted
|
33
|
-
archived_file.file_content.file_size =
|
34
|
-
[file_information.stat.size].pack('Q').unpack('L').first # HACK for Windows
|
35
|
-
archived_file.persist(manifest)
|
36
|
-
archived_file.save!
|
37
|
-
archived_file
|
38
|
-
|
39
|
-
# TODO: Symbolic links, block devices, ...?
|
40
|
-
end
|
41
|
-
|
42
|
-
class FileInformation
|
43
|
-
attr_reader :path_hash, :stat, :is_directory, :archived_file_parameters
|
44
|
-
def initialize(system_name, file_path, manifest)
|
45
|
-
@manifest = manifest
|
46
|
-
@stat = read_stat(file_path)
|
47
|
-
@is_directory = File.directory?(file_path)
|
48
|
-
@path_hash = generate_path_hash(system_name, file_path)
|
49
|
-
@archived_file_parameters = build_parameters(system_name, file_path)
|
50
|
-
end
|
51
|
-
|
52
|
-
def read_stat(file_path)
|
53
|
-
stat = nil
|
54
|
-
begin
|
55
|
-
stat = File.stat(file_path)
|
56
|
-
rescue
|
57
|
-
@manifest.add_log("File not found on stat: #{file_path}")
|
58
|
-
end
|
59
|
-
stat
|
60
|
-
end
|
61
|
-
|
62
|
-
def generate_path_hash(system_name, file_path)
|
63
|
-
return nil if @stat.nil?
|
64
|
-
target = [system_name,
|
65
|
-
file_path,
|
66
|
-
@stat.uid,
|
67
|
-
@stat.gid,
|
68
|
-
@stat.mtime.to_i,
|
69
|
-
@stat.mode].join(':')
|
70
|
-
Digest::SHA1.hexdigest(target)
|
71
|
-
end
|
72
|
-
|
73
|
-
def file_content_entry_for_files_only
|
74
|
-
return FileContent.fresh_entry unless @is_directory
|
75
|
-
return nil
|
76
|
-
end
|
77
|
-
|
78
|
-
def build_parameters(system_name, file_path)
|
79
|
-
return nil if @stat.nil?
|
80
|
-
{
|
81
|
-
:system_name => system_name,
|
82
|
-
:path => file_path,
|
83
|
-
:path_hash => @path_hash,
|
84
|
-
:file_content => file_content_entry_for_files_only,
|
85
|
-
:uid => @stat.uid,
|
86
|
-
:gid => @stat.gid,
|
87
|
-
:mtime => @stat.mtime.to_i,
|
88
|
-
:mode => @stat.mode,
|
89
|
-
:is_directory => @is_directory
|
90
|
-
}
|
91
|
-
end
|
92
|
-
|
93
|
-
end
|
94
|
-
|
95
|
-
def restore(*args)
|
96
|
-
@options = args.extract_options!
|
97
|
-
if (self.is_directory)
|
98
|
-
FileUtils.mkdir_p option_override(:path)
|
99
|
-
restore_metadata
|
100
|
-
return
|
101
|
-
end
|
102
|
-
target_path = File.dirname(option_override(:path))
|
103
|
-
if (!File.exist? target_path)
|
104
|
-
FileUtils.mkdir_p target_path
|
105
|
-
end
|
106
|
-
f = File.open(option_override(:path), "wb")
|
107
|
-
self.file_content.restore_to(f)
|
108
|
-
f.close
|
109
|
-
restore_metadata
|
110
|
-
return
|
111
|
-
end
|
112
|
-
|
113
|
-
def option_override(key)
|
114
|
-
if (@options.has_key?(key))
|
115
|
-
return @options[key]
|
116
|
-
end
|
117
|
-
return self.send(key.to_s)
|
118
|
-
end
|
119
|
-
|
120
|
-
def restore_metadata
|
121
|
-
path = option_override(:path)
|
122
|
-
mtime = option_override(:mtime)
|
123
|
-
uid = option_override(:uid)
|
124
|
-
gid = option_override(:gid)
|
125
|
-
mode = option_override(:mode)
|
126
|
-
File.utime(0,Time.at(mtime),path)
|
127
|
-
File.chown(uid, gid, path)
|
128
|
-
File.chmod(mode, path)
|
129
|
-
end
|
130
|
-
|
131
|
-
def before_destroy
|
132
|
-
self.file_content.orphan_check if !self.file_content.nil?
|
133
|
-
end
|
134
|
-
|
135
|
-
def orphan_check
|
136
|
-
if self.manifests.size == 1
|
137
|
-
self.destroy
|
138
|
-
end
|
139
|
-
end
|
140
|
-
|
141
|
-
# Actually persist the file to the repository
|
142
|
-
# It has already been determined that a new ArchivedFile record is
|
143
|
-
# necessary and the file requires persisting
|
144
|
-
#
|
145
|
-
# But, the content may be identical to something else, and we
|
146
|
-
# won't know that until we complete the process and have to
|
147
|
-
# clean up our mess.
|
148
|
-
def persist(manifest)
|
149
|
-
whole_file_hash = Digest::SHA1.new
|
150
|
-
file_size = 0
|
151
|
-
begin
|
152
|
-
file_size = read_file_counting_bytes(whole_file_hash)
|
153
|
-
rescue
|
154
|
-
manifest.add_log "Got error '#{$!}' on path: #{self.path}"
|
155
|
-
self.orphan_check
|
156
|
-
return nil
|
157
|
-
end
|
158
|
-
|
159
|
-
size_check(file_size, manifest)
|
160
|
-
|
161
|
-
# Do we have a unique sequence?
|
162
|
-
key = whole_file_hash.hexdigest
|
163
|
-
return self if unique_sequence_processed?(key, manifest)
|
164
|
-
|
165
|
-
# Handle the case where the sequence is not unique...
|
166
|
-
clean_up_duplicate_content
|
167
|
-
replace_content(key)
|
168
|
-
self
|
169
|
-
end
|
170
|
-
|
171
|
-
def read_file_counting_bytes(whole_file_hash)
|
172
|
-
sequencer = FileSequencer.new(self.file_content)
|
173
|
-
file_size = 0
|
174
|
-
buffer = ""
|
175
|
-
File.open(self.path, "rb") do |io|
|
176
|
-
while (!io.eof) do
|
177
|
-
io.read(Block::MAX_SIZE, buffer)
|
178
|
-
file_size += buffer.size
|
179
|
-
whole_file_hash.update(buffer)
|
180
|
-
block = Block.for_content(buffer)
|
181
|
-
sequencer.preserve_block(block)
|
182
|
-
end
|
183
|
-
end
|
184
|
-
return file_size
|
185
|
-
end
|
186
|
-
|
187
|
-
def size_check(file_size, manifest)
|
188
|
-
if file_size != self.file_content.file_size
|
189
|
-
manifest.add_log "recorded file length #{file_size} " +
|
190
|
-
"does not match #{self.file_content.file_size} " +
|
191
|
-
"reported by the file system on path: #{self.path}"
|
192
|
-
end
|
193
|
-
end
|
194
|
-
|
195
|
-
def verify_content(manifest)
|
196
|
-
unless (self.file_content.verified?)
|
197
|
-
manifest.add_log "failed verification on path: #{self.path}"
|
198
|
-
end
|
199
|
-
end
|
200
|
-
|
201
|
-
def unique_sequence_processed?(key,manifest)
|
202
|
-
if self.file_content.unique_fingerprint?(key)
|
203
|
-
self.file_content.fingerprint = key
|
204
|
-
self.file_content.save!
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
self.file_content.
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
self.
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
1
|
+
module Nearline
|
2
|
+
module Models
|
3
|
+
|
4
|
+
# Represents file metadata and possible related FileContent
|
5
|
+
# for a single file on a single system
|
6
|
+
class ArchivedFile < ActiveRecord::Base
|
7
|
+
belongs_to :file_content
|
8
|
+
has_and_belongs_to_many :manifests
|
9
|
+
|
10
|
+
def self.create_for(system_name, file_path, manifest)
|
11
|
+
|
12
|
+
file_information = FileInformation.new(system_name, file_path, manifest)
|
13
|
+
|
14
|
+
# The path doesn't actually exist and fails a File.stat
|
15
|
+
return nil if file_information.path_hash.nil?
|
16
|
+
|
17
|
+
# If we find an exising entry, use it
|
18
|
+
hit = self.find_by_path_hash(file_information.path_hash)
|
19
|
+
return hit unless hit.nil?
|
20
|
+
|
21
|
+
# We need to create a record for either a directory or file
|
22
|
+
archived_file = ArchivedFile.new(
|
23
|
+
file_information.archived_file_parameters
|
24
|
+
)
|
25
|
+
|
26
|
+
# Find a new directory
|
27
|
+
if (file_information.is_directory)
|
28
|
+
archived_file.save!
|
29
|
+
return archived_file
|
30
|
+
end
|
31
|
+
|
32
|
+
# Find a new file that needs persisted
|
33
|
+
archived_file.file_content.file_size =
|
34
|
+
[file_information.stat.size].pack('Q').unpack('L').first # HACK for Windows
|
35
|
+
archived_file.persist(manifest)
|
36
|
+
archived_file.save!
|
37
|
+
archived_file
|
38
|
+
|
39
|
+
# TODO: Symbolic links, block devices, ...?
|
40
|
+
end
|
41
|
+
|
42
|
+
class FileInformation
|
43
|
+
attr_reader :path_hash, :stat, :is_directory, :archived_file_parameters
|
44
|
+
def initialize(system_name, file_path, manifest)
|
45
|
+
@manifest = manifest
|
46
|
+
@stat = read_stat(file_path)
|
47
|
+
@is_directory = File.directory?(file_path)
|
48
|
+
@path_hash = generate_path_hash(system_name, file_path)
|
49
|
+
@archived_file_parameters = build_parameters(system_name, file_path)
|
50
|
+
end
|
51
|
+
|
52
|
+
def read_stat(file_path)
|
53
|
+
stat = nil
|
54
|
+
begin
|
55
|
+
stat = File.stat(file_path)
|
56
|
+
rescue
|
57
|
+
@manifest.add_log("File not found on stat: #{file_path}")
|
58
|
+
end
|
59
|
+
stat
|
60
|
+
end
|
61
|
+
|
62
|
+
def generate_path_hash(system_name, file_path)
|
63
|
+
return nil if @stat.nil?
|
64
|
+
target = [system_name,
|
65
|
+
file_path,
|
66
|
+
@stat.uid,
|
67
|
+
@stat.gid,
|
68
|
+
@stat.mtime.to_i,
|
69
|
+
@stat.mode].join(':')
|
70
|
+
Digest::SHA1.hexdigest(target)
|
71
|
+
end
|
72
|
+
|
73
|
+
def file_content_entry_for_files_only
|
74
|
+
return FileContent.fresh_entry unless @is_directory
|
75
|
+
return nil
|
76
|
+
end
|
77
|
+
|
78
|
+
def build_parameters(system_name, file_path)
|
79
|
+
return nil if @stat.nil?
|
80
|
+
{
|
81
|
+
:system_name => system_name,
|
82
|
+
:path => file_path,
|
83
|
+
:path_hash => @path_hash,
|
84
|
+
:file_content => file_content_entry_for_files_only,
|
85
|
+
:uid => @stat.uid,
|
86
|
+
:gid => @stat.gid,
|
87
|
+
:mtime => @stat.mtime.to_i,
|
88
|
+
:mode => @stat.mode,
|
89
|
+
:is_directory => @is_directory
|
90
|
+
}
|
91
|
+
end
|
92
|
+
|
93
|
+
end
|
94
|
+
|
95
|
+
def restore(*args)
|
96
|
+
@options = args.extract_options!
|
97
|
+
if (self.is_directory)
|
98
|
+
FileUtils.mkdir_p option_override(:path)
|
99
|
+
restore_metadata
|
100
|
+
return
|
101
|
+
end
|
102
|
+
target_path = File.dirname(option_override(:path))
|
103
|
+
if (!File.exist? target_path)
|
104
|
+
FileUtils.mkdir_p target_path
|
105
|
+
end
|
106
|
+
f = File.open(option_override(:path), "wb")
|
107
|
+
self.file_content.restore_to(f)
|
108
|
+
f.close
|
109
|
+
restore_metadata
|
110
|
+
return
|
111
|
+
end
|
112
|
+
|
113
|
+
def option_override(key)
|
114
|
+
if (@options.has_key?(key))
|
115
|
+
return @options[key]
|
116
|
+
end
|
117
|
+
return self.send(key.to_s)
|
118
|
+
end
|
119
|
+
|
120
|
+
def restore_metadata
|
121
|
+
path = option_override(:path)
|
122
|
+
mtime = option_override(:mtime)
|
123
|
+
uid = option_override(:uid)
|
124
|
+
gid = option_override(:gid)
|
125
|
+
mode = option_override(:mode)
|
126
|
+
File.utime(0,Time.at(mtime),path)
|
127
|
+
File.chown(uid, gid, path)
|
128
|
+
File.chmod(mode, path)
|
129
|
+
end
|
130
|
+
|
131
|
+
def before_destroy
|
132
|
+
self.file_content.orphan_check if !self.file_content.nil?
|
133
|
+
end
|
134
|
+
|
135
|
+
def orphan_check
|
136
|
+
if self.manifests.size == 1
|
137
|
+
self.destroy
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
# Actually persist the file to the repository
|
142
|
+
# It has already been determined that a new ArchivedFile record is
|
143
|
+
# necessary and the file requires persisting
|
144
|
+
#
|
145
|
+
# But, the content may be identical to something else, and we
|
146
|
+
# won't know that until we complete the process and have to
|
147
|
+
# clean up our mess.
|
148
|
+
def persist(manifest)
|
149
|
+
whole_file_hash = Digest::SHA1.new
|
150
|
+
file_size = 0
|
151
|
+
begin
|
152
|
+
file_size = read_file_counting_bytes(whole_file_hash)
|
153
|
+
rescue
|
154
|
+
manifest.add_log "Got error '#{$!}' on path: #{self.path}"
|
155
|
+
self.orphan_check
|
156
|
+
return nil
|
157
|
+
end
|
158
|
+
|
159
|
+
size_check(file_size, manifest)
|
160
|
+
|
161
|
+
# Do we have a unique sequence?
|
162
|
+
key = whole_file_hash.hexdigest
|
163
|
+
return self if unique_sequence_processed?(key, manifest)
|
164
|
+
|
165
|
+
# Handle the case where the sequence is not unique...
|
166
|
+
clean_up_duplicate_content
|
167
|
+
replace_content(key)
|
168
|
+
self
|
169
|
+
end
|
170
|
+
|
171
|
+
def read_file_counting_bytes(whole_file_hash)
|
172
|
+
sequencer = FileSequencer.new(self.file_content)
|
173
|
+
file_size = 0
|
174
|
+
buffer = ""
|
175
|
+
File.open(self.path, "rb") do |io|
|
176
|
+
while (!io.eof) do
|
177
|
+
io.read(Block::MAX_SIZE, buffer)
|
178
|
+
file_size += buffer.size
|
179
|
+
whole_file_hash.update(buffer)
|
180
|
+
block = Block.for_content(buffer)
|
181
|
+
sequencer.preserve_block(block)
|
182
|
+
end
|
183
|
+
end
|
184
|
+
return file_size
|
185
|
+
end
|
186
|
+
|
187
|
+
def size_check(file_size, manifest)
|
188
|
+
if file_size != self.file_content.file_size
|
189
|
+
manifest.add_log "recorded file length #{file_size} " +
|
190
|
+
"does not match #{self.file_content.file_size} " +
|
191
|
+
"reported by the file system on path: #{self.path}"
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
def verify_content(manifest)
|
196
|
+
unless (self.file_content.verified?)
|
197
|
+
manifest.add_log "failed verification on path: #{self.path}"
|
198
|
+
end
|
199
|
+
end
|
200
|
+
|
201
|
+
def unique_sequence_processed?(key,manifest)
|
202
|
+
if self.file_content.unique_fingerprint?(key)
|
203
|
+
self.file_content.fingerprint = key
|
204
|
+
self.file_content.save!
|
205
|
+
self.save!
|
206
|
+
verify_content(manifest)
|
207
|
+
return true
|
208
|
+
end
|
209
|
+
false
|
210
|
+
end
|
211
|
+
|
212
|
+
def clean_up_duplicate_content
|
213
|
+
Sequence.delete_all("file_content_id=#{self.file_content.id}")
|
214
|
+
self.file_content.orphan_check
|
215
|
+
end
|
216
|
+
|
217
|
+
def replace_content(key)
|
218
|
+
self.file_content = FileContent.find_by_fingerprint(key)
|
219
|
+
self.save!
|
220
|
+
end
|
221
|
+
|
222
|
+
end
|
223
|
+
|
224
|
+
end
|
224
225
|
end
|
data/lib/nearline/block.rb
CHANGED
@@ -1,56 +1,61 @@
|
|
1
|
-
require 'active_record'
|
2
|
-
|
3
|
-
module Nearline
|
4
|
-
module Models
|
5
|
-
|
6
|
-
# Represents a unit of file content which may be
|
7
|
-
# freely shared across the repository
|
8
|
-
# Its sole responsibility is to preserve and provide
|
9
|
-
# content access
|
10
|
-
class Block < ActiveRecord::Base
|
11
|
-
require "zlib"
|
12
|
-
|
13
|
-
has_many :sequences
|
14
|
-
|
15
|
-
MAX_SIZE = (64 * 1024)-1
|
16
|
-
|
17
|
-
def attempt_compression
|
18
|
-
return if (self.is_compressed)
|
19
|
-
# TODO: Have a bump-the-compression option, here?
|
20
|
-
candidate_content = Zlib::Deflate.deflate(self.bulk_content)
|
21
|
-
if candidate_content.length < self.bulk_content.length
|
22
|
-
self.is_compressed = true
|
23
|
-
self.bulk_content = candidate_content
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
def calculate_fingerprint
|
28
|
-
self.fingerprint = Digest::SHA1.hexdigest(content)
|
29
|
-
end
|
30
|
-
|
31
|
-
def content
|
32
|
-
if (self.is_compressed)
|
33
|
-
return Zlib::Inflate.inflate(self.bulk_content)
|
34
|
-
end
|
35
|
-
self.bulk_content
|
36
|
-
end
|
37
|
-
|
38
|
-
def self.for_content(x)
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
block.
|
45
|
-
block
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
1
|
+
require 'active_record'
|
2
|
+
|
3
|
+
module Nearline
|
4
|
+
module Models
|
5
|
+
|
6
|
+
# Represents a unit of file content which may be
|
7
|
+
# freely shared across the repository
|
8
|
+
# Its sole responsibility is to preserve and provide
|
9
|
+
# content access
|
10
|
+
class Block < ActiveRecord::Base
|
11
|
+
require "zlib"
|
12
|
+
|
13
|
+
has_many :sequences
|
14
|
+
|
15
|
+
MAX_SIZE = (64 * 1024)-1
|
16
|
+
|
17
|
+
def attempt_compression
|
18
|
+
return if (self.is_compressed)
|
19
|
+
# TODO: Have a bump-the-compression option, here?
|
20
|
+
candidate_content = Zlib::Deflate.deflate(self.bulk_content)
|
21
|
+
if candidate_content.length < self.bulk_content.length
|
22
|
+
self.is_compressed = true
|
23
|
+
self.bulk_content = candidate_content
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def calculate_fingerprint
|
28
|
+
self.fingerprint = Digest::SHA1.hexdigest(content)
|
29
|
+
end
|
30
|
+
|
31
|
+
def content
|
32
|
+
if (self.is_compressed)
|
33
|
+
return Zlib::Inflate.inflate(self.bulk_content)
|
34
|
+
end
|
35
|
+
self.bulk_content
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.for_content(x, old_block = nil)
|
39
|
+
unless old_block.nil?
|
40
|
+
if x == old_block.content
|
41
|
+
return old_block
|
42
|
+
end
|
43
|
+
end
|
44
|
+
block = Models::Block.new(:bulk_content => x)
|
45
|
+
block.calculate_fingerprint
|
46
|
+
found = find_by_fingerprint(block.fingerprint)
|
47
|
+
return found if !found.nil?
|
48
|
+
block.attempt_compression
|
49
|
+
block.save!
|
50
|
+
block
|
51
|
+
end
|
52
|
+
|
53
|
+
def orphan_check
|
54
|
+
if self.sequences.size == 0
|
55
|
+
self.destroy
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
60
|
+
end
|
56
61
|
end
|
@@ -1,85 +1,87 @@
|
|
1
|
-
module Nearline
|
2
|
-
module Models
|
3
|
-
|
4
|
-
# Has the responsibility of identifying and
|
5
|
-
# verifying content
|
6
|
-
class FileContent < ActiveRecord::Base
|
7
|
-
has_many :sequences
|
8
|
-
has_many :archived_files
|
9
|
-
|
10
|
-
def self.fresh_entry
|
11
|
-
file_content = FileContent.new
|
12
|
-
file_content.save!
|
13
|
-
file_content
|
14
|
-
end
|
15
|
-
|
16
|
-
def
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
end
|
25
|
-
|
26
|
-
def
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
@file_content
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
1
|
+
module Nearline
|
2
|
+
module Models
|
3
|
+
|
4
|
+
# Has the responsibility of identifying and
|
5
|
+
# verifying content
|
6
|
+
class FileContent < ActiveRecord::Base
|
7
|
+
has_many :sequences
|
8
|
+
has_many :archived_files
|
9
|
+
|
10
|
+
def self.fresh_entry
|
11
|
+
file_content = FileContent.new
|
12
|
+
file_content.save!
|
13
|
+
file_content
|
14
|
+
end
|
15
|
+
|
16
|
+
def orphan_check
|
17
|
+
if (self.archived_files.size == 1)
|
18
|
+
sequences.each do |s|
|
19
|
+
s.destroy
|
20
|
+
s.block.orphan_check
|
21
|
+
end
|
22
|
+
self.destroy
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def unique_fingerprint?(key)
|
27
|
+
hit = FileContent.connection.select_one(
|
28
|
+
"select id from file_contents where fingerprint='#{key}'"
|
29
|
+
)
|
30
|
+
return hit.nil?
|
31
|
+
end
|
32
|
+
|
33
|
+
def restore_to(io)
|
34
|
+
sequences.each do |seq|
|
35
|
+
block = Block.find(seq.block_id)
|
36
|
+
io.write(block.content)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def verified?
|
41
|
+
if (!self.verified_at.nil?)
|
42
|
+
return true
|
43
|
+
end
|
44
|
+
whole_file_hash = Digest::SHA1.new
|
45
|
+
sequences.each do |seq|
|
46
|
+
block = Block.find(seq.block_id)
|
47
|
+
whole_file_hash.update(block.content)
|
48
|
+
end
|
49
|
+
if fingerprint == whole_file_hash.hexdigest
|
50
|
+
self.verified_at = Time.now
|
51
|
+
self.save!
|
52
|
+
return true
|
53
|
+
end
|
54
|
+
false
|
55
|
+
end
|
56
|
+
|
57
|
+
|
58
|
+
end
|
59
|
+
|
60
|
+
# Has the responsibility of preserving
|
61
|
+
# cardinality of stored blocks
|
62
|
+
class Sequence < ActiveRecord::Base
|
63
|
+
belongs_to :block
|
64
|
+
belongs_to :file_content
|
65
|
+
end
|
66
|
+
|
67
|
+
class FileSequencer
|
68
|
+
def initialize(file_content)
|
69
|
+
@inc = 0
|
70
|
+
@file_content = file_content
|
71
|
+
end
|
72
|
+
|
73
|
+
def preserve_block(block)
|
74
|
+
@inc += 1
|
75
|
+
sequence = Sequence.new(
|
76
|
+
:sequence => @inc,
|
77
|
+
:file_content_id => @file_content.id,
|
78
|
+
:block_id => block.id
|
79
|
+
)
|
80
|
+
sequence.save!
|
81
|
+
sequence
|
82
|
+
end
|
83
|
+
|
84
|
+
end
|
85
|
+
|
86
|
+
end
|
87
|
+
end
|
data/lib/nearline/schema.rb
CHANGED
@@ -1,90 +1,93 @@
|
|
1
|
-
module Nearline
|
2
|
-
module Models
|
3
|
-
|
4
|
-
module_function
|
5
|
-
|
6
|
-
def destroy_schema
|
7
|
-
ActiveRecord::Schema.define do
|
8
|
-
drop_table :blocks
|
9
|
-
drop_table :file_contents
|
10
|
-
drop_table :sequences
|
11
|
-
drop_table :archived_files
|
12
|
-
drop_table :manifests
|
13
|
-
drop_table :archived_files_manifests
|
14
|
-
drop_table :logs
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def empty_schema
|
19
|
-
Nearline::Models::Manifest.destroy_all
|
20
|
-
end
|
21
|
-
|
22
|
-
def generate_schema
|
23
|
-
ActiveRecord::Schema.define do
|
24
|
-
|
25
|
-
create_table :blocks do |t|
|
26
|
-
t.column :fingerprint, :string, :length => 40, :null => false
|
27
|
-
t.column :bulk_content, :binary
|
28
|
-
t.column :is_compressed, :boolean, :default => false
|
29
|
-
end
|
30
|
-
|
31
|
-
add_index :blocks, [:fingerprint], :unique => true
|
32
|
-
|
33
|
-
create_table :file_contents do |t|
|
34
|
-
t.column :fingerprint, :string, :length => 40
|
35
|
-
t.column :file_size, :
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
t.column :
|
41
|
-
t.column :
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
t.column :
|
52
|
-
t.column :
|
53
|
-
t.column :
|
54
|
-
t.column :
|
55
|
-
t.column :
|
56
|
-
t.column :
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
end
|
1
|
+
module Nearline
|
2
|
+
module Models
|
3
|
+
|
4
|
+
module_function
|
5
|
+
|
6
|
+
def destroy_schema
|
7
|
+
ActiveRecord::Schema.define do
|
8
|
+
drop_table :blocks
|
9
|
+
drop_table :file_contents
|
10
|
+
drop_table :sequences
|
11
|
+
drop_table :archived_files
|
12
|
+
drop_table :manifests
|
13
|
+
drop_table :archived_files_manifests
|
14
|
+
drop_table :logs
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def empty_schema
|
19
|
+
Nearline::Models::Manifest.destroy_all
|
20
|
+
end
|
21
|
+
|
22
|
+
def generate_schema
|
23
|
+
ActiveRecord::Schema.define do
|
24
|
+
|
25
|
+
create_table :blocks do |t|
|
26
|
+
t.column :fingerprint, :string, :length => 40, :null => false
|
27
|
+
t.column :bulk_content, :binary
|
28
|
+
t.column :is_compressed, :boolean, :default => false
|
29
|
+
end
|
30
|
+
|
31
|
+
add_index :blocks, [:fingerprint], :unique => true
|
32
|
+
|
33
|
+
create_table :file_contents do |t|
|
34
|
+
t.column :fingerprint, :string, :length => 40
|
35
|
+
t.column :file_size, :string, :default => 0
|
36
|
+
t.column :verified_at, :datetime
|
37
|
+
end
|
38
|
+
|
39
|
+
create_table :sequences do |t|
|
40
|
+
t.column :sequence, :integer, :null => false
|
41
|
+
t.column :block_id, :integer, :null => false
|
42
|
+
t.column :file_content_id, :integer, :null => false
|
43
|
+
end
|
44
|
+
|
45
|
+
add_index :sequences, [:sequence, :file_content_id], :unique => true,
|
46
|
+
:name => "sequence_jn_index"
|
47
|
+
|
48
|
+
add_index :sequences, [:block_id]
|
49
|
+
|
50
|
+
create_table :archived_files do |t|
|
51
|
+
t.column :system_name, :string, :null => false
|
52
|
+
t.column :path, :text, :null => false
|
53
|
+
t.column :path_hash, :string, :null => false, :length => 40
|
54
|
+
t.column :file_content_id, :integer
|
55
|
+
t.column :uid, :integer, :default => -1
|
56
|
+
t.column :gid, :integer, :default => -1
|
57
|
+
t.column :mtime, :integer, :default => 0
|
58
|
+
t.column :mode, :integer, :default => 33206 # "chmod 100666"
|
59
|
+
t.column :is_directory, :boolean
|
60
|
+
end
|
61
|
+
|
62
|
+
add_index :archived_files, [:path_hash], :unique => true
|
63
|
+
|
64
|
+
# Manifests are the reference to a collection of archived files
|
65
|
+
create_table :manifests do |t|
|
66
|
+
t.column :system_name, :string
|
67
|
+
t.column :created_at, :datetime
|
68
|
+
t.column :completed_at, :datetime
|
69
|
+
end
|
70
|
+
|
71
|
+
# Joins archived files across manifests so file references may be recycled
|
72
|
+
create_table :archived_files_manifests, :id => false do |t|
|
73
|
+
t.column :archived_file_id, :integer
|
74
|
+
t.column :manifest_id, :integer
|
75
|
+
end
|
76
|
+
|
77
|
+
add_index :archived_files_manifests,
|
78
|
+
[:archived_file_id, :manifest_id], {
|
79
|
+
:unique => true,
|
80
|
+
:name => "manifest_jn_index"
|
81
|
+
}
|
82
|
+
|
83
|
+
# Keeps a record of problems during backup related to a manifest
|
84
|
+
create_table :logs do |t|
|
85
|
+
t.column :manifest_id, :integer, :null => false
|
86
|
+
t.column :message, :text
|
87
|
+
t.column :created_at, :datetime
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
end
|
93
|
+
end
|
data/tasks/gemspec.rake
CHANGED
@@ -3,7 +3,7 @@ require 'rake/gempackagetask'
|
|
3
3
|
|
4
4
|
SPEC = Gem::Specification.new do |s|
|
5
5
|
s.name = "nearline"
|
6
|
-
s.version = "0.0.
|
6
|
+
s.version = "0.0.2"
|
7
7
|
s.author = "Robert J. Osborne"
|
8
8
|
s.email = "rjo1970@gmail.com"
|
9
9
|
s.summary = "Nearline is a near-line backup and recovery solution"
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.4
|
|
3
3
|
specification_version: 1
|
4
4
|
name: nearline
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.0.
|
7
|
-
date: 2008-04-
|
6
|
+
version: 0.0.2
|
7
|
+
date: 2008-04-05 00:00:00 -04:00
|
8
8
|
summary: Nearline is a near-line backup and recovery solution
|
9
9
|
require_paths:
|
10
10
|
- lib
|