sdr-replication 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/libdir.rb +3 -0
- data/lib/replication/archive_catalog.rb +110 -0
- data/lib/replication/bagit_bag.rb +337 -0
- data/lib/replication/command_consumer.rb +55 -0
- data/lib/replication/command_producer.rb +105 -0
- data/lib/replication/dpn_check_rep.rb +83 -0
- data/lib/replication/file_fixity.rb +98 -0
- data/lib/replication/fixity.rb +155 -0
- data/lib/replication/operating_system.rb +33 -0
- data/lib/replication/replica.rb +62 -0
- data/lib/replication/sdr_object_version.rb +63 -0
- data/lib/replication/tarfile.rb +160 -0
- data/lib/sdr_replication.rb +26 -0
- metadata +198 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e99a0814e4383ec6287dec7df41825d786e65919
|
4
|
+
data.tar.gz: 4890c2758dd820f22ce8aa9015621e1b33dec9ae
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: deb3400e53fcbdf16cf8263ffe1deed70be83889633b4fecfa0de58e41a076e7f502de61d22f079b09ff6b81ddb29586bb6e898782f22e45010ecfd46d93828d
|
7
|
+
data.tar.gz: a0533d23addcc264e7aeca97f985bdad1d70e8081230e09ab5337a03314b058374541b1587f47a5f89dd2f558115677190d49b320b892a77e7031bb15ac9c3ff
|
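data/lib/libdir.rb
ADDED
(3 added lines; contents not captured in this extract)
data/lib/replication/archive_catalog.rb
ADDED
@@ -0,0 +1,110 @@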
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rest-client'
|
3
|
+
|
4
|
+
module Replication
|
5
|
+
|
6
|
+
# A wrapper class based on {RestClient} used to interface with the Archive Catalog service.
|
7
|
+
# <br>
|
8
|
+
# <br>
|
9
|
+
# The default RestClient behavior is:
|
10
|
+
# * for results code between 200 and 207 a RestClient::Response will be returned
|
11
|
+
# * for results code 301, 302 or 307 the redirection will be followed if the request is a get or a head
|
12
|
+
# * for result code 303 the redirection will be followed and the request transformed into a get
|
13
|
+
# * for other cases a RestClient::Exception holding the Response will be raised
|
14
|
+
#
|
15
|
+
# But we are using a technique that forces RestClient to always provide the response
|
16
|
+
# <br>
|
17
|
+
# <br>
|
18
|
+
# RestClient::Response has these instance methods (some inherited from AbstractResponse):
|
19
|
+
# * args
|
20
|
+
# * body
|
21
|
+
# * code (e.g. 204)
|
22
|
+
# * description (e.g. "204 No Content | 0 bytes")
|
23
|
+
# * headers
|
24
|
+
# * net_http_res
|
25
|
+
#
|
26
|
+
# @see https://github.com/rest-client/rest-client
|
27
|
+
# @see http://rubydoc.info/gems/rest-client/1.6.7/frames
|
28
|
+
class ArchiveCatalog

  # Class-level instance variables holding the service configuration.
  # They are exposed through the accessors declared in the singleton class below.
  @root_uri = 'http://localhost:3000'
  @timeout = 120

  # @see https://www.google.com/search?q="class+<<+self"+"attr_accessor"
  class << self

    # @return [String] The base or home URL of the Archive Catalog web service
    attr_accessor :root_uri

    # @return [Integer] seconds to wait for a response or to open a connection. Value nil disables the timeout.
    attr_accessor :timeout

    # @return [RestClient::Resource] The base RestClient resource to be used for requests,
    #   built from the current {root_uri} and {timeout} settings on every call
    def root_resource
      RestClient::Resource.new(@root_uri, {:open_timeout => @timeout, :timeout => @timeout})
    end

    # Get the item record from the specified table for the specified primary key.
    # @param [String] table name of the database table
    # @param [String] id primary key for the item in the database table
    # @return [Hash] the row (in key,value hash) from the specified table for the specified identifier.
    #   Response body contains the item data in JSON format, which is converted to a hash.
    # @raise [RuntimeError] carrying the response description if the HTTP response code is not 200
    # @see http://tools.ietf.org/html/rfc2616#page-53
    def get_item(table, id)
      headers = {:accept => 'application/json'}
      # Passing a block makes the call hand back the response instead of raising RestClient::Exception
      response = root_resource["#{table}/#{id}.json"].get(headers) { |reply, _request, _result| reply }
      JSON.parse(expect_status(response, '200').body)
    end

    # Retrieve an existing database record or add a new one using the data provided.
    # @param [String] table name of the database table
    # @param [Hash] hash the item data to be added to the database table
    # @return [Hash] result containing the item data as if a GET were performed.
    #   The HTTP response code for success is 201 (Created).
    # @raise [RuntimeError] carrying the response description if the HTTP response code is not 201
    # @see http://en.wikipedia.org/wiki/POST_(HTTP)
    # @see http://tools.ietf.org/html/rfc2616#page-54
    def find_or_create_item(table, hash)
      payload = hash.to_json
      headers = {:content_type => :json, :accept => :json}
      # Passing a block makes the call hand back the response instead of raising RestClient::Exception
      response = root_resource["#{table}.json"].post(payload, headers) { |reply, _request, _result| reply }
      JSON.parse(expect_status(response, '201').body)
    end

    # Update the database columns for the specified item using the hash data.
    # @param [String] table name of the database table
    # @param [String] id primary key for the item in the database table
    # @param [Hash] hash the item data to be updated in the database table
    # @return [Boolean] true if the HTTP response code is 204, per specification for PATCH or PUT request types.
    #   Response body is empty, per same specification.
    # @raise [RuntimeError] carrying the response description if the HTTP response code is not 204
    # @see https://tools.ietf.org/html/rfc5789
    # @see http://stackoverflow.com/questions/797834/should-a-restful-put-operation-return-something/827045#827045
    def update_item(table, id, hash)
      payload = hash.to_json
      headers = {:content_type => :json}
      # Passing a block makes the call hand back the response instead of raising RestClient::Exception
      response = root_resource["#{table}/#{id}.json"].patch(payload, headers) { |reply, _request, _result| reply }
      expect_status(response, '204')
      true
    end

    private

    # Shared status check for all request methods.
    # @param [#code,#description] response The response object yielded by RestClient
    # @param [String] expected_code The only HTTP status code treated as success
    # @return [Object] the same response, when its code matches
    # @raise [RuntimeError] carrying the response description when the code differs
    def expect_status(response, expected_code)
      raise response.description unless response.code.to_s == expected_code
      response
    end

  end

end
|
109
|
+
|
110
|
+
end
|
data/lib/replication/bagit_bag.rb
ADDED
@@ -0,0 +1,337 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../libdir')
|
2
|
+
require 'sdr_replication'
|
3
|
+
|
4
|
+
module Replication
|
5
|
+
|
6
|
+
# A BagIt bag contains a structured copy of a digital object for storage, transfer, or replication
|
7
|
+
# @see https://tools.ietf.org/html/draft-kunze-bagit-10
|
8
|
+
# This class can be used to create, parse, or validate a bag instance
|
9
|
+
#
|
10
|
+
# @note Copyright (c) 2014 by The Board of Trustees of the Leland Stanford Junior University.
|
11
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
12
|
+
class BagitBag

  # Create a brand-new bag on disk.
  # @param [Pathname,String] pathname The location of the bag home directory
  # @return [BagitBag] A new bag whose home and payload folders exist and whose bagit.txt file has been written
  def self.create_bag(pathname)
    bag = new
    bag.bag_pathname = pathname
    bag.payload_pathname.mkpath
    bag.write_bagit_txt
    bag
  end

  # Open a bag that already exists on disk. Nothing is created or modified.
  # @param [Pathname,String] pathname The location of the bag home directory
  # @return [BagitBag] A bag object bound to the existing home directory
  # @raise [RuntimeError] if the home directory or its bagit.txt file does not exist
  def self.open_bag(pathname)
    bag = new
    bag.bag_pathname = pathname
    raise "No bag found at #{bag.bag_pathname}" unless bag.bag_pathname.exist?
    bagit_txt = bag.bag_pathname.join("bagit.txt")
    raise "No bagit.txt file found at #{bagit_txt}" unless bagit_txt.exist?
    bag
  end

  # @return [Pathname] The location of the bag home directory
  attr_reader :bag_pathname

  # @param [Pathname,String] pathname The location of the bag home directory
  # @return [Void] Set the location of the bag home directory
  def bag_pathname=(pathname)
    @bag_pathname = Pathname(pathname)
  end

  # @return [Pathname] The location of the bag data directory
  def payload_pathname
    bag_pathname.join('data')
  end

  # @return [Pathname] Generate the bagit.txt tag file
  def write_bagit_txt
    bagit_txt = bag_pathname.join("bagit.txt")
    bagit_txt.open('w') do |f|
      f.puts "Tag-File-Character-Encoding: UTF-8"
      f.puts "BagIt-Version: 0.97"
    end
    bagit_txt
  end

  # @return [Hash<String,String>] A hash containing the properties documented in the bagit.txt tagfile
  def read_bagit_txt
    read_tag_file(bag_pathname.join("bagit.txt"))
  end

  # @return [Array<Symbol>] The list of checksum types to be used when generating fixity data
  def bag_checksum_types
    @bag_checksum_types ||= Fixity.default_checksum_types
  end

  # @param [Object] types The list of checksum types to be used when generating fixity data
  # @return [Void] Set the list of checksum types to be used when generating fixity data
  def bag_checksum_types=(*types)
    @bag_checksum_types = Fixity.validate_checksum_types(*types)
  end

  # @param [Symbol] link_mode Specifies whether to :copy, :link, or :symlink the files to the payload directory
  # @param [Pathname] source_dir The source location of the directory whose contents are to be ingested
  # @return [Pathname] Generate file_fixity_hash and send it to #add_payload_files
  def add_payload_dir(link_mode, source_dir)
    file_fixity_hash = Fixity.generate_checksums(source_dir, nil, bag_checksum_types)
    add_payload_files(link_mode, source_dir, file_fixity_hash)
    payload_pathname
  end

  # @param [Symbol] link_mode Specifies whether to :copy, :link, or :symlink the files to the payload directory
  # @param [Pathname,String] source_basepath The source location of the directory whose contents are to be ingested
  # @param [Hash<String,FileFixity>] file_fixity_hash The list of files (with fixity data) to be added to the payload
  # @return [Pathname] Copy or link the files specified in the file_fixity_hash to the payload directory,
  #   then update the payload manifest files
  def add_payload_files(link_mode, source_basepath, file_fixity_hash)
    source_root = Pathname(source_basepath) # accept a plain String as well as a Pathname
    file_fixity_hash.each_key do |file_id|
      copy_file(link_mode, source_root.join(file_id), payload_pathname.join(file_id))
    end
    write_manifest_checksums('manifest', file_fixity_hash)
    payload_pathname
  end

  # @param [Symbol] link_mode Specifies whether to :copy, :link, or :symlink the files to the payload directory
  # @param [Pathname] source_pathname The source location of the file to be ingested
  # @param [Pathname] target_pathname The full path at which the file is to be placed
  # @return [Pathname] link or copy the specified file from source location to the target location
  # @raise [RuntimeError] if link_mode is not one of :copy, :link, :symlink (or nil, which means :copy)
  def copy_file(link_mode, source_pathname, target_pathname)
    target_pathname.parent.mkpath
    case link_mode
    when :copy, nil
      FileUtils.copy(source_pathname.to_s, target_pathname.to_s) # automatically dereferences symlinks
    when :link
      FileUtils.link(source_pathname.to_s, target_pathname.to_s) #, :force => true (false is default)
    when :symlink
      FileUtils.symlink(source_pathname.to_s, target_pathname.to_s) #, :force => true (false is default)
    else
      raise "Invalid link_mode: #{link_mode}, expected one of [:copy,:link,:symlink]"
    end
    target_pathname
  end

  # @param [String] tarfile_id The name of the tar archive, relative to the payload directory
  # @param [Pathname,String] source_fullpath The location of the directory whose content will be tarred
  # @param [Pathname,String] source_basepath The location of the directory to change to before doing the tar create
  # @return [Tarfile] Create a tar archive of a directory into the payload directory,
  #   generating checksums in parallel processes and recording those checksums in the payload manifests
  def add_payload_tarfile(tarfile_id, source_fullpath, source_basepath)
    tarfile = Tarfile.new
    tarfile.source_basepath = Pathname(source_basepath)
    tarfile.source_fullpath = Pathname(source_fullpath)
    tarfile.tarfile_basepath = payload_pathname
    tarfile.tarfile_fullpath = payload_pathname.join(tarfile_id.to_s)
    tarfile.create_tarfile
    file_fixity_hash = Fixity.generate_checksums(tarfile.tarfile_basepath, [tarfile.tarfile_fullpath], bag_checksum_types)
    write_manifest_checksums('manifest', file_fixity_hash)
    tarfile
  end

  # @return [Pathname] Generate the bag-info.txt tag file to record the payload size
  def write_bag_info_txt
    payload_size = bag_payload_size
    bag_info_txt = bag_pathname.join("bag-info.txt")
    bag_info_txt.open('w') do |f|
      f.puts "External-Identifier: #{bag_pathname.basename}"
      f.puts "Payload-Oxum: #{payload_size[:bytes]}.#{payload_size[:files]}"
      f.puts "Bag-Size: #{bag_size_human(payload_size[:bytes])}"
    end
    bag_info_txt
  end

  # @return [Hash<Symbol,Integer>] A hash containing the payload size in bytes, and the number of files,
  #   derived from the payload directory contents
  def bag_payload_size
    payload_pathname.find.select(&:file?).each_with_object({bytes: 0, files: 0}) do |file, totals|
      totals[:bytes] += file.size
      totals[:files] += 1
    end
  end

  # @param [Integer] bytes The total number of bytes in the payload
  # @return [String] Human-readable rendition of the total payload size
  def bag_size_human(bytes)
    units = %w[B KB MB GB TB]
    size = bytes
    exponent = 0
    # Stop at the largest known unit, so very large sizes are expressed in TB
    while size >= 1024 && exponent < units.size - 1
      size /= 1024.0
      exponent += 1
    end
    return format("%d B", size) if exponent.zero?
    format("%.2f %s", size, units[exponent])
  end

  # @return [Hash<String,String>] A hash containing the properties documented in the bag-info.txt tagfile
  def read_bag_info_txt
    read_tag_file(bag_pathname.join("bag-info.txt"))
  end

  # @return [Hash<Symbol,Integer>] A hash containing the payload size in bytes, and the number of files,
  #   derived from the Payload-Oxum property
  # @raise [RuntimeError] if bag-info.txt has no Payload-Oxum property
  def info_payload_size
    oxum = read_bag_info_txt['Payload-Oxum']
    raise "Payload-Oxum not found in #{bag_pathname.join('bag-info.txt')}" unless oxum
    bytes, files = oxum.split('.')
    {:bytes => bytes.to_i, :files => files.to_i}
  end

  # @return [Boolean] Compare the actual measured payload size against the value recorded in bag-info.txt
  # @raise [RuntimeError] if the two sizes differ
  def verify_payload_size
    info_size = info_payload_size
    bag_size = bag_payload_size
    if info_size != bag_size
      raise "Failed payload size verification! Expected: #{info_size}, Found: #{bag_size}"
    end
    true
  end

  # @return [Hash<String,FileFixity>] create hash containing ids and checksums for all files in the bag's root directory
  def generate_tagfile_checksums
    # NOTE(review): children also lists the 'data' directory; presumably Fixity.generate_checksums
    # ignores non-file entries -- confirm against the Fixity implementation.
    tagfiles = bag_pathname.children.reject { |file| file.basename.to_s.start_with?('tagmanifest') }
    Fixity.generate_checksums(bag_pathname, tagfiles, bag_checksum_types)
  end

  # @return [Hash<String,FileFixity>] create hash containing ids and checksums for all files in the bag's payload
  def generate_payload_checksums
    Fixity.generate_checksums(payload_pathname, nil, bag_checksum_types)
  end

  # @param [String] manifest_type The type of manifest file ('manifest' or 'tagmanifest') to be updated
  # @param [Hash<String,FileFixity>] file_fixity_hash A hash containing file ids and fixity data
  # @param [String] open_mode The file open mode (default is 'a')
  # @return [Hash<Symbol,Pathname>] Update each of the manifests with data from the file_fixity_hash
  def write_manifest_checksums(manifest_type, file_fixity_hash, open_mode='a')
    bag_checksum_types.each_with_object({}) do |checksum_type, manifests|
      manifest_pathname = bag_pathname.join("#{manifest_type}-#{checksum_type}.txt")
      # Block form guarantees the manifest is closed even if a fixity lookup raises
      manifest_pathname.open(open_mode) do |manifest_file|
        file_fixity_hash.each_value do |fixity|
          checksum = fixity.get_checksum(checksum_type)
          manifest_file.puts("#{checksum} #{fixity.file_id}") if checksum
        end
      end
      manifests[checksum_type] = manifest_pathname
    end
  end

  # @param [String] manifest_type The type of manifest file ('manifest' or 'tagmanifest') to be read
  # @return [Hash<String,FileFixity>] A hash containing file ids and fixity data derived from the manifest files.
  #   As a side effect, every checksum type for which a manifest exists is added to #bag_checksum_types
  def read_manifest_files(manifest_type)
    file_fixity_hash = {}
    checksum_type_list = []
    Fixity.valid_checksum_ids.each do |checksum_type|
      manifest_pathname = bag_pathname.join("#{manifest_type}-#{checksum_type}.txt")
      next unless manifest_pathname.file?
      checksum_type_list << checksum_type
      manifest_pathname.readlines.each do |line|
        entry = line.strip
        next if entry.empty? # blank lines carry no fixity data
        # The separator is whitespace, optionally with the '*' binary-mode marker
        checksum, file_id = entry.split(/[\s*]+/, 2)
        file_fixity = (file_fixity_hash[file_id] ||= FileFixity.new(file_id: file_id))
        file_fixity.set_checksum(checksum_type, checksum)
      end
    end
    self.bag_checksum_types = bag_checksum_types | checksum_type_list
    file_fixity_hash
  end

  # @return [Boolean] Compare fixity data from the tag manifest files against the values measured by digesting the files
  def verify_tagfile_manifests
    manifest_type = 'tagmanifest'
    manifest_fixity_hash = read_manifest_files(manifest_type)
    bag_fixity_hash = generate_tagfile_checksums
    verify_manifests(manifest_type, manifest_fixity_hash, bag_fixity_hash)
  end

  # @return [Boolean] Compare fixity data from the payload manifest files against the values measured by digesting the files
  def verify_payload_manifests
    manifest_type = 'manifest'
    manifest_fixity_hash = read_manifest_files(manifest_type)
    bag_fixity_hash = generate_payload_checksums
    verify_manifests(manifest_type, manifest_fixity_hash, bag_fixity_hash)
  end

  # @param [String] manifest_type The type of manifest file ('manifest' or 'tagmanifest') to be read
  # @param [Hash<String,FileFixity>] manifest_fixity_hash A hash containing file ids and fixity data derived from the manifest files
  # @param [Hash<String,FileFixity>] bag_fixity_hash A hash containing file ids and fixity data derived from the actual files
  # @return [Boolean] Compare fixity data from the manifest files against the values measured by digesting the files,
  #   returning true if equal
  # @raise [RuntimeError] listing the differences if the two sets of fixity data are not equal
  def verify_manifests(manifest_type, manifest_fixity_hash, bag_fixity_hash)
    diff = manifest_diff(manifest_fixity_hash, bag_fixity_hash)
    if diff.size > 0
      raise "Failed #{manifest_type} verification! Differences: \n#{diff.inspect}"
    end
    true
  end

  # @param [Hash<String,FileFixity>] manifest_fixity_hash A hash containing file ids and fixity data derived from the manifest files
  # @param [Hash<String,FileFixity>] bag_fixity_hash A hash containing file ids and fixity data derived from the actual files
  # @return [Hash] A report of the differences between the fixity data from the manifest files
  #   against the values measured by digesting the files
  def manifest_diff(manifest_fixity_hash, bag_fixity_hash)
    all_file_ids = manifest_fixity_hash.keys | bag_fixity_hash.keys
    all_file_ids.each_with_object({}) do |file_id, diff|
      # A file missing from one side is compared as an empty fixity record
      manifest_fixity = manifest_fixity_hash[file_id] || FileFixity.new(file_id: file_id)
      bag_fixity = bag_fixity_hash[file_id] || FileFixity.new(file_id: file_id)
      if manifest_fixity != bag_fixity
        diff[file_id] = manifest_fixity.diff(bag_fixity,'manifest','bag')
      end
    end
  end

  # @return [Boolean] Validate the bag containing the digital object
  # @raise [RuntimeError] from the first verification step that fails
  def verify_bag
    verify_bag_structure
    verify_tagfile_manifests
    verify_payload_size
    verify_payload_manifests
    true
  end

  # @return [Boolean] Test the existence of expected files, return true if files exist, raise exception if not
  def verify_bag_structure
    required_files = ['data','bagit.txt','bag-info.txt','manifest-sha256.txt','tagmanifest-sha256.txt']
    required_files.each { |filename| verify_pathname(bag_pathname.join(filename)) }
    true
  end

  # @param [Pathname] pathname The file whose existence should be verified
  # @return [Boolean] Test the existence of the specified path. Return true if file exists, raise exception if not
  def verify_pathname(pathname)
    raise "#{pathname.basename} not found at #{pathname}" unless pathname.exist?
    true
  end

  private

  # Parse a "Key: value" tag file such as bagit.txt or bag-info.txt.
  # Lines without a colon are ignored. String#strip is used instead of chained chomp!/strip!
  # because the bang methods return nil when they change nothing.
  # @param [Pathname] tag_pathname The location of the tag file
  # @return [Hash<String,String>] The properties found in the tag file
  def read_tag_file(tag_pathname)
    tag_pathname.readlines.each_with_object({}) do |line, properties|
      key, value = line.strip.split(':', 2)
      properties[key.strip] = value.strip if value
    end
  end

end
|
335
|
+
|
336
|
+
|
337
|
+
end
|