moab-versioning 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/moab.rb +59 -0
- data/lib/moab/bagger.rb +289 -0
- data/lib/moab/config.rb +21 -0
- data/lib/moab/exceptions.rb +18 -0
- data/lib/moab/file_group.rb +244 -0
- data/lib/moab/file_group_difference.rb +336 -0
- data/lib/moab/file_group_difference_subset.rb +45 -0
- data/lib/moab/file_instance.rb +82 -0
- data/lib/moab/file_instance_difference.rb +54 -0
- data/lib/moab/file_inventory.rb +279 -0
- data/lib/moab/file_inventory_difference.rb +132 -0
- data/lib/moab/file_manifestation.rb +85 -0
- data/lib/moab/file_signature.rb +200 -0
- data/lib/moab/signature_catalog.rb +195 -0
- data/lib/moab/signature_catalog_entry.rb +61 -0
- data/lib/moab/storage_object.rb +220 -0
- data/lib/moab/storage_object_version.rb +333 -0
- data/lib/moab/storage_repository.rb +57 -0
- data/lib/moab/storage_services.rb +104 -0
- data/lib/moab/verification_result.rb +83 -0
- data/lib/moab/version_metadata.rb +38 -0
- data/lib/moab/version_metadata_entry.rb +64 -0
- data/lib/moab/version_metadata_event.rb +47 -0
- data/lib/moab_stanford.rb +18 -0
- data/lib/monkey_patches.rb +65 -0
- data/lib/serializer.rb +36 -0
- data/lib/serializer/manifest.rb +76 -0
- data/lib/serializer/serializable.rb +178 -0
- data/lib/stanford/active_fedora_object.rb +34 -0
- data/lib/stanford/content_inventory.rb +236 -0
- data/lib/stanford/dor_metadata.rb +49 -0
- data/lib/stanford/storage_repository.rb +46 -0
- data/lib/stanford/storage_services.rb +66 -0
- data/lib/tasks/yard.rake +34 -0
- data/lib/tools/api_doc_generator.rb +396 -0
- data/lib/tools/spec_generator.rb +410 -0
- data/lib/tools/spec_generator_old.rb +49 -0
- metadata +252 -0
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'moab'
|
2
|
+
|
3
|
+
module Moab
|
4
|
+
|
5
|
+
# A file-level entry in a digital object's {SignatureCatalog}.
|
6
|
+
# It has a child {FileSignature} element that identifies the file's contents (the bytestream)
|
7
|
+
# along with data that specifies the SDR storage location that was used to preserve a single file instance.
|
8
|
+
#
|
9
|
+
# ====Data Model
|
10
|
+
# * {SignatureCatalog} = lookup table containing a cumulative collection of all files ever ingested
|
11
|
+
# * <b>{SignatureCatalogEntry} [1..*] = a row in the lookup table containing storage information about a single file</b>
|
12
|
+
# * {FileSignature} [1] = file fixity information
|
13
|
+
#
|
14
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
15
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
16
|
+
class SignatureCatalogEntry < Serializable
|
17
|
+
|
18
|
+
include HappyMapper
|
19
|
+
|
20
|
+
# The name of the XML element used to serialize this objects data
|
21
|
+
tag 'entry'
|
22
|
+
|
23
|
+
# (see Serializable#initialize)
|
24
|
+
def initialize(opts = {})
  # Nothing entry-specific to set up: the options hash is forwarded unchanged to the parent initializer.
  super
end
|
27
|
+
|
28
|
+
# @attribute
|
29
|
+
# @return [Integer] The ordinal version number
|
30
|
+
attribute :version_id, Integer, :tag => 'originalVersion', :key => true, :on_save => Proc.new {|n| n.to_s}
|
31
|
+
|
32
|
+
# @attribute
|
33
|
+
# @return [String] The name of the file group
|
34
|
+
attribute :group_id, String, :tag => 'groupId', :key => true
|
35
|
+
|
36
|
+
# @attribute
|
37
|
+
# @return [String] The id is the filename path, relative to the file group's base directory
|
38
|
+
attribute :path, String, :key => true, :tag => 'storagePath'
|
39
|
+
|
40
|
+
# @attribute
|
41
|
+
# @return [FileSignature] The fixity data of the file instance
|
42
|
+
element :signature, FileSignature, :tag => 'fileSignature'
|
43
|
+
|
44
|
+
def signature
  stored = @signature
  # HappyMapper's parser may leave an array of signatures in this field; only the first one is exposed.
  return stored unless stored.is_a?(Array)
  stored[0]
end
|
48
|
+
|
49
|
+
def signature=(signature)
  # Accepts either a single signature or an array of them; for an array only the first element is kept.
  @signature =
    if signature.is_a?(Array)
      signature[0]
    else
      signature
    end
end
|
52
|
+
|
53
|
+
# @api internal
|
54
|
+
# @return [String] Returns the storage path to a file, relative to the object storage home directory
|
55
|
+
def storage_path
  # Layout: <version directory>/data/<file group>/<file path>
  version_dir = StorageObject.version_dirname(version_id)
  File.join(version_dir, 'data', group_id, path)
end
|
58
|
+
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
@@ -0,0 +1,220 @@
|
|
1
|
+
require 'moab'
|
2
|
+
|
3
|
+
module Moab
|
4
|
+
|
5
|
+
# A class to represent a digital object's repository storage location
|
6
|
+
# and methods for
|
7
|
+
# * packaging a bag for ingest of a new object version to the repository
|
8
|
+
# * ingesting a bag
|
9
|
+
# * disseminating a bag containing a reconstructed object version
|
10
|
+
#
|
11
|
+
# ====Data Model
|
12
|
+
# * {StorageRepository} = represents a digital object repository storage node
|
13
|
+
# * {StorageServices} = supports application layer access to the repository's objects, data, and metadata
|
14
|
+
# * <b>{StorageObject} = represents a digital object's repository storage location and ingest/dissemination methods</b>
|
15
|
+
# * {StorageObjectVersion} [1..*] = represents a version subdirectory within an object's home directory
|
16
|
+
# * {Bagger} [1] = utility for creating bagit packages for ingest or dissemination
|
17
|
+
#
|
18
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
19
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
20
|
+
class StorageObject
|
21
|
+
|
22
|
+
# @return [String] The digital object ID (druid)
|
23
|
+
attr_accessor :digital_object_id
|
24
|
+
|
25
|
+
# @return [Pathname] The location of the object's storage home directory
|
26
|
+
attr_accessor :object_pathname
|
27
|
+
|
28
|
+
# @param object_id [String] The digital object identifier
|
29
|
+
# @param object_dir [Pathname,String] The location of the object's storage home directory
|
30
|
+
def initialize(object_id, object_dir, mkpath=false)
  @digital_object_id = object_id
  # Accepts a String or a Pathname; always stored as a Pathname.
  @object_pathname = Pathname.new(object_dir)
  # The home directory is only created on request.
  return unless mkpath
  initialize_storage
end
|
35
|
+
|
36
|
+
# @return [Boolean] true if the object's storage directory exists
|
37
|
+
def exist?
  # Plain existence check on the object's home path.
  @object_pathname.exist?
end
|
40
|
+
|
41
|
+
# @api external
|
42
|
+
# @return [void] Create the directory for the digital object home unless it already exists
|
43
|
+
def initialize_storage
  # mkpath also creates any missing parent directories and is a no-op when the directory is already there.
  @object_pathname.mkpath
end
|
46
|
+
|
47
|
+
# @api external
|
48
|
+
# @param bag_dir [Pathname,String] The location of the bag to be ingested
|
49
|
+
# @return [StorageObjectVersion] Ingest a new object version contained in a bag into this object's storage area
|
50
|
+
# @example {include:file:spec/features/storage/ingest_spec.rb}
|
51
|
+
def ingest_bag(bag_dir)
  bag_dir = Pathname.new(bag_dir)
  current_version = StorageObjectVersion.new(self, current_version_id)
  current_inventory = current_version.file_inventory('version')
  new_version = StorageObjectVersion.new(self, current_version_id + 1)
  if FileInventory.xml_pathname_exist?(bag_dir, 'version')
    # The bag already carries its own version inventory.
    new_inventory = FileInventory.read_xml_file(bag_dir, 'version')
  elsif current_version.version_id == 0
    # First ingest of a plain bag: build the inventory (and additions) from the bag contents.
    new_inventory = versionize_bag(bag_dir, current_version, new_version)
  else
    # Without this branch new_inventory would be nil and the validation below would
    # fail with an unhelpful NoMethodError.
    raise "version inventory not found in bag #{bag_dir} and object already has versions (current: #{current_version.version_id})"
  end
  validate_new_inventory(new_inventory)
  new_version.ingest_bag_data(bag_dir)
  new_version.update_catalog(current_version.signature_catalog, new_inventory)
  new_version.generate_differences_report(current_inventory, new_inventory)
  new_version.generate_manifest_inventory
  # Drop the memoized id so current_version_id picks up the version that was just written.
  @current_version_id = nil
  new_version
end
|
68
|
+
|
69
|
+
# @api internal
|
70
|
+
# @param bag_dir [Pathname] The location of the bag to be ingested
|
71
|
+
# @param current_version[StorageObjectVersion] The current latest version of the object
|
72
|
+
# @param new_version [StorageObjectVersion] The version to be added
|
73
|
+
# @return [FileInventory] The file inventory of the specified type for this version
|
74
|
+
def versionize_bag(bag_dir,current_version,new_version)
  # Describe the bag's payload as a 'version' inventory stamped with the new version id.
  new_inventory = FileInventory.new(
    :type => 'version',
    :digital_object_id => @digital_object_id,
    :version_id => new_version.version_id,
    :inventory_datetime => Time.now
  )
  new_inventory.inventory_from_bagit_bag(bag_dir)
  new_inventory.write_xml_file(bag_dir)
  # Ask the current catalog for the version additions of this inventory and save them into the bag too.
  current_version.signature_catalog.version_additions(new_inventory).write_xml_file(bag_dir)
  new_inventory
end
|
87
|
+
|
88
|
+
# @api external
|
89
|
+
# @param version_id [Integer] The version identifier of the object version to be disseminated
|
90
|
+
# @param bag_dir [Pathname,String] The location of the bag to be created
|
91
|
+
# @return [void] Reconstruct an object version and package it in a bag for dissemination
|
92
|
+
# @example {include:file:spec/features/storage/reconstruct_spec.rb}
|
93
|
+
def reconstruct_version(version_id, bag_dir)
  source_version = StorageObjectVersion.new(self, version_id)
  inventory = source_version.file_inventory('version')
  catalog = source_version.signature_catalog
  # The bagger is handed the object home as the source location for the :reconstructor package type.
  Bagger.new(inventory, catalog, bag_dir).fill_bag(:reconstructor, @object_pathname)
end
|
100
|
+
|
101
|
+
# @param [String] catalog_filepath The object-relative path of the file
|
102
|
+
# @return [Pathname] The absolute storage path of the file, including the object's home directory
|
103
|
+
def storage_filepath(catalog_filepath)
  absolute_path = @object_pathname.join(catalog_filepath)
  return absolute_path if absolute_path.exist?
  # The requested file is not on disk under the object's home directory.
  raise FileNotFoundException, "#{catalog_filepath} missing from storage location #{absolute_path}"
end
|
108
|
+
|
109
|
+
# @api external
|
110
|
+
# @param version_id [Integer] The version identifier of an object version
|
111
|
+
# @return [String] The directory name of the version, relative to the digital object home directory (e.g v0002)
|
112
|
+
def self.version_dirname(version_id)
  # 'v' followed by the id zero-padded to at least four digits (1 => "v0001").
  'v%04d' % version_id
end
|
115
|
+
|
116
|
+
# @return [Array<Integer>] The list of all version ids for this object
|
117
|
+
def version_id_list
  # An object whose home directory has not been created yet simply has no versions;
  # Pathname#children would raise Errno::ENOENT here.
  return [] unless @object_pathname.exist?
  ids = @object_pathname.children.map do |child|
    # Only entries named 'v' + digits count as versions; anything else in the home directory is ignored.
    match = /\Av(\d+)\z/.match(child.basename.to_s)
    match && match[1].to_i
  end
  ids.compact.sort
end
|
127
|
+
|
128
|
+
# @return [Array<StorageObjectVersion>] The list of all versions in this storage object
|
129
|
+
def version_list
  # One StorageObjectVersion per id, in ascending id order.
  version_id_list.map { |version_id| storage_object_version(version_id) }
end
alias :versions :version_list
|
133
|
+
|
134
|
+
# @return [Boolean] true if there are no versions yet in this object
|
135
|
+
def empty?
  # Empty means: no version ids were found for this object.
  version_id_list.empty?
end
|
138
|
+
|
139
|
+
# @api external
|
140
|
+
# @return [Integer] The identifier of the latest version of this object, or 0 if no versions exist
|
141
|
+
def current_version_id
  # Memoized: the version list is only consulted until a value has been stored.
  @current_version_id ||= (version_id_list.last || 0)
end
|
147
|
+
|
148
|
+
# @return [StorageObjectVersion] The most recent version in the storage object
|
149
|
+
def current_version
  latest_id = current_version_id
  storage_object_version(latest_id)
end
|
152
|
+
|
153
|
+
# @api internal
|
154
|
+
# @param version_inventory [FileInventory] The inventory of the object version to be ingested
|
155
|
+
# @return [Boolean] Tests whether the new version number is one higher than the current version number
|
156
|
+
def validate_new_inventory(version_inventory)
  proposed_id = version_inventory.version_id
  # Versions must be added strictly in sequence.
  return true if proposed_id == (current_version_id + 1)
  raise "version mismatch - current: #{current_version_id} new: #{proposed_id}"
end
|
162
|
+
|
163
|
+
# @api external
|
164
|
+
# @param version_id [Integer] The existing version to return. If nil, return latest version
|
165
|
+
# @return [StorageObjectVersion] The representation of an existing version's storage area
|
166
|
+
def find_object_version(version_id=nil)
  latest = current_version_id
  # No id given: hand back the latest version.
  return StorageObjectVersion.new(self, latest) if version_id.nil?
  # Only ids between 1 and the latest version refer to an existing version.
  raise "Version ID #{version_id} does not exist" unless (1..latest) === version_id
  StorageObjectVersion.new(self, version_id)
end
|
177
|
+
|
178
|
+
# @api external
|
179
|
+
# @param version_id [Integer] The version to return. OK if version does not exist
|
180
|
+
# @return [StorageObjectVersion] The representation of a specified version.
|
181
|
+
# * Version 0 is a special case used to generate empty manifests
|
182
|
+
# * Current version + 1 is used for creation of a new version
|
183
|
+
def storage_object_version(version_id)
  # Any non-nil id is accepted here; existence on disk is not checked.
  raise "Version ID not specified" unless version_id
  StorageObjectVersion.new(self, version_id)
end
|
190
|
+
|
191
|
+
# @return [VerificationResult] Return result of storage verification
|
192
|
+
def verify_object_storage
  result = VerificationResult.new(digital_object_id)
  # One sub-result per stored version ...
  version_list.each { |version| result.subentities << version.verify_version_storage }
  # ... plus a check of the signature catalog held by the latest version.
  result.subentities << current_version.verify_signature_catalog
  # The object verifies only if every sub-result does.
  result.verified = result.subentities.all?(&:verified)
  result
end
|
201
|
+
|
202
|
+
# @param recovery_path [Pathname, String] The location of the recovered object versions
|
203
|
+
# @return [StorageObject] Restore all recovered versions to online storage, returning this storage object
|
204
|
+
def restore_object(recovery_path)
  # A single timestamp is shared by every version deactivated during this restore.
  timestamp = Time.now
  # The third argument is the positional mkpath flag: the recovery location is never created here.
  recovery_object = StorageObject.new(@digital_object_id, recovery_path, false)
  recovery_object.versions.each do |recovery_version|
    storage_version = storage_object_version(recovery_version.version_id)
    # rename/save the original
    storage_version.deactivate(timestamp)
    # copy the recovered version into place
    FileUtils.cp_r(recovery_version.version_pathname.to_s, storage_version.version_pathname.to_s)
  end
  # Restored versions may change which version is the latest, so forget the memoized id.
  @current_version_id = nil
  self
end
|
217
|
+
|
218
|
+
end
|
219
|
+
|
220
|
+
end
|
@@ -0,0 +1,333 @@
|
|
1
|
+
require 'moab'
|
2
|
+
|
3
|
+
module Moab
|
4
|
+
|
5
|
+
# A class to represent a version subdirectory within an object's home directory in preservation storage
|
6
|
+
#
|
7
|
+
# ====Data Model
|
8
|
+
# * {StorageRepository} = represents a digital object repository storage node
|
9
|
+
# * {StorageServices} = supports application layer access to the repository's objects, data, and metadata
|
10
|
+
# * {StorageObject} = represents a digital object's repository storage location and ingest/dissemination methods
|
11
|
+
# * <b>{StorageObjectVersion} [1..*] = represents a version subdirectory within an object's home directory</b>
|
12
|
+
# * {Bagger} [1] = utility for creating bagit packages for ingest or dissemination
|
13
|
+
#
|
14
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
15
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
16
|
+
class StorageObjectVersion
|
17
|
+
|
18
|
+
# @return [Integer] The ordinal version number
|
19
|
+
attr_accessor :version_id
|
20
|
+
|
21
|
+
# @return [String] The "v0001" directory name derived from the version id
|
22
|
+
attr_accessor :version_name
|
23
|
+
|
24
|
+
# @return [Pathname] The location of the version inside the home directory
|
25
|
+
attr_accessor :version_pathname
|
26
|
+
|
27
|
+
# @return [StorageObject] The object representing the digital object's storage location
|
28
|
+
attr_accessor :storage_object
|
29
|
+
|
30
|
+
# @return [Hash<FileInventory>] Cached copies of versionInventory, versionAdditions, or manifestInventory
|
31
|
+
attr_accessor :inventory_cache
|
32
|
+
|
33
|
+
# @param storage_object [StorageObject] The object representing the digital object's storage location
|
34
|
+
# @param version_id [Integer,String] The ordinal version number or a string like 'v0003'
|
35
|
+
def initialize(storage_object, version_id)
  # Normalize the id: Integers are taken as-is, strings must look like 'v' + digits.
  @version_id =
    if version_id.is_a?(Integer)
      version_id
    elsif version_id.is_a?(String) && version_id.match(/^v(\d+)$/)
      version_id.sub(/^v/,'').to_i
    else
      raise "version_id (#{version_id}) is not in a recognized format"
    end
  @version_name = StorageObject.version_dirname(@version_id)
  @version_pathname = storage_object.object_pathname.join(@version_name)
  @storage_object = storage_object
  @inventory_cache = Hash.new
end
|
48
|
+
|
49
|
+
# @return [String] The unique identifier concatenating digital object id with version id
|
50
|
+
def composite_key
  # "<digital object id>-<version directory name>"
  object_part = @storage_object.digital_object_id
  version_part = StorageObject.version_dirname(@version_id)
  object_part + '-' + version_part
end
|
53
|
+
|
54
|
+
# @return [Boolean] true if the object version directory exists
|
55
|
+
def exist?
  # Plain existence check on this version's directory path.
  @version_pathname.exist?
end
|
58
|
+
|
59
|
+
# @param [String] file_category The category of file ('content', 'metadata', or 'manifest')
|
60
|
+
# @param [String] file_id The name of the file (path relative to base directory)
|
61
|
+
# @return [FileSignature] signature of the specified file
|
62
|
+
def find_signature(file_category, file_id)
  # Manifest files are looked up in the 'manifests' inventory under the 'manifests' group;
  # everything else comes from the version inventory under the requested category.
  manifest_lookup = file_category =~ /manifest/
  return file_inventory('manifests').file_signature('manifests', file_id) if manifest_lookup
  file_inventory('version').file_signature(file_category, file_id)
end
|
69
|
+
|
70
|
+
# @param [String] file_category The category of file ('content', 'metadata', or 'manifest')
|
71
|
+
# @param [String] file_id The name of the file (path relative to base directory)
|
72
|
+
# @return [Pathname] Pathname object containing the full path for the specified file
|
73
|
+
def find_filepath(file_category, file_id)
  # First choice: the file is physically stored in this version's own directory.
  this_version_filepath = file_pathname(file_category, file_id)
  return this_version_filepath if this_version_filepath.exist?
  # Same /manifest/ category test as file_category_pathname and find_signature, so that
  # 'manifests' is treated exactly like 'manifest' instead of falling through to the
  # version-inventory lookup below.
  if file_category =~ /manifest/
    raise FileNotFoundException, "manifest file #{file_id} not found for #{@storage_object.digital_object_id} - #{@version_id}"
  end
  # Otherwise locate the file through its signature in the catalog.
  file_signature = file_inventory('version').file_signature(file_category, file_id)
  catalog_filepath = signature_catalog.catalog_filepath(file_signature)
  @storage_object.storage_filepath(catalog_filepath)
end
|
81
|
+
|
82
|
+
# @param [String] file_category The category of file ('content', 'metadata', or 'manifest')
|
83
|
+
# @param [FileSignature] file_signature The signature of the file
|
84
|
+
# @return [Pathname] Pathname object containing the full path for the specified file
|
85
|
+
def find_filepath_using_signature(file_category, file_signature)
  # file_category is not consulted by this lookup; the signature alone selects the catalog entry.
  relative_path = signature_catalog.catalog_filepath(file_signature)
  @storage_object.storage_filepath(relative_path)
end
|
89
|
+
|
90
|
+
# @param [String] file_category The category of file ('content', 'metadata', or 'manifest')
|
91
|
+
# @param [String] file_id The name of the file (path relative to base directory)
|
92
|
+
# @return [Pathname] Pathname object containing this version's storage path for the specified file
|
93
|
+
def file_pathname(file_category, file_id)
  category_home = file_category_pathname(file_category)
  category_home.join(file_id)
end
|
96
|
+
|
97
|
+
# @param [String] file_category The category of file ('content', 'metadata', or 'manifest')
|
98
|
+
# @return [Pathname] Pathname object containing this version's storage home for the specified file category
|
99
|
+
def file_category_pathname(file_category)
  # Any category containing 'manifest' maps to the manifests directory; all others live under data/.
  return @version_pathname.join('manifests') if file_category =~ /manifest/
  @version_pathname.join('data', file_category)
end
|
106
|
+
|
107
|
+
# @api external
|
108
|
+
# @param type [String] The type of inventory to return (version|additions|manifests)
|
109
|
+
# @return [FileInventory] The file inventory of the specified type for this version
|
110
|
+
# @see FileInventory#read_xml_file
|
111
|
+
def file_inventory(type)
  unless version_id > 0
    # Version 0 has nothing on disk: answer with a fresh (uncached) 'version' inventory
    # holding 'content' and 'metadata' groups.
    groups = ['content','metadata'].map { |group_id| FileGroup.new(:group_id=>group_id) }
    return FileInventory.new(
      :type=>'version',
      :digital_object_id => @storage_object.digital_object_id,
      :version_id => @version_id,
      :groups => groups
    )
  end
  # Real versions: each inventory type is read from the manifests directory once and then served from the cache.
  return @inventory_cache[type] if @inventory_cache.has_key?(type)
  @inventory_cache[type] = FileInventory.read_xml_file(@version_pathname.join('manifests'), type)
end
|
125
|
+
|
126
|
+
# @api external
|
127
|
+
# @return [SignatureCatalog] The signature catalog of the digital object as of this version
|
128
|
+
def signature_catalog
  # Version 0 has no catalog on disk, so a new one carrying only the object id is returned.
  return SignatureCatalog.new(:digital_object_id => @storage_object.digital_object_id) unless version_id > 0
  # The catalog is re-read from this version's manifests directory on every call.
  SignatureCatalog.read_xml_file(@version_pathname.join('manifests'))
end
|
135
|
+
|
136
|
+
# @api internal
|
137
|
+
# @param bag_dir [Pathname,String] The location of the bag to be ingested
|
138
|
+
# @return [void] Create the version subdirectory and move files into it
|
139
|
+
def ingest_bag_data(bag_dir)
  # An existing version directory is never overwritten.
  raise "Version already exists: #{@version_pathname.to_s}" if @version_pathname.exist?
  manifests_dir = @version_pathname.join('manifests')
  manifests_dir.mkpath
  bag_pathname = Pathname(bag_dir)
  # Payload goes under data/, the two inventories shipped with the bag go under manifests/.
  ingest_dir(bag_pathname.join('data'), @version_pathname.join('data'))
  ingest_file(bag_pathname.join(FileInventory.xml_filename('version')), manifests_dir)
  ingest_file(bag_pathname.join(FileInventory.xml_filename('additions')), manifests_dir)
end
|
147
|
+
|
148
|
+
# @api internal
|
149
|
+
# @param source_dir [Pathname] The source location of the directory whose contents are to be ingested
|
150
|
+
# @param target_dir [Pathname] The target location of the directory into which files are ingested
|
151
|
+
# @param use_links [Boolean] If true, use hard links; if false, make copies
|
152
|
+
# @return [void] recursively link or copy the source directory contents to the target directory
|
153
|
+
def ingest_dir(source_dir, target_dir, use_links=true)
  # Refuse to merge into a target that is already present.
  raise "cannot copy - target already exists: #{target_dir.expand_path}" if target_dir.exist?
  target_dir.mkpath
  source_dir.children.each do |entry|
    # Sub-directories recurse into a same-named target; everything else is ingested as a file.
    entry.directory? ? ingest_dir(entry, target_dir.join(entry.basename), use_links) : ingest_file(entry, target_dir, use_links)
  end
end
|
164
|
+
|
165
|
+
# @api internal
|
166
|
+
# @param source_file [Pathname] The source location of the file to be ingested
|
167
|
+
# @param target_dir [Pathname] The location of the directory in which to place the file
|
168
|
+
# @param use_links [Boolean] If true, use hard links; if false, make copies
|
169
|
+
# @return [void] link or copy the specified file from source location to the version directory
|
170
|
+
def ingest_file(source_file, target_dir, use_links=true)
  source = source_file.to_s
  destination = target_dir.to_s
  # Hard link by default; otherwise make an independent copy.
  return FileUtils.ln(source, destination) if use_links
  FileUtils.cp(source, destination)
end
|
177
|
+
|
178
|
+
# @api internal
|
179
|
+
# @param signature_catalog [SignatureCatalog] The current version's catalog
|
180
|
+
# @param new_inventory [FileInventory] The new version's inventory
|
181
|
+
# @return [void] Updates the catalog to include newly added files, then saves it to disk
|
182
|
+
# @see SignatureCatalog#update
|
183
|
+
def update_catalog(signature_catalog,new_inventory)
  data_dir = @version_pathname.join('data')
  manifests_dir = @version_pathname.join('manifests')
  # Update the catalog from the new inventory first, then persist it with this version's manifests.
  signature_catalog.update(new_inventory, data_dir)
  signature_catalog.write_xml_file(manifests_dir)
end
|
187
|
+
|
188
|
+
# @api internal
|
189
|
+
# @param old_inventory [FileInventory] The old version's inventory
|
190
|
+
# @param new_inventory [FileInventory] The new version's inventory
|
191
|
+
# @return [void] generate a file inventory differences report and save to disk
|
192
|
+
def generate_differences_report(old_inventory,new_inventory)
  # Compare old against new, then store the report with this version's manifests.
  report = FileInventoryDifference.new.compare(old_inventory, new_inventory)
  manifests_dir = @version_pathname.join('manifests')
  report.write_xml_file(manifests_dir)
end
|
196
|
+
|
197
|
+
# @api internal
|
198
|
+
# @return [void] examine the version's directory and create/serialize a {FileInventory} containing the manifest files
|
199
|
+
def generate_manifest_inventory
  manifests_dir = @version_pathname.join('manifests')
  manifest_inventory = FileInventory.new(
    :type=>'manifests',
    :digital_object_id=>@storage_object.digital_object_id,
    :version_id=>@version_id)
  # The second argument is the positional 'recursive' flag: only the top level of the
  # manifests directory is scanned. (Passed as a plain value; the former `recursive=false`
  # was a stray local-variable assignment, not a keyword argument.)
  manifest_inventory.groups << FileGroup.new(:group_id=>'manifests').group_from_directory(manifests_dir, false)
  manifest_inventory.write_xml_file(manifests_dir)
end
|
207
|
+
|
208
|
+
# @return [VerificationResult] return result of testing correctness of version manifests
|
209
|
+
def verify_version_storage
  result = VerificationResult.new(composite_key)
  # Run the three per-version checks in a fixed order, collecting each sub-result.
  [:verify_manifest_inventory, :verify_version_inventory, :verify_version_additions].each do |check|
    result.subentities << send(check)
  end
  # The version verifies only if every check does.
  result.verified = result.subentities.all?(&:verified)
  result
end
|
217
|
+
|
218
|
+
# @return [VerificationResult] result of testing whether the manifest inventory matches the actual files
|
219
|
+
def verify_manifest_inventory
  outcome = VerificationResult.new("manifest_inventory")
  # Recorded view: read/parse manifestInventory.xml
  recorded = file_inventory('manifests')
  outcome.subentities << VerificationResult.verify_value('composite_key', composite_key, recorded.composite_key)
  outcome.subentities << VerificationResult.verify_truth('manifests_group', !recorded.group_empty?('manifests'))
  # Measured view: inventory the files in the manifests directory (excluding manifestInventory.xml)
  measured = FileInventory.new.inventory_from_directory(@version_pathname.join('manifests'), 'manifests')
  measured.digital_object_id = storage_object.digital_object_id
  measured.group('manifests').remove_file_having_path("manifestInventory.xml")
  # Compare the measured inventory against the values in manifestInventory.xml
  comparison = FileInventoryDifference.new
  comparison.compare(recorded, measured)
  file_check = VerificationResult.new('file_differences')
  file_check.verified = (comparison.difference_count == 0)
  file_check.details = comparison.differences_detail
  outcome.subentities << file_check
  outcome.verified = outcome.subentities.all?(&:verified)
  outcome
end
|
240
|
+
|
241
|
+
# @return [VerificationResult] result of checking the catalog's composite key and that every
#   catalog entry's storage path exists under the object's home directory
def verify_signature_catalog
  outcome = VerificationResult.new("signature_catalog")
  catalog = signature_catalog
  outcome.subentities << VerificationResult.verify_value('signature_key', composite_key, catalog.composite_key)
  home = storage_object.object_pathname
  # Resolve every entry to its location on disk, then split into present and absent.
  locations = catalog.entries.map { |entry| home.join(entry.storage_path) }
  present, absent = locations.partition(&:exist?)
  found = present.size
  missing = absent.map(&:to_s)
  file_check = VerificationResult.new("storage_location")
  file_check.verified = (found == catalog.file_count)
  file_check.details = {
    'expected' => catalog.file_count,
    'found' => found
  }
  # The list of missing paths is only reported when there is something to report.
  file_check.details['missing'] = missing unless missing.empty?
  outcome.subentities << file_check
  outcome.verified = outcome.subentities.all?(&:verified)
  outcome
end
|
267
|
+
|
268
|
+
# @return [VerificationResult] result of testing whether files & signatures listed in version inventory can all be found
|
269
|
+
def verify_version_inventory
  outcome = VerificationResult.new("version_inventory")
  inventory = file_inventory('version')
  outcome.subentities << VerificationResult.verify_value('inventory_key', composite_key, inventory.composite_key)
  catalog = signature_catalog
  outcome.subentities << VerificationResult.verify_value('signature_key', composite_key, catalog.composite_key)
  found = 0
  missing = []
  # Every file instance listed in the inventory must have its signature present in the catalog.
  inventory.groups.each do |group|
    group.files.each do |file|
      file.instances.each do |instance|
        relative_path = File.join(group.group_id, instance.path)
        if catalog.signature_hash[file.signature].nil?
          missing << relative_path.to_s
        else
          found += 1
        end
      end
    end
  end
  file_check = VerificationResult.new("catalog_entry")
  file_check.verified = (found == inventory.file_count)
  file_check.details = {
    'expected' => inventory.file_count,
    'found' => found
  }
  # Paths without a catalog entry are only reported when there are any.
  file_check.details['missing'] = missing unless missing.empty?

  outcome.subentities << file_check
  outcome.verified = outcome.subentities.all?(&:verified)
  outcome
end
|
302
|
+
|
303
|
+
# @return [VerificationResult] result of testing whether files in data folder match files listed in version additions inventory
|
304
|
+
def verify_version_additions
  outcome = VerificationResult.new("version_additions")
  additions = file_inventory('additions')
  outcome.subentities << VerificationResult.verify_value('composite_key', composite_key, additions.composite_key)
  # Inventory what is actually in this version's data directory and compare it with the additions inventory.
  on_disk = FileInventory.new(:type=>'directory').inventory_from_directory(@version_pathname.join('data'))
  comparison = FileInventoryDifference.new
  comparison.compare(additions, on_disk)
  file_check = VerificationResult.new('file_differences')
  file_check.verified = (comparison.difference_count == 0)
  file_check.details = comparison.differences_detail
  outcome.subentities << file_check
  outcome.verified = outcome.subentities.all?(&:verified)
  outcome
end
|
319
|
+
|
320
|
+
# @param timestamp [Time] The time at which the deactivation was initiated. Used to name the inactive directory
|
321
|
+
# @return [null] Deactivate this object version by moving it to another directory. (Used by restore operation)
|
322
|
+
def deactivate(timestamp)
  # Nothing to move when this version has no directory on disk.
  return unless @version_pathname.exist?
  # getutc returns a new UTC Time; Time#utc would convert the caller's timestamp in place.
  stamp = timestamp.getutc.iso8601.gsub(/[-:]/,'')
  # The inactive copy lives in a sibling directory named after the compact UTC timestamp.
  inactive_home = @version_pathname.parent.join(stamp)
  inactive_home.mkpath
  demoted_pathname = inactive_home.join(@version_pathname.basename)
  @version_pathname.rename(demoted_pathname)
end
|
330
|
+
|
331
|
+
end
|
332
|
+
|
333
|
+
end
|