moab-versioning 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/moab.rb +59 -0
- data/lib/moab/bagger.rb +289 -0
- data/lib/moab/config.rb +21 -0
- data/lib/moab/exceptions.rb +18 -0
- data/lib/moab/file_group.rb +244 -0
- data/lib/moab/file_group_difference.rb +336 -0
- data/lib/moab/file_group_difference_subset.rb +45 -0
- data/lib/moab/file_instance.rb +82 -0
- data/lib/moab/file_instance_difference.rb +54 -0
- data/lib/moab/file_inventory.rb +279 -0
- data/lib/moab/file_inventory_difference.rb +132 -0
- data/lib/moab/file_manifestation.rb +85 -0
- data/lib/moab/file_signature.rb +200 -0
- data/lib/moab/signature_catalog.rb +195 -0
- data/lib/moab/signature_catalog_entry.rb +61 -0
- data/lib/moab/storage_object.rb +220 -0
- data/lib/moab/storage_object_version.rb +333 -0
- data/lib/moab/storage_repository.rb +57 -0
- data/lib/moab/storage_services.rb +104 -0
- data/lib/moab/verification_result.rb +83 -0
- data/lib/moab/version_metadata.rb +38 -0
- data/lib/moab/version_metadata_entry.rb +64 -0
- data/lib/moab/version_metadata_event.rb +47 -0
- data/lib/moab_stanford.rb +18 -0
- data/lib/monkey_patches.rb +65 -0
- data/lib/serializer.rb +36 -0
- data/lib/serializer/manifest.rb +76 -0
- data/lib/serializer/serializable.rb +178 -0
- data/lib/stanford/active_fedora_object.rb +34 -0
- data/lib/stanford/content_inventory.rb +236 -0
- data/lib/stanford/dor_metadata.rb +49 -0
- data/lib/stanford/storage_repository.rb +46 -0
- data/lib/stanford/storage_services.rb +66 -0
- data/lib/tasks/yard.rake +34 -0
- data/lib/tools/api_doc_generator.rb +396 -0
- data/lib/tools/spec_generator.rb +410 -0
- data/lib/tools/spec_generator_old.rb +49 -0
- metadata +252 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: fe7dbe157c6bc9418f8b336b65431a1266ca4375
|
4
|
+
data.tar.gz: 5328ef34ef060962cf9598001c6d146cde7afdc3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 79893b2a0f5b5d5791cee7ef9d26213d2f6876ea32f71a258d6d3030982fa8a6824f5285de20e343775ca1dc4625cbc16d32c793182116f830c7a4065dd5c5ce
|
7
|
+
data.tar.gz: e73bae98354c6001887a6a1fa24589919e696596c74ffdaac0b3132b123c761fea0850e5000d3795443af49b696a3e4cbc92567d4ccb7fa5020f1cbfa7aeb191
|
data/lib/moab.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# Moab is a module that provides a distinctive namespace for the collection of classes it contains.
|
2
|
+
#
|
3
|
+
# ====Data Model
|
4
|
+
#
|
5
|
+
# * <b>{FileInventory} = container for recording information about a collection of related files</b>
|
6
|
+
# * {FileGroup} [1..*] = subset allow segregation of content and metadata files
|
7
|
+
# * {FileManifestation} [1..*] = snapshot of a file's filesystem characteristics
|
8
|
+
# * {FileSignature} [1] = file fixity information
|
9
|
+
# * {FileInstance} [1..*] = filepath and timestamp of any physical file having that signature
|
10
|
+
#
|
11
|
+
# * <b>{SignatureCatalog} = lookup table containing a cumulative collection of all files ever ingested</b>
|
12
|
+
# * {SignatureCatalogEntry} [1..*] = a row in the lookup table containing storage information about a single file
|
13
|
+
# * {FileSignature} [1] = file fixity information
|
14
|
+
#
|
15
|
+
# * <b>{FileInventoryDifference} = compares two {FileInventory} instances based on file signatures and pathnames</b>
|
16
|
+
# * {FileGroupDifference} [1..*] = performs analysis and reports differences between two matching {FileGroup} objects
|
17
|
+
# * {FileGroupDifferenceSubset} [1..5] = collects a set of file-level differences of a given change type
|
18
|
+
# * {FileInstanceDifference} [1..*] = contains difference information at the file level
|
19
|
+
# * {FileSignature} [1..2] = contains the file signature(s) of two file instances being compared
|
20
|
+
#
|
21
|
+
# * <b>{VersionMetadata} = descriptive information about a digital object's versions</b>
|
22
|
+
# * {VersionMetadataEntry} [1..*] = attributes of a digital object version
|
23
|
+
# * {VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps
|
24
|
+
#
|
25
|
+
# * <b>{StorageObject} = represents a digital object's repository storage location and ingest/dissemination methods</b>
|
26
|
+
# * {StorageObjectVersion} [1..*] = represents a version subdirectory within an object's home directory
|
27
|
+
# * {Bagger} [1] = utility for creating bagit packages for ingest or dissemination
|
28
|
+
#
|
29
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
30
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
31
|
+
# Namespace declaration only; the classes are added by the files required below
module Moab; end
|
33
|
+
|
34
|
+
require 'serializer'
|
35
|
+
include Serializer
|
36
|
+
require 'confstruct/configuration'
|
37
|
+
require 'moab/config'
|
38
|
+
require 'moab/file_signature'
|
39
|
+
require 'moab/file_instance'
|
40
|
+
require 'moab/file_manifestation'
|
41
|
+
require 'moab/file_group'
|
42
|
+
require 'moab/file_inventory'
|
43
|
+
require 'moab/signature_catalog_entry'
|
44
|
+
require 'moab/signature_catalog'
|
45
|
+
require 'moab/file_instance_difference'
|
46
|
+
require 'moab/file_group_difference_subset'
|
47
|
+
require 'moab/file_group_difference'
|
48
|
+
require 'moab/file_inventory_difference'
|
49
|
+
require 'moab/version_metadata_event'
|
50
|
+
require 'moab/version_metadata_entry'
|
51
|
+
require 'moab/version_metadata'
|
52
|
+
require 'moab/bagger'
|
53
|
+
require 'moab/storage_object'
|
54
|
+
require 'moab/storage_object_version'
|
55
|
+
require 'moab/storage_repository'
|
56
|
+
require 'moab/storage_services'
|
57
|
+
require 'moab/exceptions'
|
58
|
+
require 'moab/verification_result'
|
59
|
+
|
data/lib/moab/bagger.rb
ADDED
@@ -0,0 +1,289 @@
|
|
1
|
+
require 'moab'
|
2
|
+
require 'systemu'
|
3
|
+
|
4
|
+
module Moab
|
5
|
+
|
6
|
+
# A class used to create a BagIt package from a version inventory and a set of source files.
|
7
|
+
# The {#fill_bag} method is called with a package_mode parameter that specifies
|
8
|
+
# whether the bag is being created for deposit into the repository or is to contain the output of a version reconstruction.
|
9
|
+
# * In <b>:depositor</b> mode, the version inventory is filtered using the digital object's signature catalog so that only new files are included
|
10
|
+
# * In <b>:reconstructor</b> mode, the version inventory and signature catalog are used together to regenerate the complete set of files for the version.
|
11
|
+
#
|
12
|
+
# ====Data Model
|
13
|
+
# * {StorageRepository} = represents a digital object repository storage node
|
14
|
+
# * {StorageServices} = supports application layer access to the repository's objects, data, and metadata
|
15
|
+
# * {StorageObject} = represents a digital object's repository storage location and ingest/dissemination methods
|
16
|
+
# * {StorageObjectVersion} [1..*] = represents a version subdirectory within an object's home directory
|
17
|
+
# * <b>{Bagger} [1] = utility for creating bagit packages for ingest or dissemination</b>
|
18
|
+
#
|
19
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
20
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
21
|
+
class Bagger
|
22
|
+
|
23
|
+
# @param version_inventory [FileInventory] The complete inventory of the files comprising a digital object version
|
24
|
+
# @param signature_catalog [SignatureCatalog] The signature catalog, used to specify source paths (in :reconstructor mode),
|
25
|
+
# or to filter the version inventory (in :depositor mode)
|
26
|
+
# @param bag_pathname [Pathname,String] The location of the Bagit bag to be created
|
27
|
+
def initialize(version_inventory, signature_catalog, bag_pathname)
  # Normalize the bag location so later code can rely on Pathname methods
  @bag_pathname = Pathname.new(bag_pathname)
  @signature_catalog = signature_catalog
  @version_inventory = version_inventory
  # Creates the bag directory (if needed) and writes bagit.txt into it
  create_bagit_txt
end
|
33
|
+
|
34
|
+
# @return [FileInventory] The complete inventory of the files comprising a digital object version
|
35
|
+
attr_accessor :version_inventory
|
36
|
+
|
37
|
+
# @return [SignatureCatalog] The signature catalog, used to specify source paths (in :reconstructor mode),
|
38
|
+
# or to filter the version inventory (in :depositor mode)
|
39
|
+
attr_accessor :signature_catalog
|
40
|
+
|
41
|
+
# @return [Pathname] The location of the Bagit bag to be created
|
42
|
+
attr_accessor :bag_pathname
|
43
|
+
|
44
|
+
# @return [FileInventory] The actual inventory of the files to be packaged (derived from @version_inventory in {#fill_bag})
|
45
|
+
attr_accessor :bag_inventory
|
46
|
+
|
47
|
+
# @return [Symbol] The operational mode controlling what gets bagged {#fill_bag}
|
48
|
+
# and the full path of source files {#fill_payload}
|
49
|
+
attr_accessor :package_mode
|
50
|
+
|
51
|
+
# @return [void] Delete any existing bag data and re-initialize the bag directory
|
52
|
+
# Remove the bag directory and its tar file, then write a fresh bagit.txt
def reset_bag()
  delete_bag()
  delete_tarfile()
  create_bagit_txt()
end
|
57
|
+
|
58
|
+
# @api internal
|
59
|
+
# @return [void] Generate the bagit.txt tag file
|
60
|
+
# Ensure the bag directory exists and (re)write its two-line bagit.txt declaration
def create_bagit_txt()
  @bag_pathname.mkpath
  declaration = @bag_pathname.join("bagit.txt")
  declaration.open('w') do |io|
    io.puts "Tag-File-Character-Encoding: UTF-8"
    io.puts "BagIt-Version: 0.97"
  end
end
|
67
|
+
|
68
|
+
# @return [NilClass] Delete the bagit files
|
69
|
+
# Delete the bag directory, but only when it contains a bagit.txt file.
# Always returns nil.
def delete_bag()
  # Refuse to touch anything that does not look like a bag
  return nil unless @bag_pathname.join('bagit.txt').exist?
  if @bag_pathname.join('data').exist?
    # A payload directory is present: remove the whole tree
    @bag_pathname.rmtree
  else
    # Tag files only: remove each child, then the (now empty) directory
    @bag_pathname.children.each(&:delete)
    @bag_pathname.rmdir
  end
  nil
end
|
81
|
+
|
82
|
+
# Delete the tar file named after the bag (<bag name>.tar in the bag's parent directory), if it exists
|
83
|
+
# Remove "<bag name>.tar" from the bag's parent directory when that file exists
def delete_tarfile()
  tarfile = @bag_pathname.parent.join("#{@bag_pathname.basename}.tar")
  tarfile.delete if tarfile.exist?
end
|
89
|
+
|
90
|
+
# @api external
|
91
|
+
# @param package_mode [Symbol] The operational mode controlling what gets bagged and the full path of source files (Bagger#fill_payload)
|
92
|
+
# @param source_base_pathname [Pathname] The home location of the source files
|
93
|
+
# @return [Bagger] Perform all the operations required to fill the bag payload, write the manifests and tagfiles, and checksum the tagfiles
|
94
|
+
# @example {include:file:spec/features/storage/deposit_spec.rb}
|
95
|
+
# Run the three packaging steps in order and return this object:
# build the bag inventory, link the payload, then write manifests and tag files
def fill_bag(package_mode, source_base_pathname)
  create_bag_inventory(package_mode)
  fill_payload(source_base_pathname)
  create_tagfiles()
  self
end
|
101
|
+
|
102
|
+
# @api external
|
103
|
+
# @param package_mode [Symbol] The operational mode controlling what gets bagged and the full path of source files (Bagger#fill_payload)
|
104
|
+
# @return [FileInventory] Create, write, and return the inventory of the files that will become the payload
|
105
|
+
# Build the inventory of files that will become the bag payload and write the
# inventory file(s) into the bag directory.
#
# * :depositor     - writes the full version inventory ('version') plus the
#   subset returned by the signature catalog's version_additions ('additions');
#   the additions become the bag inventory
# * :reconstructor - the version inventory itself is the bag inventory and is
#   written as 'version'
#
# @param package_mode [Symbol] :depositor or :reconstructor
# @return [FileInventory] the inventory of the files to be packaged
# @raise [ArgumentError] if package_mode is not one of the two supported modes
def create_bag_inventory(package_mode)
  # Reject unsupported modes up front; otherwise the case below would fall through
  # silently and leave @bag_inventory nil (or stale from a previous call)
  unless [:depositor, :reconstructor].include?(package_mode)
    raise ArgumentError, "Unknown package_mode #{package_mode.inspect} (expected :depositor or :reconstructor)"
  end
  @package_mode = package_mode
  @bag_pathname.mkpath
  case package_mode
  when :depositor
    @version_inventory.write_xml_file(@bag_pathname, 'version')
    @bag_inventory = @signature_catalog.version_additions(@version_inventory)
    @bag_inventory.write_xml_file(@bag_pathname, 'additions')
  when :reconstructor
    @bag_inventory = @version_inventory
    @bag_inventory.write_xml_file(@bag_pathname, 'version')
  end
  @bag_inventory
end
|
119
|
+
|
120
|
+
# @api internal
|
121
|
+
# @param source_base_pathname [Pathname] The home location of the source files
|
122
|
+
# @return [void] Fill in the bag's data folder with copies of all files to be packaged for delivery.
|
123
|
+
# This method creates symbolic links to the source files instead of copying them, which greatly speeds up the process.
|
124
|
+
# The links are dereferenced (replaced by the files they point to) when the bag is tarred by {#create_tarfile}
|
125
|
+
# Populate the bag's data folder, one inventory group at a time, using the
# strategy selected by @package_mode
def fill_payload(source_base_pathname)
  @bag_inventory.groups.each do |group|
    group_id = group.group_id
    if @package_mode == :depositor
      # Deposits read each group from its own subdirectory of the source location
      deposit_group(group_id, source_base_pathname.join(group_id))
    elsif @package_mode == :reconstructor
      # Reconstruction resolves storage paths relative to the given base itself
      reconstuct_group(group_id, source_base_pathname)
    end
  end
end
|
136
|
+
|
137
|
+
# @param group_id [String] The name of the data group being copied to the bag
|
138
|
+
# @param source_dir [Pathname] The location from which files should be copied
|
139
|
+
# @return [Boolean] Copy all the files listed in the group inventory to the bag.
|
140
|
+
# Return true if successful or nil if the group was not found in the inventory
|
141
|
+
# Link every file listed for the group in the bag inventory into the bag's
# data/<group_id> folder. Files are symlinked, not copied.
#
# @param group_id [String] the name of the data group being added to the bag
# @param source_dir [Pathname] the directory the group's relative paths are resolved against
# @return [Boolean, nil] true when the group was processed;
#   nil when the group is missing from the inventory or has no files
def deposit_group(group_id, source_dir)
  group = @bag_inventory.group(group_id)
  # The original `return nil?` invoked self.nil? and therefore returned false, not nil
  return nil if group.nil? || group.files.empty?
  target_dir = @bag_pathname.join('data', group_id)
  group.path_list.each do |relative_path|
    source = source_dir.join(relative_path)
    target = target_dir.join(relative_path)
    # Create intermediate directories for nested relative paths
    target.parent.mkpath
    FileUtils.symlink source, target
  end
  true
end
|
153
|
+
|
154
|
+
# @param group_id [String] The name of the data group being copied to the bag
|
155
|
+
# @param storage_object_dir [Pathname] The home location of the object store from which files should be copied
|
156
|
+
# @return [Boolean] Copy all the files listed in the group inventory to the bag.
|
157
|
+
# Return true if successful or nil if the group was not found in the inventory
|
158
|
+
def reconstuct_group(group_id, storage_object_dir)
|
159
|
+
group = @bag_inventory.group(group_id)
|
160
|
+
return nil? if group.nil? or group.files.empty?
|
161
|
+
target_dir = @bag_pathname.join('data',group_id)
|
162
|
+
group.files.each do |file|
|
163
|
+
catalog_entry = @signature_catalog.signature_hash[file.signature]
|
164
|
+
source = storage_object_dir.join(catalog_entry.storage_path)
|
165
|
+
file.instances.each do |instance|
|
166
|
+
target = target_dir.join(instance.path)
|
167
|
+
target.parent.mkpath
|
168
|
+
FileUtils.symlink source, target
|
169
|
+
end
|
170
|
+
end
|
171
|
+
true
|
172
|
+
end
|
173
|
+
|
174
|
+
# @return [Boolean] create BagIt manifests and tag files. Return true if successful
|
175
|
+
# Write the payload manifests first, then the tag files, and finally the tag
# manifests (which checksum the files written by the earlier steps). Returns true.
def create_tagfiles()
  create_payload_manifests()
  create_bag_info_txt()
  create_bagit_txt()
  create_tagfile_manifests()
  true
end
|
182
|
+
|
183
|
+
# @api internal
|
184
|
+
# @return [void] Using the checksum information from the inventory, create BagIt manifest files for the payload
|
185
|
+
# Write manifest-md5.txt, manifest-sha1.txt and manifest-sha256.txt into the bag
# from the checksums recorded in the bag inventory. A manifest that ends up
# empty (no checksum of that type was available) is deleted again.
def create_payload_manifests
  manifest_types = [:md5, :sha1, :sha256]
  manifest_pathname = {}
  manifest_file = {}
  manifest_types.each do |type|
    pathname = @bag_pathname.join("manifest-#{type}.txt")
    manifest_pathname[type] = pathname
    manifest_file[type] = pathname.open('w')
  end
  @bag_inventory.groups.each do |group|
    group.files.each do |file|
      fixity = file.signature.fixity
      file.instances.each do |instance|
        # Payload paths in a manifest are relative to the bag root
        data_path = File.join('data', group.group_id, instance.path)
        manifest_types.each do |type|
          checksum = fixity[type]
          manifest_file[type].puts("#{checksum} #{data_path}") if checksum
        end
      end
    end
  end
ensure
  # Always close the handles, then drop manifests that received no entries
  manifest_types.each do |type|
    handle = manifest_file[type]
    next unless handle
    handle.close
    pathname = manifest_pathname[type]
    pathname.delete if pathname.exist? && pathname.size == 0
  end
end
|
213
|
+
|
214
|
+
# @api internal
|
215
|
+
# @return [void] Generate the bag-info.txt tag file
|
216
|
+
# Write bag-info.txt with the identifier, Payload-Oxum (bytes.files) and
# human-readable size reported by the bag inventory
def create_bag_info_txt
  info_file = @bag_pathname.join("bag-info.txt")
  info_file.open('w') do |io|
    io.puts "External-Identifier: #{@bag_inventory.package_id}"
    io.puts "Payload-Oxum: #{@bag_inventory.byte_count}.#{@bag_inventory.file_count}"
    io.puts "Bag-Size: #{@bag_inventory.human_size}"
  end
end
|
223
|
+
|
224
|
+
# @api internal
|
225
|
+
# @return [void] create BagIt tag manifest files containing checksums for all files in the bag's root directory
|
226
|
+
# Write tagmanifest-md5.txt, tagmanifest-sha1.txt and tagmanifest-sha256.txt,
# listing checksums for every regular file in the bag's root directory except
# the tag manifests themselves. A tag manifest that ends up empty is deleted.
def create_tagfile_manifests()
  manifest_types = [:md5, :sha1, :sha256]
  manifest_pathname = {}
  manifest_file = {}
  manifest_types.each do |type|
    pathname = @bag_pathname.join("tagmanifest-#{type}.txt")
    manifest_pathname[type] = pathname
    manifest_file[type] = pathname.open('w')
  end
  @bag_pathname.children.each do |file|
    # Skip subdirectories and the tag manifests that were just opened above
    next if file.directory? || file.basename.to_s[0, 11] == 'tagmanifest'
    fixity = FileSignature.new.signature_from_file(file).fixity
    manifest_types.each do |type|
      checksum = fixity[type]
      manifest_file[type].puts("#{checksum} #{file.basename}") if checksum
    end
  end
ensure
  # Always close the handles, then drop manifests that received no entries
  manifest_types.each do |type|
    handle = manifest_file[type]
    next unless handle
    handle.close
    pathname = manifest_pathname[type]
    pathname.delete if pathname.exist? && pathname.size == 0
  end
end
|
252
|
+
|
253
|
+
# @return [Boolean] Create a tar file containing the bag
|
254
|
+
# Create a tar file containing the bag directory. Symbolic links in the bag are
# dereferenced, so the archive holds the linked file content.
#
# @param tar_pathname [Pathname, String, nil] where to write the tar file;
#   defaults to "<bag name>.tar" in the bag's parent directory
# @return [Boolean] true when the tar file exists after running the command
# @raise [RuntimeError] if the tar command fails or the tar file was not created
def create_tarfile(tar_pathname=nil)
  bag_name = @bag_pathname.basename
  bag_parent = @bag_pathname.parent
  # Accept plain strings as well: the existence check below needs a Pathname
  tar_pathname = tar_pathname ? Pathname.new(tar_pathname) : bag_parent.join("#{bag_name}.tar")
  # NOTE: paths are embedded in single quotes, so a path that itself contains a
  # single quote is not supported by this command line
  tar_cmd = "cd '#{bag_parent}'; tar --dereference --force-local -cf '#{tar_pathname}' '#{bag_name}'"
  begin
    shell_execute(tar_cmd)
  rescue
    # Retry without --force-local when the first attempt fails for any reason
    shell_execute(tar_cmd.sub('--force-local', ''))
  end
  raise "Unable to create tarfile #{tar_pathname}" unless tar_pathname.exist?
  true
end
|
268
|
+
|
269
|
+
# Executes a system command in a subprocess.
|
270
|
+
# The method will return stdout from the command if execution was successful.
|
271
|
+
# The method will raise an exception if execution fails.
|
272
|
+
# The exception's message will contain the explanation of the failure.
|
273
|
+
# @param [String] command the command to be executed
|
274
|
+
# @return [String] stdout from the command if execution was successful
|
275
|
+
# Run a system command in a subprocess via systemu.
#
# @param command [String] the command to be executed
# @return [String] stdout from the command when its exit status is 0
# @raise [RuntimeError] if the command exits with a non-zero status or cannot be
#   run at all; the message contains the command, its stderr (or the underlying
#   error message when no stderr was captured) and any stdout
def shell_execute(command)
  status, stdout, stderr = systemu(command)
  raise stderr if status.exitstatus != 0
  stdout
rescue => e
  # stderr is still nil when systemu itself raised; fall back to the original
  # error's message so the rescue does not fail with NoMethodError and hide the cause
  cause = stderr.to_s.empty? ? e.message.to_s : stderr
  msg = "Command failed to execute: [#{command}] caused by <STDERR = #{cause.split($/).join('; ')}>"
  msg << " STDOUT = #{stdout.split($/).join('; ')}" if stdout && stdout.length > 0
  raise msg
end
|
286
|
+
|
287
|
+
end
|
288
|
+
|
289
|
+
end
|
data/lib/moab/config.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'moab'
|
2
|
+
|
3
|
+
module Moab
|
4
|
+
|
5
|
+
#class Configuration < Confstruct::Configuration
|
6
|
+
#
|
7
|
+
# def configure(*args, &block)
|
8
|
+
# super(*args, &block)
|
9
|
+
#
|
10
|
+
# # Whatever you want to do after configuration
|
11
|
+
# # Something.initialize(self.repository_home)
|
12
|
+
# end
|
13
|
+
#end
|
14
|
+
|
15
|
+
# @return [Confstruct::Configuration] the configuration data
|
16
|
+
# Defaults: repository_home is left unset (nil) and path_method is :druid_tree
Config = Confstruct::Configuration.new do
|
17
|
+
repository_home nil
|
18
|
+
path_method :druid_tree
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Moab
  # Exception classes in the Moab namespace; each is a plain RuntimeError subclass
  class ObjectNotFoundException < RuntimeError; end
  class FileNotFoundException < RuntimeError; end
  class InvalidMetadataException < RuntimeError; end
  class ValidationException < RuntimeError; end
end
|
@@ -0,0 +1,244 @@
|
|
1
|
+
require 'moab'
|
2
|
+
|
3
|
+
module Moab
|
4
|
+
|
5
|
+
# A container for a standard subset of a digital object's {FileManifestation} objects
|
6
|
+
# Used to segregate depositor content from repository metadata files
|
7
|
+
# This is a child element of {FileInventory}, which contains a full example
|
8
|
+
#
|
9
|
+
# ====Data Model
|
10
|
+
# * {FileInventory} = container for recording information about a collection of related files
|
11
|
+
# * <b>{FileGroup} [1..*] = subset allow segregation of content and metadata files</b>
|
12
|
+
# * {FileManifestation} [1..*] = snapshot of a file's filesystem characteristics
|
13
|
+
# * {FileSignature} [1] = file fixity information
|
14
|
+
# * {FileInstance} [1..*] = filepath and timestamp of any physical file having that signature
|
15
|
+
#
|
16
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
17
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
18
|
+
class FileGroup < Serializable
|
19
|
+
|
20
|
+
include HappyMapper
|
21
|
+
|
22
|
+
# The name of the XML element used to serialize this object's data
|
23
|
+
tag 'fileGroup'
|
24
|
+
|
25
|
+
# (see Serializable#initialize)
|
26
|
+
def initialize(opts={})
  # Set the defaults before handing opts to the superclass initializer
  @data_source = ""
  @signature_hash = OrderedHash.new
  super(opts)
end
|
31
|
+
|
32
|
+
# @attribute
|
33
|
+
# @return [String] The name of the file group
|
34
|
+
attribute :group_id, String, :tag => 'groupId', :key => true
|
35
|
+
|
36
|
+
# @attribute
|
37
|
+
# @return [String] The directory location or other source of this group's file data
|
38
|
+
attribute :data_source, String, :tag => 'dataSource'
|
39
|
+
|
40
|
+
# @attribute
|
41
|
+
# @return [Integer] The total number of data files (dynamically calculated)
|
42
|
+
attribute :file_count, Integer, :tag => 'fileCount', :on_save => Proc.new {|i| i.to_s}
|
43
|
+
|
44
|
+
# Sum of the file_count values of every manifestation in the group
def file_count
  counts = files.map { |manifestation| manifestation.file_count }
  counts.inject(0) { |total, count| total + count }
end
|
47
|
+
|
48
|
+
# @attribute
|
49
|
+
# @return [Integer] The total size (in bytes) of all data files (dynamically calculated)
|
50
|
+
attribute :byte_count, Integer, :tag => 'byteCount', :on_save => Proc.new {|i| i.to_s}
|
51
|
+
|
52
|
+
# Sum of the byte_count values of every manifestation in the group
def byte_count
  sizes = files.map { |manifestation| manifestation.byte_count }
  sizes.inject(0) { |total, size| total + size }
end
|
55
|
+
|
56
|
+
# @attribute
|
57
|
+
# @return [Integer] The total disk usage (in 1 kB blocks) of all data files (estimating du -k result) (dynamically calculated)
|
58
|
+
attribute :block_count, Integer, :tag => 'blockCount', :on_save => Proc.new {|i| i.to_s}
|
59
|
+
|
60
|
+
# Sum of the block_count values of every manifestation in the group
def block_count
  blocks = files.map { |manifestation| manifestation.block_count }
  blocks.inject(0) { |total, count| total + count }
end
|
63
|
+
|
64
|
+
# @return [Array<String>] The data fields to include in summary reports
|
65
|
+
# Names of the attributes that make up a summary report for this group
def summary_fields
  ['group_id', 'file_count', 'byte_count', 'block_count']
end
|
68
|
+
|
69
|
+
|
70
|
+
# @attribute
|
71
|
+
# @return [Array<FileManifestation>] The set of files comprising the group
|
72
|
+
has_many :files, FileManifestation, :tag => 'file'
|
73
|
+
|
74
|
+
# The manifestations of this group, read from the values of the signature hash
def files
  manifestations = @signature_hash.values
  manifestations
end
|
77
|
+
|
78
|
+
# @return [OrderedHash<FileSignature, FileManifestation>] The actual in-memory store for the collection
|
79
|
+
# of {FileManifestation} objects that are contained in this file group.
|
80
|
+
attr_accessor :signature_hash
|
81
|
+
|
82
|
+
# @api internal
|
83
|
+
# @return [OrderedHash<String,FileSignature>] An index of file paths,
|
84
|
+
# used to test for existence of a filename in this file group
|
85
|
+
# Build a path => signature index covering every file instance in the group
def path_hash
  index = OrderedHash.new
  @signature_hash.each do |signature, manifestation|
    manifestation.instances.each { |instance| index[instance.path] = signature }
  end
  index
end
|
94
|
+
|
95
|
+
# @return [Array<String>] The list of file paths in this group
|
96
|
+
# Collect the path of every instance of every file, in group order
def path_list
  paths = []
  files.each do |file|
    file.instances.each { |instance| paths << instance.path }
  end
  paths
end
|
99
|
+
|
100
|
+
# @api internal
|
101
|
+
# @param signature_subset [Array<FileSignature>] The signatures used to select the entries to return
|
102
|
+
# @return [OrderedHash<String,FileSignature>] A pathname,signature hash containing a subset of the filenames in this file group
|
103
|
+
# Build a path => signature index restricted to the given signatures
def path_hash_subset(signature_subset)
  subset = OrderedHash.new
  signature_subset.each do |signature|
    @signature_hash[signature].instances.each do |instance|
      subset[instance.path] = signature
    end
  end
  subset
end
|
113
|
+
|
114
|
+
# @param manifestiation_array [Array<FileManifestation>] The collection of {FileManifestation} objects
|
115
|
+
# that are to be added to this file group. Used by HappyMapper when deserializing a {FileInventory} file
|
116
|
+
# Add the array of {FileManifestation} objects to this file group.
|
117
|
+
# Add each manifestation in the array to this group via add_file
def files=(manifestiation_array)
  manifestiation_array.each { |entry| add_file(entry) }
end
|
122
|
+
|
123
|
+
# @api internal
|
124
|
+
# @param manifestation [FileManifestation] The file manifestation to be added
|
125
|
+
# @return [void] Add a single {FileManifestation} object to this group
|
126
|
+
# Register every instance of the manifestation under the manifestation's signature
def add_file(manifestation)
  manifestation.instances.each do |file_instance|
    add_file_instance(manifestation.signature, file_instance)
  end
end
|
131
|
+
|
132
|
+
# @api internal
|
133
|
+
# @param signature [FileSignature] The signature of the file instance to be added
|
134
|
+
# @param instance [FileInstance] The pathname and datetime of the file instance to be added
|
135
|
+
# @return [void] Add a single {FileSignature},{FileInstance} key/value pair to this group.
|
136
|
+
# Data is actually stored in the {#signature_hash}
|
137
|
+
# Append the instance to the manifestation stored under the signature,
# creating and registering that manifestation first when the signature is new
def add_file_instance(signature,instance)
  unless @signature_hash.has_key?(signature)
    fresh = FileManifestation.new
    fresh.signature = signature
    @signature_hash[signature] = fresh
  end
  @signature_hash[signature].instances << instance
end
|
147
|
+
|
148
|
+
# @param path [String] The path of the file to be removed
|
149
|
+
# @return [void] Remove a file from the inventory
|
150
|
+
# for example, the manifest inventory does not contain a file entry for itself
|
151
|
+
# Look up the signature recorded for the path and delete that signature's
# entry from the signature hash
def remove_file_having_path(path)
  @signature_hash.delete(path_hash[path])
end
|
155
|
+
|
156
|
+
# @return [Pathname] The full path used as the basis of the relative paths reported
|
157
|
+
# in {FileInstance} objects that are children of the {FileManifestation} objects contained in this file group
|
158
|
+
attr_accessor :base_directory
|
159
|
+
|
160
|
+
# Store the base directory as an absolute Pathname
def base_directory=(basepath)
  absolute = Pathname.new(basepath).expand_path
  @base_directory = absolute
end
|
163
|
+
|
164
|
+
# @api internal
|
165
|
+
# @param pathname [Pathname] The file path to be tested
|
166
|
+
# @return [Boolean] Test whether the given path is contained within the {#base_directory}
|
167
|
+
# Walk from the expanded path up to the root and check whether the base
# directory is met on the way. Returns true, or raises when the base directory
# is unset or is not an ancestor of (or equal to) the path.
def is_descendent_of_base?(pathname)
  raise("base_directory has not been set") if @base_directory.nil?
  found = false
  pathname.expand_path.ascend do |ancestor|
    found = true if ancestor == @base_directory
  end
  raise("#{pathname} is not a descendent of #{@base_directory}") unless found
  found
end
|
174
|
+
|
175
|
+
# @param directory [Pathname,String] The directory whose children are to be added to the file group
|
176
|
+
# @param signatures_from_bag [Hash<Pathname,Signature>] The fixity data already calculated for the files
|
177
|
+
# @param recursive [Boolean] if true, descend into child directories
|
178
|
+
# @return [FileGroup] Harvest a directory (using digest hash for fixity data) and add all files to the file group
|
179
|
+
# Remember the precomputed signatures, then harvest the directory as usual
def group_from_bagit_subdir(directory, signatures_from_bag, recursive=true)
  @signatures_from_bag = signatures_from_bag
  harvested = group_from_directory(directory, recursive)
  harvested
end
|
183
|
+
|
184
|
+
# @api internal
|
185
|
+
# @param directory [Pathname,String] The location of the files to harvest
|
186
|
+
# @param recursive [Boolean] if true, descend into child directories
|
187
|
+
# @return [FileGroup] Harvest a directory and add all files to the file group
|
188
|
+
# Harvest a directory and add all files found to this file group.
# If harvesting fails with a StandardError (for example the directory does not
# exist), the group is returned with whatever was harvested so far and
# data_source set to the directory as given.
#
# @param directory [Pathname,String] the location of the files to harvest
# @param recursive [Boolean] if true, descend into child directories
# @return [FileGroup] this file group
def group_from_directory(directory, recursive=true)
  self.base_directory = directory
  @data_source = @base_directory.to_s
  harvest_directory(directory, recursive)
  self
rescue StandardError # e.g. Errno::ENOENT
  # Deliberately best-effort, but no longer rescues Exception: Interrupt,
  # SystemExit and similar must propagate instead of being silently swallowed
  @data_source = directory.to_s
  self
end
|
197
|
+
|
198
|
+
# @api internal
|
199
|
+
# @param path [Pathname,String] pathname of the directory to be harvested
|
200
|
+
# @param recursive [Boolean] if true, also harvest subdirectories
|
201
|
+
# @param validated [Boolean] if true, path is verified to be descendant of (#base_directory)
|
202
|
+
# @return [void] Traverse a directory tree and add all files to the file group
|
203
|
+
# Note that unlike Find.find and Dir.glob, Pathname passes through symbolic links
|
204
|
+
# @see http://stackoverflow.com/questions/3974087/how-to-make-rubys-find-find-follow-symlinks
|
205
|
+
# @see http://stackoverflow.com/questions/357754/can-i-traverse-symlinked-directories-in-ruby-with-a-glob
|
206
|
+
# Visit the directory's children in sorted order, adding each file to the group
# and descending into subdirectories when recursive is true. Entries named
# .DS_Store are ignored. The descendant check runs only when no truthy
# validated value is passed in, and its result is handed down to nested calls.
def harvest_directory(path, recursive, validated=nil)
  pathname = Pathname.new(path).expand_path
  validated ||= is_descendent_of_base?(pathname)
  pathname.children.sort.each do |child|
    next if child.basename.to_s == ".DS_Store"
    if child.directory?
      harvest_directory(child, recursive, validated) if recursive
    else
      add_physical_file(child, validated)
    end
  end
  nil
end
|
220
|
+
|
221
|
+
# @api internal
|
222
|
+
# @param pathname [Pathname, String] The location of the file to be added
|
223
|
+
# @param validated [Boolean] if true, path is verified to be descendant of (#base_directory)
|
224
|
+
# @return [void] Add a single physical file's data to the array of files in this group.
|
225
|
+
# If fixity data was supplied in bag manifests, then utilize that data.
|
226
|
+
# Add one file on disk to the group. A signature supplied for this path in
# @signatures_from_bag is used when present (normalized against the file first
# if it is not complete); otherwise the signature is computed from the file.
def add_physical_file(pathname, validated=nil)
  pathname = Pathname.new(pathname).expand_path
  validated ||= is_descendent_of_base?(pathname)
  instance = FileInstance.new.instance_from_file(pathname, @base_directory)
  bag_signature = @signatures_from_bag && @signatures_from_bag[pathname]
  signature =
    if bag_signature
      bag_signature.complete? ? bag_signature : bag_signature.normalized_signature(pathname)
    else
      FileSignature.new.signature_from_file(pathname)
    end
  add_file_instance(signature, instance)
end
|
240
|
+
|
241
|
+
end
|
242
|
+
|
243
|
+
end
|
244
|
+
|