moab-versioning 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/moab.rb +59 -0
- data/lib/moab/bagger.rb +289 -0
- data/lib/moab/config.rb +21 -0
- data/lib/moab/exceptions.rb +18 -0
- data/lib/moab/file_group.rb +244 -0
- data/lib/moab/file_group_difference.rb +336 -0
- data/lib/moab/file_group_difference_subset.rb +45 -0
- data/lib/moab/file_instance.rb +82 -0
- data/lib/moab/file_instance_difference.rb +54 -0
- data/lib/moab/file_inventory.rb +279 -0
- data/lib/moab/file_inventory_difference.rb +132 -0
- data/lib/moab/file_manifestation.rb +85 -0
- data/lib/moab/file_signature.rb +200 -0
- data/lib/moab/signature_catalog.rb +195 -0
- data/lib/moab/signature_catalog_entry.rb +61 -0
- data/lib/moab/storage_object.rb +220 -0
- data/lib/moab/storage_object_version.rb +333 -0
- data/lib/moab/storage_repository.rb +57 -0
- data/lib/moab/storage_services.rb +104 -0
- data/lib/moab/verification_result.rb +83 -0
- data/lib/moab/version_metadata.rb +38 -0
- data/lib/moab/version_metadata_entry.rb +64 -0
- data/lib/moab/version_metadata_event.rb +47 -0
- data/lib/moab_stanford.rb +18 -0
- data/lib/monkey_patches.rb +65 -0
- data/lib/serializer.rb +36 -0
- data/lib/serializer/manifest.rb +76 -0
- data/lib/serializer/serializable.rb +178 -0
- data/lib/stanford/active_fedora_object.rb +34 -0
- data/lib/stanford/content_inventory.rb +236 -0
- data/lib/stanford/dor_metadata.rb +49 -0
- data/lib/stanford/storage_repository.rb +46 -0
- data/lib/stanford/storage_services.rb +66 -0
- data/lib/tasks/yard.rake +34 -0
- data/lib/tools/api_doc_generator.rb +396 -0
- data/lib/tools/spec_generator.rb +410 -0
- data/lib/tools/spec_generator_old.rb +49 -0
- metadata +252 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: fe7dbe157c6bc9418f8b336b65431a1266ca4375
|
4
|
+
data.tar.gz: 5328ef34ef060962cf9598001c6d146cde7afdc3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 79893b2a0f5b5d5791cee7ef9d26213d2f6876ea32f71a258d6d3030982fa8a6824f5285de20e343775ca1dc4625cbc16d32c793182116f830c7a4065dd5c5ce
|
7
|
+
data.tar.gz: e73bae98354c6001887a6a1fa24589919e696596c74ffdaac0b3132b123c761fea0850e5000d3795443af49b696a3e4cbc92567d4ccb7fa5020f1cbfa7aeb191
|
data/lib/moab.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# Moab is a module that provides a distinctive namespace for the collection of classes it contains.
|
2
|
+
#
|
3
|
+
# ====Data Model
|
4
|
+
#
|
5
|
+
# * <b>{FileInventory} = container for recording information about a collection of related files</b>
|
6
|
+
# * {FileGroup} [1..*] = subset allow segregation of content and metadata files
|
7
|
+
# * {FileManifestation} [1..*] = snapshot of a file's filesystem characteristics
|
8
|
+
# * {FileSignature} [1] = file fixity information
|
9
|
+
# * {FileInstance} [1..*] = filepath and timestamp of any physical file having that signature
|
10
|
+
#
|
11
|
+
# * <b>{SignatureCatalog} = lookup table containing a cumulative collection of all files ever ingested</b>
|
12
|
+
# * {SignatureCatalogEntry} [1..*] = a row in the lookup table containing storage information about a single file
|
13
|
+
# * {FileSignature} [1] = file fixity information
|
14
|
+
#
|
15
|
+
# * <b>{FileInventoryDifference} = compares two {FileInventory} instances based on file signatures and pathnames</b>
|
16
|
+
# * {FileGroupDifference} [1..*] = performs analysis and reports differences between two matching {FileGroup} objects
|
17
|
+
# * {FileGroupDifferenceSubset} [1..5] = collects a set of file-level differences of a given change type
|
18
|
+
# * {FileInstanceDifference} [1..*] = contains difference information at the file level
|
19
|
+
# * {FileSignature} [1..2] = contains the file signature(s) of two file instances being compared
|
20
|
+
#
|
21
|
+
# * <b>{VersionMetadata} = descriptive information about a digital object's versions</b>
|
22
|
+
# * {VersionMetadataEntry} [1..*] = attributes of a digital object version
|
23
|
+
# * {VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps
|
24
|
+
#
|
25
|
+
# * <b>{StorageObject} = represents a digital object's repository storage location and ingest/dissemination methods</b>
|
26
|
+
# * {StorageObjectVersion} [1..*] = represents a version subdirectory within an object's home directory
|
27
|
+
# * {Bagger} [1] = utility for creating bagit packages for ingest or dissemination
|
28
|
+
#
|
29
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
30
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
31
|
+
module Moab
|
32
|
+
end
|
33
|
+
|
34
|
+
require 'serializer'
|
35
|
+
include Serializer
|
36
|
+
require 'confstruct/configuration'
|
37
|
+
require 'moab/config'
|
38
|
+
require 'moab/file_signature'
|
39
|
+
require 'moab/file_instance'
|
40
|
+
require 'moab/file_manifestation'
|
41
|
+
require 'moab/file_group'
|
42
|
+
require 'moab/file_inventory'
|
43
|
+
require 'moab/signature_catalog_entry'
|
44
|
+
require 'moab/signature_catalog'
|
45
|
+
require 'moab/file_instance_difference'
|
46
|
+
require 'moab/file_group_difference_subset'
|
47
|
+
require 'moab/file_group_difference'
|
48
|
+
require 'moab/file_inventory_difference'
|
49
|
+
require 'moab/version_metadata_event'
|
50
|
+
require 'moab/version_metadata_entry'
|
51
|
+
require 'moab/version_metadata'
|
52
|
+
require 'moab/bagger'
|
53
|
+
require 'moab/storage_object'
|
54
|
+
require 'moab/storage_object_version'
|
55
|
+
require 'moab/storage_repository'
|
56
|
+
require 'moab/storage_services'
|
57
|
+
require 'moab/exceptions'
|
58
|
+
require 'moab/verification_result'
|
59
|
+
|
data/lib/moab/bagger.rb
ADDED
@@ -0,0 +1,289 @@
|
|
1
|
+
require 'moab'
|
2
|
+
require 'systemu'
|
3
|
+
|
4
|
+
module Moab
|
5
|
+
|
6
|
+
# A class used to create a BagIt package from a version inventory and a set of source files.
|
7
|
+
# The {#fill_bag} method is called with a package_mode parameter that specifies
|
8
|
+
# whether the bag is being created for deposit into the repository or is to contain the output of a version reconstruction.
|
9
|
+
# * In <b>:depositor</b> mode, the version inventory is filtered using the digital object's signature catalog so that only new files are included
|
10
|
+
# * In <b>:reconstructor</b> mode, the version inventory and signature catalog are used together to regenerate the complete set of files for the version.
|
11
|
+
#
|
12
|
+
# ====Data Model
|
13
|
+
# * {StorageRepository} = represents a digital object repository storage node
|
14
|
+
# * {StorageServices} = supports application layer access to the repository's objects, data, and metadata
|
15
|
+
# * {StorageObject} = represents a digital object's repository storage location and ingest/dissemination methods
|
16
|
+
# * {StorageObjectVersion} [1..*] = represents a version subdirectory within an object's home directory
|
17
|
+
# * <b>{Bagger} [1] = utility for creating bagit packages for ingest or dissemination</b>
|
18
|
+
#
|
19
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
20
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
21
|
+
class Bagger
|
22
|
+
|
23
|
+
# @param version_inventory [FileInventory] The complete inventory of the files comprising a digital object version
|
24
|
+
# @param signature_catalog [SignatureCatalog] The signature catalog, used to specify source paths (in :reconstructor mode),
|
25
|
+
# or to filter the version inventory (in :depositor mode)
|
26
|
+
# @param bag_pathname [Pathname,String] The location of the Bagit bag to be created
|
27
|
+
def initialize(version_inventory, signature_catalog, bag_pathname)
|
28
|
+
@version_inventory = version_inventory
|
29
|
+
@signature_catalog = signature_catalog
|
30
|
+
@bag_pathname = Pathname.new(bag_pathname)
|
31
|
+
create_bagit_txt()
|
32
|
+
end
|
33
|
+
|
34
|
+
# @return [FileInventory] The complete inventory of the files comprising a digital object version
|
35
|
+
attr_accessor :version_inventory
|
36
|
+
|
37
|
+
# @return [SignatureCatalog] The signature catalog, used to specify source paths (in :reconstructor mode),
|
38
|
+
# or to filter the version inventory (in :depositor mode)
|
39
|
+
attr_accessor :signature_catalog
|
40
|
+
|
41
|
+
# @return [Pathname] The location of the Bagit bag to be created
|
42
|
+
attr_accessor :bag_pathname
|
43
|
+
|
44
|
+
# @return [FileInventory] The actual inventory of the files to be packaged (derived from @version_inventory in {#fill_bag})
|
45
|
+
attr_accessor :bag_inventory
|
46
|
+
|
47
|
+
# @return [Symbol] The operational mode controlling what gets bagged {#fill_bag}
|
48
|
+
# and the full path of source files {#fill_payload}
|
49
|
+
attr_accessor :package_mode
|
50
|
+
|
51
|
+
# @return [void] Delete any existing bag data and re-initialize the bag directory
|
52
|
+
def reset_bag
|
53
|
+
delete_bag
|
54
|
+
delete_tarfile
|
55
|
+
create_bagit_txt
|
56
|
+
end
|
57
|
+
|
58
|
+
# @api internal
|
59
|
+
# @return [void] Generate the bagit.txt tag file
|
60
|
+
def create_bagit_txt()
|
61
|
+
@bag_pathname.mkpath
|
62
|
+
@bag_pathname.join("bagit.txt").open('w') do |f|
|
63
|
+
f.puts "Tag-File-Character-Encoding: UTF-8"
|
64
|
+
f.puts "BagIt-Version: 0.97"
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
# @return [NilClass] Delete the bagit files
|
69
|
+
def delete_bag()
|
70
|
+
# make sure this looks like a bag before deleting
|
71
|
+
if @bag_pathname.join('bagit.txt').exist?
|
72
|
+
if @bag_pathname.join('data').exist?
|
73
|
+
@bag_pathname.rmtree
|
74
|
+
else
|
75
|
+
@bag_pathname.children.each {|file| file.delete}
|
76
|
+
@bag_pathname.rmdir
|
77
|
+
end
|
78
|
+
end
|
79
|
+
nil
|
80
|
+
end
|
81
|
+
|
82
|
+
# @return [void] Delete the tar file derived from the bag location (<bag_parent>/<bag_name>.tar), if it exists
|
83
|
+
def delete_tarfile()
|
84
|
+
bag_name = @bag_pathname.basename
|
85
|
+
bag_parent = @bag_pathname.parent
|
86
|
+
tar_pathname = bag_parent.join("#{bag_name}.tar")
|
87
|
+
tar_pathname.delete if tar_pathname.exist?
|
88
|
+
end
|
89
|
+
|
90
|
+
# @api external
|
91
|
+
# @param package_mode [Symbol] The operational mode controlling what gets bagged and the full path of source files (Bagger#fill_payload)
|
92
|
+
# @param source_base_pathname [Pathname] The home location of the source files
|
93
|
+
# @return [Bagger] Perform all the operations required to fill the bag payload, write the manifests and tagfiles, and checksum the tagfiles
|
94
|
+
# @example {include:file:spec/features/storage/deposit_spec.rb}
|
95
|
+
def fill_bag(package_mode, source_base_pathname)
|
96
|
+
create_bag_inventory(package_mode)
|
97
|
+
fill_payload(source_base_pathname)
|
98
|
+
create_tagfiles
|
99
|
+
self
|
100
|
+
end
|
101
|
+
|
102
|
+
# @api external
|
103
|
+
# @param package_mode [Symbol] The operational mode controlling what gets bagged and the full path of source files (Bagger#fill_payload)
|
104
|
+
# @return [FileInventory] Create, write, and return the inventory of the files that will become the payload
|
105
|
+
def create_bag_inventory(package_mode)
|
106
|
+
@package_mode = package_mode
|
107
|
+
@bag_pathname.mkpath
|
108
|
+
case package_mode
|
109
|
+
when :depositor
|
110
|
+
@version_inventory.write_xml_file(@bag_pathname, 'version')
|
111
|
+
@bag_inventory = @signature_catalog.version_additions(@version_inventory)
|
112
|
+
@bag_inventory.write_xml_file(@bag_pathname, 'additions')
|
113
|
+
when :reconstructor
|
114
|
+
@bag_inventory = @version_inventory
|
115
|
+
@bag_inventory.write_xml_file(@bag_pathname, 'version')
|
116
|
+
end
|
117
|
+
@bag_inventory
|
118
|
+
end
|
119
|
+
|
120
|
+
# @api internal
|
121
|
+
# @param source_base_pathname [Pathname] The home location of the source files
|
122
|
+
# @return [void] Fill in the bag's data folder with copies of all files to be packaged for delivery.
|
123
|
+
# This method creates symbolic links to the source files (rather than copying them) in order to greatly speed up the process.
|
124
|
+
# Because the payload consists of links, the source files must remain in place until the bag is packaged; {#create_tarfile} dereferences the links.
|
125
|
+
def fill_payload(source_base_pathname)
|
126
|
+
@bag_inventory.groups.each do |group|
|
127
|
+
group_id = group.group_id
|
128
|
+
case @package_mode
|
129
|
+
when :depositor
|
130
|
+
deposit_group(group_id, source_base_pathname.join(group_id))
|
131
|
+
when :reconstructor
|
132
|
+
reconstuct_group(group_id, source_base_pathname)
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
# @param group_id [String] The name of the data group being copied to the bag
|
138
|
+
# @param source_dir [Pathname] The location from which files should be copied
|
139
|
+
# @return [Boolean] Copy all the files listed in the group inventory to the bag.
|
140
|
+
# Return true if successful or nil if the group was not found in the inventory
|
141
|
+
def deposit_group(group_id, source_dir)
  # Populate data/<group_id> in the bag with links to every path listed for the
  # group in @bag_inventory. Returns true when links were created, or nil when
  # the group is missing from the inventory or has no files.
  group = @bag_inventory.group(group_id)
  # Return a literal nil. The previous `return nil?` invoked Object#nil? on self
  # and so returned false, contradicting the documented contract.
  return nil if group.nil? || group.files.empty?

  target_dir = @bag_pathname.join('data', group_id)
  group.path_list.each do |relative_path|
    source = source_dir.join(relative_path)
    target = target_dir.join(relative_path)
    # Create intermediate directories so nested relative paths can be linked.
    target.parent.mkpath
    # The payload entry is a symbolic link to the source file, not a copy.
    FileUtils.symlink source, target
  end
  true
end
|
153
|
+
|
154
|
+
# @param group_id [String] The name of the data group being copied to the bag
|
155
|
+
# @param storage_object_dir [Pathname] The home location of the object store from which files should be copied
|
156
|
+
# @return [Boolean] Copy all the files listed in the group inventory to the bag.
|
157
|
+
# Return true if successful or nil if the group was not found in the inventory
|
158
|
+
def reconstuct_group(group_id, storage_object_dir)
  # Populate data/<group_id> in the bag with links to the stored copies of every
  # file in the group. The stored location of each file is found by looking up
  # its signature in @signature_catalog. Returns true when done, or nil when the
  # group is missing from the inventory or has no files.
  group = @bag_inventory.group(group_id)
  # Return a literal nil. The previous `return nil?` invoked Object#nil? on self
  # and so returned false, contradicting the documented contract.
  return nil if group.nil? || group.files.empty?

  target_dir = @bag_pathname.join('data', group_id)
  group.files.each do |file|
    catalog_entry = @signature_catalog.signature_hash[file.signature]
    source = storage_object_dir.join(catalog_entry.storage_path)
    # One stored file may appear at several paths in the version; link each one.
    file.instances.each do |instance|
      target = target_dir.join(instance.path)
      target.parent.mkpath
      FileUtils.symlink source, target
    end
  end
  true
end
|
173
|
+
|
174
|
+
# @return [Boolean] create BagIt manifests and tag files. Return true if successful
|
175
|
+
def create_tagfiles
|
176
|
+
create_payload_manifests
|
177
|
+
create_bag_info_txt
|
178
|
+
create_bagit_txt
|
179
|
+
create_tagfile_manifests
|
180
|
+
true
|
181
|
+
end
|
182
|
+
|
183
|
+
# @api internal
|
184
|
+
# @return [void] Using the checksum information from the inventory, create BagIt manifest files for the payload
|
185
|
+
def create_payload_manifests
|
186
|
+
manifest_pathname = Hash.new
|
187
|
+
manifest_file = Hash.new
|
188
|
+
manifest_types = [:md5, :sha1, :sha256]
|
189
|
+
manifest_types.each do |type|
|
190
|
+
manifest_pathname[type] = @bag_pathname.join("manifest-#{type.to_s}.txt")
|
191
|
+
manifest_file[type] = manifest_pathname[type].open('w')
|
192
|
+
end
|
193
|
+
@bag_inventory.groups.each do |group|
|
194
|
+
group.files.each do |file|
|
195
|
+
fixity = file.signature.fixity
|
196
|
+
file.instances.each do |instance|
|
197
|
+
data_path = File.join('data', group.group_id, instance.path)
|
198
|
+
manifest_types.each do |type|
|
199
|
+
manifest_file[type].puts("#{fixity[type]} #{data_path}") if fixity[type]
|
200
|
+
end
|
201
|
+
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
ensure
|
205
|
+
manifest_types.each do |type|
|
206
|
+
if manifest_file[type]
|
207
|
+
manifest_file[type].close
|
208
|
+
manifest_pathname[type].delete if
|
209
|
+
manifest_pathname[type].exist? and manifest_pathname[type].size == 0
|
210
|
+
end
|
211
|
+
end
|
212
|
+
end
|
213
|
+
|
214
|
+
# @api internal
|
215
|
+
# @return [void] Generate the bag-info.txt tag file
|
216
|
+
def create_bag_info_txt
|
217
|
+
@bag_pathname.join("bag-info.txt").open('w') do |f|
|
218
|
+
f.puts "External-Identifier: #{@bag_inventory.package_id}"
|
219
|
+
f.puts "Payload-Oxum: #{@bag_inventory.byte_count}.#{@bag_inventory.file_count}"
|
220
|
+
f.puts "Bag-Size: #{@bag_inventory.human_size}"
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
224
|
+
# @api internal
|
225
|
+
# @return [void] create BagIt tag manifest files containing checksums for all files in the bag's root directory
|
226
|
+
def create_tagfile_manifests()
|
227
|
+
manifest_pathname = Hash.new
|
228
|
+
manifest_file = Hash.new
|
229
|
+
manifest_types = [:md5, :sha1, :sha256]
|
230
|
+
manifest_types.each do |type|
|
231
|
+
manifest_pathname[type] = @bag_pathname.join("tagmanifest-#{type.to_s}.txt")
|
232
|
+
manifest_file[type] = manifest_pathname[type].open('w')
|
233
|
+
end
|
234
|
+
@bag_pathname.children.each do |file|
|
235
|
+
unless file.directory? || file.basename.to_s[0, 11] == 'tagmanifest'
|
236
|
+
signature = FileSignature.new.signature_from_file(file)
|
237
|
+
fixity = signature.fixity
|
238
|
+
manifest_types.each do |type|
|
239
|
+
manifest_file[type].puts("#{fixity[type]} #{file.basename}") if fixity[type]
|
240
|
+
end
|
241
|
+
end
|
242
|
+
end
|
243
|
+
ensure
|
244
|
+
manifest_types.each do |type|
|
245
|
+
if manifest_file[type]
|
246
|
+
manifest_file[type].close
|
247
|
+
manifest_pathname[type].delete if
|
248
|
+
manifest_pathname[type].exist? and manifest_pathname[type].size == 0
|
249
|
+
end
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
253
|
+
# @return [Boolean] Create a tar file containing the bag
|
254
|
+
def create_tarfile(tar_pathname=nil)
|
255
|
+
bag_name = @bag_pathname.basename
|
256
|
+
bag_parent = @bag_pathname.parent
|
257
|
+
tar_pathname ||= bag_parent.join("#{bag_name}.tar")
|
258
|
+
tar_cmd="cd '#{bag_parent}'; tar --dereference --force-local -cf '#{tar_pathname}' '#{bag_name}'"
|
259
|
+
begin
|
260
|
+
shell_execute(tar_cmd)
|
261
|
+
rescue
|
262
|
+
shell_execute(tar_cmd.sub('--force-local',''))
|
263
|
+
end
|
264
|
+
raise "Unable to create tarfile #{tar_pathname}" unless tar_pathname.exist?
|
265
|
+
return true
|
266
|
+
|
267
|
+
end
|
268
|
+
|
269
|
+
# Executes a system command in a subprocess.
|
270
|
+
# The method will return stdout from the command if execution was successful.
|
271
|
+
# The method will raise an exception if execution fails.
|
272
|
+
# The exception's message will contain the explanation of the failure.
|
273
|
+
# @param [String] command the command to be executed
|
274
|
+
# @return [String] stdout from the command if execution was successful
|
275
|
+
def shell_execute(command)
  # Run the command through systemu and return its stdout. Any failure is
  # re-raised as a RuntimeError whose message names the command and includes
  # the captured stderr (and stdout, when there is any).
  status, stdout, stderr = systemu(command)
  # A non-zero exit status is turned into an exception so that both failure
  # paths are reported by the rescue clause below.
  raise stderr if status.exitstatus != 0
  stdout
rescue => e
  # If systemu itself raised, stderr was never assigned and is still nil.
  # Report the exception's own message in that case instead of failing with a
  # NoMethodError that would hide the real cause.
  failure = stderr.nil? ? e.message : stderr
  msg = "Command failed to execute: [#{command}] caused by <STDERR = #{failure.split($/).join('; ')}>"
  msg << " STDOUT = #{stdout.split($/).join('; ')}" if stdout && !stdout.empty?
  raise msg
end
|
286
|
+
|
287
|
+
end
|
288
|
+
|
289
|
+
end
|
data/lib/moab/config.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'moab'
|
2
|
+
|
3
|
+
module Moab
|
4
|
+
|
5
|
+
#class Configuration < Confstruct::Configuration
|
6
|
+
#
|
7
|
+
# def configure(*args, &block)
|
8
|
+
# super(*args, &block)
|
9
|
+
#
|
10
|
+
# # Whatever you want to do after configuration
|
11
|
+
# # Something.initialize(self.repository_home)
|
12
|
+
# end
|
13
|
+
#end
|
14
|
+
|
15
|
+
# @return [Confstruct::Configuration] the configuration data
|
16
|
+
Config = Confstruct::Configuration.new do
|
17
|
+
repository_home nil
|
18
|
+
path_method :druid_tree
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Moab
|
2
|
+
class ObjectNotFoundException < RuntimeError
|
3
|
+
|
4
|
+
end
|
5
|
+
|
6
|
+
class FileNotFoundException < RuntimeError
|
7
|
+
|
8
|
+
end
|
9
|
+
|
10
|
+
class InvalidMetadataException < RuntimeError
|
11
|
+
|
12
|
+
end
|
13
|
+
|
14
|
+
class ValidationException < RuntimeError
|
15
|
+
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
@@ -0,0 +1,244 @@
|
|
1
|
+
require 'moab'
|
2
|
+
|
3
|
+
module Moab
|
4
|
+
|
5
|
+
# A container for a standard subset of a digital object's {FileManifestation} objects
|
6
|
+
# Used to segregate depositor content from repository metadata files
|
7
|
+
# This is a child element of {FileInventory}, which contains a full example
|
8
|
+
#
|
9
|
+
# ====Data Model
|
10
|
+
# * {FileInventory} = container for recording information about a collection of related files
|
11
|
+
# * <b>{FileGroup} [1..*] = subset allow segregation of content and metadata files</b>
|
12
|
+
# * {FileManifestation} [1..*] = snapshot of a file's filesystem characteristics
|
13
|
+
# * {FileSignature} [1] = file fixity information
|
14
|
+
# * {FileInstance} [1..*] = filepath and timestamp of any physical file having that signature
|
15
|
+
#
|
16
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
17
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
18
|
+
class FileGroup < Serializable
|
19
|
+
|
20
|
+
include HappyMapper
|
21
|
+
|
22
|
+
# The name of the XML element used to serialize this object's data
|
23
|
+
tag 'fileGroup'
|
24
|
+
|
25
|
+
# (see Serializable#initialize)
|
26
|
+
def initialize(opts={})
|
27
|
+
@signature_hash = OrderedHash.new
|
28
|
+
@data_source = ""
|
29
|
+
super(opts)
|
30
|
+
end
|
31
|
+
|
32
|
+
# @attribute
|
33
|
+
# @return [String] The name of the file group
|
34
|
+
attribute :group_id, String, :tag => 'groupId', :key => true
|
35
|
+
|
36
|
+
# @attribute
|
37
|
+
# @return [String] The directory location or other source of this group's file data
|
38
|
+
attribute :data_source, String, :tag => 'dataSource'
|
39
|
+
|
40
|
+
# @attribute
|
41
|
+
# @return [Integer] The total number of data files (dynamically calculated)
|
42
|
+
attribute :file_count, Integer, :tag => 'fileCount', :on_save => Proc.new {|i| i.to_s}
|
43
|
+
|
44
|
+
def file_count
|
45
|
+
files.inject(0) { |sum, manifestation| sum + manifestation.file_count }
|
46
|
+
end
|
47
|
+
|
48
|
+
# @attribute
|
49
|
+
# @return [Integer] The total size (in bytes) of all data files (dynamically calculated)
|
50
|
+
attribute :byte_count, Integer, :tag => 'byteCount', :on_save => Proc.new {|i| i.to_s}
|
51
|
+
|
52
|
+
def byte_count
|
53
|
+
files.inject(0) { |sum, manifestation| sum + manifestation.byte_count }
|
54
|
+
end
|
55
|
+
|
56
|
+
# @attribute
|
57
|
+
# @return [Integer] The total disk usage (in 1 kB blocks) of all data files (estimating du -k result) (dynamically calculated)
|
58
|
+
attribute :block_count, Integer, :tag => 'blockCount', :on_save => Proc.new {|i| i.to_s}
|
59
|
+
|
60
|
+
def block_count
|
61
|
+
files.inject(0) { |sum, manifestation| sum + manifestation.block_count }
|
62
|
+
end
|
63
|
+
|
64
|
+
# @return [Array<String>] The data fields to include in summary reports
|
65
|
+
def summary_fields
|
66
|
+
%w{group_id file_count byte_count block_count}
|
67
|
+
end
|
68
|
+
|
69
|
+
|
70
|
+
# @attribute
|
71
|
+
# @return [Array<FileManifestation>] The set of files comprising the group
|
72
|
+
has_many :files, FileManifestation, :tag => 'file'
|
73
|
+
|
74
|
+
def files
|
75
|
+
@signature_hash.values
|
76
|
+
end
|
77
|
+
|
78
|
+
# @return [OrderedHash<FileSignature, FileManifestation>] The actual in-memory store for the collection
|
79
|
+
# of {FileManifestation} objects that are contained in this file group.
|
80
|
+
attr_accessor :signature_hash
|
81
|
+
|
82
|
+
# @api internal
|
83
|
+
# @return [OrderedHash<String,FileSignature>] An index of file paths,
|
84
|
+
# used to test for existence of a filename in this file group
|
85
|
+
def path_hash
|
86
|
+
path_hash = OrderedHash.new
|
87
|
+
@signature_hash.each do |signature,manifestation|
|
88
|
+
manifestation.instances.each do |instance|
|
89
|
+
path_hash[instance.path] = signature
|
90
|
+
end
|
91
|
+
end
|
92
|
+
path_hash
|
93
|
+
end
|
94
|
+
|
95
|
+
# @return [Array<String>] The list of file paths in this group
|
96
|
+
def path_list
|
97
|
+
files.collect{|file| file.instances.collect{|instance| instance.path}}.flatten
|
98
|
+
end
|
99
|
+
|
100
|
+
# @api internal
|
101
|
+
# @param signature_subset [Array<FileSignature>] The signatures used to select the entries to return
|
102
|
+
# @return [OrderedHash<String,FileSignature>] A pathname,signature hash containing a subset of the filenames in this file group
|
103
|
+
def path_hash_subset(signature_subset)
|
104
|
+
path_hash = OrderedHash.new
|
105
|
+
signature_subset.each do |signature|
|
106
|
+
manifestation = @signature_hash[signature]
|
107
|
+
manifestation.instances.each do |instance|
|
108
|
+
path_hash[instance.path] = signature
|
109
|
+
end
|
110
|
+
end
|
111
|
+
path_hash
|
112
|
+
end
|
113
|
+
|
114
|
+
# @param manifestiation_array [Array<FileManifestation>] The collection of {FileManifestation} objects
|
115
|
+
# that are to be added to this file group. Used by HappyMapper when deserializing a {FileInventory} file
|
116
|
+
# Add the array of {FileManifestation} objects to this file group.
|
117
|
+
def files=(manifestiation_array)
|
118
|
+
manifestiation_array.each do |manifestiation|
|
119
|
+
add_file(manifestiation)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
# @api internal
|
124
|
+
# @param manifestation [FileManifestation] The file manifestation to be added
|
125
|
+
# @return [void] Add a single {FileManifestation} object to this group
|
126
|
+
def add_file(manifestation)
|
127
|
+
manifestation.instances.each do |instance|
|
128
|
+
add_file_instance(manifestation.signature, instance)
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
# @api internal
|
133
|
+
# @param signature [FileSignature] The signature of the file instance to be added
|
134
|
+
# @param instance [FileInstance] The pathname and datetime of the file instance to be added
|
135
|
+
# @return [void] Add a single {FileSignature},{FileInstance} key/value pair to this group.
|
136
|
+
# Data is actually stored in the {#signature_hash}
|
137
|
+
def add_file_instance(signature,instance)
|
138
|
+
if @signature_hash.has_key?(signature)
|
139
|
+
manifestation = @signature_hash[signature]
|
140
|
+
else
|
141
|
+
manifestation = FileManifestation.new
|
142
|
+
manifestation.signature = signature
|
143
|
+
@signature_hash[signature] = manifestation
|
144
|
+
end
|
145
|
+
manifestation.instances << instance
|
146
|
+
end
|
147
|
+
|
148
|
+
# @param path [String] The path of the file to be removed
|
149
|
+
# @return [void] Remove a file from the inventory
|
150
|
+
# for example, the manifest inventory does not contain a file entry for itself
|
151
|
+
def remove_file_having_path(path)
|
152
|
+
signature = self.path_hash[path]
|
153
|
+
@signature_hash.delete(signature)
|
154
|
+
end
|
155
|
+
|
156
|
+
# @return [Pathname] The full path used as the basis of the relative paths reported
|
157
|
+
# in {FileInstance} objects that are children of the {FileManifestation} objects contained in this file group
|
158
|
+
attr_accessor :base_directory
|
159
|
+
|
160
|
+
def base_directory=(basepath)
|
161
|
+
@base_directory = Pathname.new(basepath).expand_path
|
162
|
+
end
|
163
|
+
|
164
|
+
# @api internal
|
165
|
+
# @param pathname [Pathname] The file path to be tested
|
166
|
+
# @return [Boolean] Test whether the given path is contained within the {#base_directory}
|
167
|
+
def is_descendent_of_base?(pathname)
|
168
|
+
raise("base_directory has not been set") if @base_directory.nil?
|
169
|
+
is_descendent = false
|
170
|
+
pathname.expand_path.ascend {|ancestor| is_descendent ||= (ancestor == @base_directory)}
|
171
|
+
raise("#{pathname} is not a descendent of #{@base_directory}") unless is_descendent
|
172
|
+
is_descendent
|
173
|
+
end
|
174
|
+
|
175
|
+
# @param directory [Pathname,String] The directory whose children are to be added to the file group
|
176
|
+
# @param signatures_from_bag [Hash<Pathname,Signature>] The fixity data already calculated for the files
|
177
|
+
# @param recursive [Boolean] if true, descend into child directories
|
178
|
+
# @return [FileGroup] Harvest a directory (using digest hash for fixity data) and add all files to the file group
|
179
|
+
def group_from_bagit_subdir(directory, signatures_from_bag, recursive=true)
|
180
|
+
@signatures_from_bag = signatures_from_bag
|
181
|
+
group_from_directory(directory, recursive)
|
182
|
+
end
|
183
|
+
|
184
|
+
# @api internal
|
185
|
+
# @param directory [Pathname,String] The location of the files to harvest
|
186
|
+
# @param recursive [Boolean] if true, descend into child directories
|
187
|
+
# @return [FileGroup] Harvest a directory and add all files to the file group
|
188
|
+
def group_from_directory(directory, recursive=true)
  # Record the directory as this group's base directory and data source, then
  # harvest its files into the group. Always returns self.
  self.base_directory = directory
  @data_source = @base_directory.to_s
  harvest_directory(directory, recursive)
  self
rescue StandardError # e.g. Errno::ENOENT when the directory does not exist
  # Best effort: a harvest failure still yields the group, with the data source
  # recorded as the path that was given. Only StandardError is rescued, so
  # Interrupt, SystemExit and NoMemoryError are no longer swallowed here.
  @data_source = directory.to_s
  self
end
|
197
|
+
|
198
|
+
# @api internal
|
199
|
+
# @param path [Pathname,String] pathname of the directory to be harvested
|
200
|
+
# @param recursive [Boolean] if true, also harvest subdirectories
|
201
|
+
# @param validated [Boolean] if true, path is verified to be descendant of (#base_directory)
|
202
|
+
# @return [void] Traverse a directory tree and add all files to the file group
|
203
|
+
# Note that unlike Find.find and Dir.glob, Pathname passes through symbolic links
|
204
|
+
# @see http://stackoverflow.com/questions/3974087/how-to-make-rubys-find-find-follow-symlinks
|
205
|
+
# @see http://stackoverflow.com/questions/357754/can-i-traverse-symlinked-directories-in-ruby-with-a-glob
|
206
|
+
def harvest_directory(path, recursive, validated=nil)
|
207
|
+
pathname=Pathname.new(path).expand_path
|
208
|
+
validated ||= is_descendent_of_base?(pathname)
|
209
|
+
pathname.children.sort.each do |child|
|
210
|
+
if child.basename.to_s == ".DS_Store"
|
211
|
+
next
|
212
|
+
elsif child.directory?
|
213
|
+
harvest_directory(child,recursive, validated) if recursive
|
214
|
+
else
|
215
|
+
add_physical_file(child, validated)
|
216
|
+
end
|
217
|
+
end
|
218
|
+
nil
|
219
|
+
end
|
220
|
+
|
221
|
+
# @api internal
|
222
|
+
# @param pathname [Pathname, String] The location of the file to be added
|
223
|
+
# @param validated [Boolean] if true, path is verified to be descendant of (#base_directory)
|
224
|
+
# @return [void] Add a single physical file's data to the array of files in this group.
|
225
|
+
# If fixity data was supplied in bag manifests, then utilize that data.
|
226
|
+
def add_physical_file(pathname, validated=nil)
|
227
|
+
pathname=Pathname.new(pathname).expand_path
|
228
|
+
validated ||= is_descendent_of_base?(pathname)
|
229
|
+
instance = FileInstance.new.instance_from_file(pathname, @base_directory)
|
230
|
+
if @signatures_from_bag && @signatures_from_bag[pathname]
|
231
|
+
signature = @signatures_from_bag[pathname]
|
232
|
+
unless signature.complete?
|
233
|
+
signature = signature.normalized_signature(pathname)
|
234
|
+
end
|
235
|
+
else
|
236
|
+
signature = FileSignature.new.signature_from_file(pathname)
|
237
|
+
end
|
238
|
+
add_file_instance(signature,instance)
|
239
|
+
end
|
240
|
+
|
241
|
+
end
|
242
|
+
|
243
|
+
end
|
244
|
+
|