moab-versioning 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/lib/moab.rb +59 -0
  3. data/lib/moab/bagger.rb +289 -0
  4. data/lib/moab/config.rb +21 -0
  5. data/lib/moab/exceptions.rb +18 -0
  6. data/lib/moab/file_group.rb +244 -0
  7. data/lib/moab/file_group_difference.rb +336 -0
  8. data/lib/moab/file_group_difference_subset.rb +45 -0
  9. data/lib/moab/file_instance.rb +82 -0
  10. data/lib/moab/file_instance_difference.rb +54 -0
  11. data/lib/moab/file_inventory.rb +279 -0
  12. data/lib/moab/file_inventory_difference.rb +132 -0
  13. data/lib/moab/file_manifestation.rb +85 -0
  14. data/lib/moab/file_signature.rb +200 -0
  15. data/lib/moab/signature_catalog.rb +195 -0
  16. data/lib/moab/signature_catalog_entry.rb +61 -0
  17. data/lib/moab/storage_object.rb +220 -0
  18. data/lib/moab/storage_object_version.rb +333 -0
  19. data/lib/moab/storage_repository.rb +57 -0
  20. data/lib/moab/storage_services.rb +104 -0
  21. data/lib/moab/verification_result.rb +83 -0
  22. data/lib/moab/version_metadata.rb +38 -0
  23. data/lib/moab/version_metadata_entry.rb +64 -0
  24. data/lib/moab/version_metadata_event.rb +47 -0
  25. data/lib/moab_stanford.rb +18 -0
  26. data/lib/monkey_patches.rb +65 -0
  27. data/lib/serializer.rb +36 -0
  28. data/lib/serializer/manifest.rb +76 -0
  29. data/lib/serializer/serializable.rb +178 -0
  30. data/lib/stanford/active_fedora_object.rb +34 -0
  31. data/lib/stanford/content_inventory.rb +236 -0
  32. data/lib/stanford/dor_metadata.rb +49 -0
  33. data/lib/stanford/storage_repository.rb +46 -0
  34. data/lib/stanford/storage_services.rb +66 -0
  35. data/lib/tasks/yard.rake +34 -0
  36. data/lib/tools/api_doc_generator.rb +396 -0
  37. data/lib/tools/spec_generator.rb +410 -0
  38. data/lib/tools/spec_generator_old.rb +49 -0
  39. metadata +252 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: fe7dbe157c6bc9418f8b336b65431a1266ca4375
4
+ data.tar.gz: 5328ef34ef060962cf9598001c6d146cde7afdc3
5
+ SHA512:
6
+ metadata.gz: 79893b2a0f5b5d5791cee7ef9d26213d2f6876ea32f71a258d6d3030982fa8a6824f5285de20e343775ca1dc4625cbc16d32c793182116f830c7a4065dd5c5ce
7
+ data.tar.gz: e73bae98354c6001887a6a1fa24589919e696596c74ffdaac0b3132b123c761fea0850e5000d3795443af49b696a3e4cbc92567d4ccb7fa5020f1cbfa7aeb191
data/lib/moab.rb ADDED
@@ -0,0 +1,59 @@
1
+ # Moab is a module that provides a distinctive namespace for the collection of classes it contains.
2
+ #
3
+ # ====Data Model
4
+ #
5
+ # * <b>{FileInventory} = container for recording information about a collection of related files</b>
6
+ # * {FileGroup} [1..*] = subset allow segregation of content and metadata files
7
+ # * {FileManifestation} [1..*] = snapshot of a file's filesystem characteristics
8
+ # * {FileSignature} [1] = file fixity information
9
+ # * {FileInstance} [1..*] = filepath and timestamp of any physical file having that signature
10
+ #
11
+ # * <b>{SignatureCatalog} = lookup table containing a cumulative collection of all files ever ingested</b>
12
+ # * {SignatureCatalogEntry} [1..*] = a row in the lookup table containing storage information about a single file
13
+ # * {FileSignature} [1] = file fixity information
14
+ #
15
+ # * <b>{FileInventoryDifference} = compares two {FileInventory} instances based on file signatures and pathnames</b>
16
+ # * {FileGroupDifference} [1..*] = performs analysis and reports differences between two matching {FileGroup} objects
17
+ # * {FileGroupDifferenceSubset} [1..5] = collects a set of file-level differences of a given change type
18
+ # * {FileInstanceDifference} [1..*] = contains difference information at the file level
19
+ # * {FileSignature} [1..2] = contains the file signature(s) of two file instances being compared
20
+ #
21
+ # * <b>{VersionMetadata} = descriptive information about a digital object's versions</b>
22
+ # * {VersionMetadataEntry} [1..*] = attributes of a digital object version
23
+ # * {VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps
24
+ #
25
+ # * <b>{StorageObject} = represents a digital object's repository storage location and ingest/dissemination methods</b>
26
+ # * {StorageObjectVersion} [1..*] = represents a version subdirectory within an object's home directory
27
+ # * {Bagger} [1] = utility for creating bagit packages for ingest or dissemination
28
+ #
29
+ # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
30
+ # All rights reserved. See {file:LICENSE.rdoc} for details.
31
+ module Moab
32
+ end
33
+
34
+ require 'serializer'
35
+ include Serializer
36
+ require 'confstruct/configuration'
37
+ require 'moab/config'
38
+ require 'moab/file_signature'
39
+ require 'moab/file_instance'
40
+ require 'moab/file_manifestation'
41
+ require 'moab/file_group'
42
+ require 'moab/file_inventory'
43
+ require 'moab/signature_catalog_entry'
44
+ require 'moab/signature_catalog'
45
+ require 'moab/file_instance_difference'
46
+ require 'moab/file_group_difference_subset'
47
+ require 'moab/file_group_difference'
48
+ require 'moab/file_inventory_difference'
49
+ require 'moab/version_metadata_event'
50
+ require 'moab/version_metadata_entry'
51
+ require 'moab/version_metadata'
52
+ require 'moab/bagger'
53
+ require 'moab/storage_object'
54
+ require 'moab/storage_object_version'
55
+ require 'moab/storage_repository'
56
+ require 'moab/storage_services'
57
+ require 'moab/exceptions'
58
+ require 'moab/verification_result'
59
+
@@ -0,0 +1,289 @@
1
+ require 'moab'
2
+ require 'systemu'
3
+
4
+ module Moab
5
+
6
+ # A class used to create a BagIt package from a version inventory and a set of source files.
7
+ # The {#fill_bag} method is called with a package_mode parameter that specifies
8
+ # whether the bag is being created for deposit into the repository or is to contain the output of a version reconstruction.
9
+ # * In <b>:depositor</b> mode, the version inventory is filtered using the digital object's signature catalog so that only new files are included
10
+ # * In <b>:reconstructor</b> mode, the version inventory and signature catalog are used together to regenerate the complete set of files for the version.
11
+ #
12
+ # ====Data Model
13
+ # * {StorageRepository} = represents a digital object repository storage node
14
+ # * {StorageServices} = supports application layer access to the repository's objects, data, and metadata
15
+ # * {StorageObject} = represents a digital object's repository storage location and ingest/dissemination methods
16
+ # * {StorageObjectVersion} [1..*] = represents a version subdirectory within an object's home directory
17
+ # * <b>{Bagger} [1] = utility for creating bagit packages for ingest or dissemination</b>
18
+ #
19
+ # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
20
+ # All rights reserved. See {file:LICENSE.rdoc} for details.
21
+ class Bagger
22
+
23
+ # @param version_inventory [FileInventory] The complete inventory of the files comprising a digital object version
24
+ # @param signature_catalog [SignatureCatalog] The signature catalog, used to specify source paths (in :reconstructor mode),
25
+ # or to filter the version inventory (in :depositor mode)
26
+ # @param bag_pathname [Pathname,String] The location of the Bagit bag to be created
27
+ def initialize(version_inventory, signature_catalog, bag_pathname)
28
+ @version_inventory = version_inventory
29
+ @signature_catalog = signature_catalog
30
+ @bag_pathname = Pathname.new(bag_pathname)
31
+ create_bagit_txt()
32
+ end
33
+
34
+ # @return [FileInventory] The complete inventory of the files comprising a digital object version
35
+ attr_accessor :version_inventory
36
+
37
+ # @return [SignatureCatalog] The signature catalog, used to specify source paths (in :reconstructor mode),
38
+ # or to filter the version inventory (in :depositor mode)
39
+ attr_accessor :signature_catalog
40
+
41
+ # @return [Pathname] The location of the Bagit bag to be created
42
+ attr_accessor :bag_pathname
43
+
44
+ # @return [FileInventory] The actual inventory of the files to be packaged (derived from @version_inventory in {#fill_bag})
45
+ attr_accessor :bag_inventory
46
+
47
+ # @return [Symbol] The operational mode controlling what gets bagged {#fill_bag}
48
+ # and the full path of source files {#fill_payload}
49
+ attr_accessor :package_mode
50
+
51
+ # @return [void] Delete any existing bag data and re-initialize the bag directory
52
+ def reset_bag
53
+ delete_bag
54
+ delete_tarfile
55
+ create_bagit_txt
56
+ end
57
+
58
+ # @api internal
59
+ # @return [void] Generate the bagit.txt tag file
60
+ def create_bagit_txt()
61
+ @bag_pathname.mkpath
62
+ @bag_pathname.join("bagit.txt").open('w') do |f|
63
+ f.puts "Tag-File-Character-Encoding: UTF-8"
64
+ f.puts "BagIt-Version: 0.97"
65
+ end
66
+ end
67
+
68
+ # @return [NilClass] Delete the bagit files
69
+ def delete_bag()
70
+ # make sure this looks like a bag before deleting
71
+ if @bag_pathname.join('bagit.txt').exist?
72
+ if @bag_pathname.join('data').exist?
73
+ @bag_pathname.rmtree
74
+ else
75
+ @bag_pathname.children.each {|file| file.delete}
76
+ @bag_pathname.rmdir
77
+ end
78
+ end
79
+ nil
80
+ end
81
+
82
+ # @return [void] Delete the tar file named "<bag name>.tar" in the bag's parent directory, if it exists (takes no arguments)
83
+ def delete_tarfile()
84
+ bag_name = @bag_pathname.basename
85
+ bag_parent = @bag_pathname.parent
86
+ tar_pathname = bag_parent.join("#{bag_name}.tar")
87
+ tar_pathname.delete if tar_pathname.exist?
88
+ end
89
+
90
+ # @api external
91
+ # @param package_mode [Symbol] The operational mode controlling what gets bagged and the full path of source files (Bagger#fill_payload)
92
+ # @param source_base_pathname [Pathname] The home location of the source files
93
+ # @return [Bagger] Perform all the operations required to fill the bag payload, write the manifests and tagfiles, and checksum the tagfiles
94
+ # @example {include:file:spec/features/storage/deposit_spec.rb}
95
+ def fill_bag(package_mode, source_base_pathname)
96
+ create_bag_inventory(package_mode)
97
+ fill_payload(source_base_pathname)
98
+ create_tagfiles
99
+ self
100
+ end
101
+
102
+ # @api external
103
+ # @param package_mode [Symbol] The operational mode controlling what gets bagged and the full path of source files (Bagger#fill_payload)
104
+ # @return [FileInventory] Create, write, and return the inventory of the files that will become the payload
105
+ def create_bag_inventory(package_mode)
106
+ @package_mode = package_mode
107
+ @bag_pathname.mkpath
108
+ case package_mode
109
+ when :depositor
110
+ @version_inventory.write_xml_file(@bag_pathname, 'version')
111
+ @bag_inventory = @signature_catalog.version_additions(@version_inventory)
112
+ @bag_inventory.write_xml_file(@bag_pathname, 'additions')
113
+ when :reconstructor
114
+ @bag_inventory = @version_inventory
115
+ @bag_inventory.write_xml_file(@bag_pathname, 'version')
116
+ end
117
+ @bag_inventory
118
+ end
119
+
120
+ # @api internal
121
+ # @param source_base_pathname [Pathname] The home location of the source files
122
+ # @return [void] Fill in the bag's data folder with entries for all files to be packaged for delivery.
123
+ # Each group in the bag inventory is handed to {#deposit_group} or {#reconstuct_group} depending on @package_mode;
124
+ # both of those methods create symbolic links (FileUtils.symlink) to the source files instead of copying file content
125
+ def fill_payload(source_base_pathname)
126
+ @bag_inventory.groups.each do |group|
127
+ group_id = group.group_id
128
+ case @package_mode
129
+ when :depositor
130
+ deposit_group(group_id, source_base_pathname.join(group_id))
131
+ when :reconstructor
132
+ reconstuct_group(group_id, source_base_pathname)
133
+ end
134
+ end
135
+ end
136
+
137
+ # @param group_id [String] The name of the data group being copied to the bag
138
+ # @param source_dir [Pathname] The location from which files should be copied
139
+ # @return [Boolean] Copy all the files listed in the group inventory to the bag.
140
+ # Return true if successful or nil if the group was not found in the inventory
141
+ def deposit_group(group_id, source_dir)
142
+ group = @bag_inventory.group(group_id)
143
+ return nil? if group.nil? or group.files.empty?
144
+ target_dir = @bag_pathname.join('data',group_id)
145
+ group.path_list.each do |relative_path|
146
+ source = source_dir.join(relative_path)
147
+ target = target_dir.join(relative_path)
148
+ target.parent.mkpath
149
+ FileUtils.symlink source, target
150
+ end
151
+ true
152
+ end
153
+
154
+ # @param group_id [String] The name of the data group being copied to the bag
155
+ # @param storage_object_dir [Pathname] The home location of the object store from which files should be copied
156
+ # @return [Boolean] Copy all the files listed in the group inventory to the bag.
157
+ # Return true if successful or nil if the group was not found in the inventory
158
+ def reconstuct_group(group_id, storage_object_dir)
159
+ group = @bag_inventory.group(group_id)
160
+ return nil? if group.nil? or group.files.empty?
161
+ target_dir = @bag_pathname.join('data',group_id)
162
+ group.files.each do |file|
163
+ catalog_entry = @signature_catalog.signature_hash[file.signature]
164
+ source = storage_object_dir.join(catalog_entry.storage_path)
165
+ file.instances.each do |instance|
166
+ target = target_dir.join(instance.path)
167
+ target.parent.mkpath
168
+ FileUtils.symlink source, target
169
+ end
170
+ end
171
+ true
172
+ end
173
+
174
+ # @return [Boolean] create BagIt manifests and tag files. Return true if successful
175
+ def create_tagfiles
176
+ create_payload_manifests
177
+ create_bag_info_txt
178
+ create_bagit_txt
179
+ create_tagfile_manifests
180
+ true
181
+ end
182
+
183
+ # @api internal
184
+ # @return [void] Using the checksum information from the inventory, create BagIt manifest files for the payload
185
+ def create_payload_manifests
186
+ manifest_pathname = Hash.new
187
+ manifest_file = Hash.new
188
+ manifest_types = [:md5, :sha1, :sha256]
189
+ manifest_types.each do |type|
190
+ manifest_pathname[type] = @bag_pathname.join("manifest-#{type.to_s}.txt")
191
+ manifest_file[type] = manifest_pathname[type].open('w')
192
+ end
193
+ @bag_inventory.groups.each do |group|
194
+ group.files.each do |file|
195
+ fixity = file.signature.fixity
196
+ file.instances.each do |instance|
197
+ data_path = File.join('data', group.group_id, instance.path)
198
+ manifest_types.each do |type|
199
+ manifest_file[type].puts("#{fixity[type]} #{data_path}") if fixity[type]
200
+ end
201
+ end
202
+ end
203
+ end
204
+ ensure
205
+ manifest_types.each do |type|
206
+ if manifest_file[type]
207
+ manifest_file[type].close
208
+ manifest_pathname[type].delete if
209
+ manifest_pathname[type].exist? and manifest_pathname[type].size == 0
210
+ end
211
+ end
212
+ end
213
+
214
+ # @api internal
215
+ # @return [void] Generate the bag-info.txt tag file
216
+ def create_bag_info_txt
217
+ @bag_pathname.join("bag-info.txt").open('w') do |f|
218
+ f.puts "External-Identifier: #{@bag_inventory.package_id}"
219
+ f.puts "Payload-Oxum: #{@bag_inventory.byte_count}.#{@bag_inventory.file_count}"
220
+ f.puts "Bag-Size: #{@bag_inventory.human_size}"
221
+ end
222
+ end
223
+
224
+ # @api internal
225
+ # @return [void] create BagIt tag manifest files containing checksums for all files in the bag's root directory
226
+ def create_tagfile_manifests()
227
+ manifest_pathname = Hash.new
228
+ manifest_file = Hash.new
229
+ manifest_types = [:md5, :sha1, :sha256]
230
+ manifest_types.each do |type|
231
+ manifest_pathname[type] = @bag_pathname.join("tagmanifest-#{type.to_s}.txt")
232
+ manifest_file[type] = manifest_pathname[type].open('w')
233
+ end
234
+ @bag_pathname.children.each do |file|
235
+ unless file.directory? || file.basename.to_s[0, 11] == 'tagmanifest'
236
+ signature = FileSignature.new.signature_from_file(file)
237
+ fixity = signature.fixity
238
+ manifest_types.each do |type|
239
+ manifest_file[type].puts("#{fixity[type]} #{file.basename}") if fixity[type]
240
+ end
241
+ end
242
+ end
243
+ ensure
244
+ manifest_types.each do |type|
245
+ if manifest_file[type]
246
+ manifest_file[type].close
247
+ manifest_pathname[type].delete if
248
+ manifest_pathname[type].exist? and manifest_pathname[type].size == 0
249
+ end
250
+ end
251
+ end
252
+
253
+ # @return [Boolean] Create a tar file containing the bag
254
+ def create_tarfile(tar_pathname=nil)
255
+ bag_name = @bag_pathname.basename
256
+ bag_parent = @bag_pathname.parent
257
+ tar_pathname ||= bag_parent.join("#{bag_name}.tar")
258
+ tar_cmd="cd '#{bag_parent}'; tar --dereference --force-local -cf '#{tar_pathname}' '#{bag_name}'"
259
+ begin
260
+ shell_execute(tar_cmd)
261
+ rescue
262
+ shell_execute(tar_cmd.sub('--force-local',''))
263
+ end
264
+ raise "Unable to create tarfile #{tar_pathname}" unless tar_pathname.exist?
265
+ return true
266
+
267
+ end
268
+
269
+ # Executes a system command in a subprocess.
270
+ # The method will return stdout from the command if execution was successful.
271
+ # The method will raise an exception if if execution fails.
272
+ # The exception's message will contain the explaination of the failure.
273
+ # @param [String] command the command to be executed
274
+ # @return [String] stdout from the command if execution was successful
275
+ def shell_execute(command)
276
+ status, stdout, stderr = systemu(command)
277
+ if (status.exitstatus != 0)
278
+ raise stderr
279
+ end
280
+ return stdout
281
+ rescue
282
+ msg = "Command failed to execute: [#{command}] caused by <STDERR = #{stderr.split($/).join('; ')}>"
283
+ msg << " STDOUT = #{stdout.split($/).join('; ')}" if (stdout && (stdout.length > 0))
284
+ raise msg
285
+ end
286
+
287
+ end
288
+
289
+ end
@@ -0,0 +1,21 @@
1
+ require 'moab'
2
+
3
+ module Moab
4
+
5
+ #class Configuration < Confstruct::Configuration
6
+ #
7
+ # def configure(*args, &block)
8
+ # super(*args, &block)
9
+ #
10
+ # # Whatever you want to do after configuration
11
+ # # Something.initialize(self.repository_home)
12
+ # end
13
+ #end
14
+
15
+ # @return [Confstruct::Configuration] the configuration data
16
+ Config = Confstruct::Configuration.new do
17
+ repository_home nil
18
+ path_method :druid_tree
19
+ end
20
+
21
+ end
@@ -0,0 +1,18 @@
1
+ module Moab
2
+ class ObjectNotFoundException < RuntimeError
3
+
4
+ end
5
+
6
+ class FileNotFoundException < RuntimeError
7
+
8
+ end
9
+
10
+ class InvalidMetadataException < RuntimeError
11
+
12
+ end
13
+
14
+ class ValidationException < RuntimeError
15
+
16
+ end
17
+
18
+ end
@@ -0,0 +1,244 @@
1
+ require 'moab'
2
+
3
+ module Moab
4
+
5
+ # A container for a standard subset of a digital object's {FileManifestation} objects
6
+ # Used to segregate depositor content from repository metadata files
7
+ # This is a child element of {FileInventory}, which contains a full example
8
+ #
9
+ # ====Data Model
10
+ # * {FileInventory} = container for recording information about a collection of related files
11
+ # * <b>{FileGroup} [1..*] = subset allow segregation of content and metadata files</b>
12
+ # * {FileManifestation} [1..*] = snapshot of a file's filesystem characteristics
13
+ # * {FileSignature} [1] = file fixity information
14
+ # * {FileInstance} [1..*] = filepath and timestamp of any physical file having that signature
15
+ #
16
+ # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
17
+ # All rights reserved. See {file:LICENSE.rdoc} for details.
18
+ class FileGroup < Serializable
19
+
20
+ include HappyMapper
21
+
22
+ # The name of the XML element used to serialize this object's data
23
+ tag 'fileGroup'
24
+
25
+ # (see Serializable#initialize)
26
+ def initialize(opts={})
27
+ @signature_hash = OrderedHash.new
28
+ @data_source = ""
29
+ super(opts)
30
+ end
31
+
32
+ # @attribute
33
+ # @return [String] The name of the file group
34
+ attribute :group_id, String, :tag => 'groupId', :key => true
35
+
36
+ # @attribute
37
+ # @return [String] The directory location or other source of this group's file data
38
+ attribute :data_source, String, :tag => 'dataSource'
39
+
40
+ # @attribute
41
+ # @return [Integer] The total number of data files (dynamically calculated)
42
+ attribute :file_count, Integer, :tag => 'fileCount', :on_save => Proc.new {|i| i.to_s}
43
+
44
+ def file_count
45
+ files.inject(0) { |sum, manifestation| sum + manifestation.file_count }
46
+ end
47
+
48
+ # @attribute
49
+ # @return [Integer] The total size (in bytes) of all data files (dynamically calculated)
50
+ attribute :byte_count, Integer, :tag => 'byteCount', :on_save => Proc.new {|i| i.to_s}
51
+
52
+ def byte_count
53
+ files.inject(0) { |sum, manifestation| sum + manifestation.byte_count }
54
+ end
55
+
56
+ # @attribute
57
+ # @return [Integer] The total disk usage (in 1 kB blocks) of all data files (estimating du -k result) (dynamically calculated)
58
+ attribute :block_count, Integer, :tag => 'blockCount', :on_save => Proc.new {|i| i.to_s}
59
+
60
+ def block_count
61
+ files.inject(0) { |sum, manifestation| sum + manifestation.block_count }
62
+ end
63
+
64
+ # @return [Array<String>] The data fields to include in summary reports
65
+ def summary_fields
66
+ %w{group_id file_count byte_count block_count}
67
+ end
68
+
69
+
70
+ # @attribute
71
+ # @return [Array<FileManifestation>] The set of files comprising the group
72
+ has_many :files, FileManifestation, :tag => 'file'
73
+
74
+ def files
75
+ @signature_hash.values
76
+ end
77
+
78
+ # @return [OrderedHash<FileSignature, FileManifestation>] The actual in-memory store for the collection
79
+ # of {FileManifestation} objects that are contained in this file group.
80
+ attr_accessor :signature_hash
81
+
82
+ # @api internal
83
+ # @return [OrderedHash<String,FileSignature>] An index of file paths,
84
+ # used to test for existence of a filename in this file group
85
+ def path_hash
86
+ path_hash = OrderedHash.new
87
+ @signature_hash.each do |signature,manifestation|
88
+ manifestation.instances.each do |instance|
89
+ path_hash[instance.path] = signature
90
+ end
91
+ end
92
+ path_hash
93
+ end
94
+
95
+ # @return [Array<String>] The list of file paths in this group
96
+ def path_list
97
+ files.collect{|file| file.instances.collect{|instance| instance.path}}.flatten
98
+ end
99
+
100
+ # @api internal
101
+ # @param signature_subset [Array<FileSignature>] The signatures used to select the entries to return
102
+ # @return [OrderedHash<String,FileSignature>] A pathname,signature hash containing a subset of the filenames in this file group
103
+ def path_hash_subset(signature_subset)
104
+ path_hash = OrderedHash.new
105
+ signature_subset.each do |signature|
106
+ manifestation = @signature_hash[signature]
107
+ manifestation.instances.each do |instance|
108
+ path_hash[instance.path] = signature
109
+ end
110
+ end
111
+ path_hash
112
+ end
113
+
114
+ # @param manifestiation_array [Array<FileManifestation>] The collection of {FileManifestation} objects
115
+ # that are to be added to this file group. Used by HappyMapper when deserializing a {FileInventory} file
116
+ # Add the array of {FileManifestation} objects to this file group.
117
+ def files=(manifestiation_array)
118
+ manifestiation_array.each do |manifestiation|
119
+ add_file(manifestiation)
120
+ end
121
+ end
122
+
123
+ # @api internal
124
+ # @param manifestation [FileManifestation] The file manifestation to be added
125
+ # @return [void] Add a single {FileManifestation} object to this group
126
+ def add_file(manifestation)
127
+ manifestation.instances.each do |instance|
128
+ add_file_instance(manifestation.signature, instance)
129
+ end
130
+ end
131
+
132
+ # @api internal
133
+ # @param signature [FileSignature] The signature of the file instance to be added
134
+ # @param instance [FileInstance] The pathname and datetime of the file instance to be added
135
+ # @return [void] Add a single {FileSignature},{FileInstance} key/value pair to this group.
136
+ # Data is actually stored in the {#signature_hash}
137
+ def add_file_instance(signature,instance)
138
+ if @signature_hash.has_key?(signature)
139
+ manifestation = @signature_hash[signature]
140
+ else
141
+ manifestation = FileManifestation.new
142
+ manifestation.signature = signature
143
+ @signature_hash[signature] = manifestation
144
+ end
145
+ manifestation.instances << instance
146
+ end
147
+
148
+ # @param path [String] The path of the file to be removed
149
+ # @return [void] Remove a file from the inventory (for example, the manifest inventory does not contain a file entry for itself).
150
+ # NOTE(review): looks up the path's signature and deletes that whole {#signature_hash} entry, so any other instances sharing the signature are removed too -- confirm intended
151
+ def remove_file_having_path(path)
152
+ signature = self.path_hash[path]
153
+ @signature_hash.delete(signature)
154
+ end
155
+
156
+ # @return [Pathname] The full path used as the basis of the relative paths reported
157
+ # in {FileInstance} objects that are children of the {FileManifestation} objects contained in this file group
158
+ attr_accessor :base_directory
159
+
160
+ def base_directory=(basepath)
161
+ @base_directory = Pathname.new(basepath).expand_path
162
+ end
163
+
164
+ # @api internal
165
+ # @param pathname [Pathname] The file path to be tested
166
+ # @return [Boolean] Test whether the given path is contained within the {#base_directory}
167
+ def is_descendent_of_base?(pathname)
168
+ raise("base_directory has not been set") if @base_directory.nil?
169
+ is_descendent = false
170
+ pathname.expand_path.ascend {|ancestor| is_descendent ||= (ancestor == @base_directory)}
171
+ raise("#{pathname} is not a descendent of #{@base_directory}") unless is_descendent
172
+ is_descendent
173
+ end
174
+
175
+ # @param directory [Pathname,String] The directory whose children are to be added to the file group
176
+ # @param signatures_from_bag [Hash<Pathname,Signature>] The fixity data already calculated for the files
177
+ # @param recursive [Boolean] if true, descend into child directories
178
+ # @return [FileGroup] Harvest a directory (using digest hash for fixity data) and add all files to the file group
179
+ def group_from_bagit_subdir(directory, signatures_from_bag, recursive=true)
180
+ @signatures_from_bag = signatures_from_bag
181
+ group_from_directory(directory, recursive)
182
+ end
183
+
184
+ # @api internal
185
+ # @param directory [Pathname,String] The location of the files to harvest
186
+ # @param recursive [Boolean] if true, descend into child directories
187
+ # @return [FileGroup] Harvest a directory and add all files to the file group
188
+ def group_from_directory(directory, recursive=true)
189
+ self.base_directory = directory
190
+ @data_source = @base_directory.to_s
191
+ harvest_directory(directory, recursive)
192
+ self
193
+ rescue Exception # Errno::ENOENT
194
+ @data_source = directory.to_s
195
+ self
196
+ end
197
+
198
+ # @api internal
199
+ # @param path [Pathname,String] pathname of the directory to be harvested
200
+ # @param recursive [Boolean] if true, also harvest subdirectories
201
+ # @param validated [Boolean] if true, path is verified to be descendant of (#base_directory)
202
+ # @return [void] Traverse a directory tree and add all files to the file group
203
+ # Note that unlike Find.find and Dir.glob, Pathname passes through symbolic links
204
+ # @see http://stackoverflow.com/questions/3974087/how-to-make-rubys-find-find-follow-symlinks
205
+ # @see http://stackoverflow.com/questions/357754/can-i-traverse-symlinked-directories-in-ruby-with-a-glob
206
+ def harvest_directory(path, recursive, validated=nil)
207
+ pathname=Pathname.new(path).expand_path
208
+ validated ||= is_descendent_of_base?(pathname)
209
+ pathname.children.sort.each do |child|
210
+ if child.basename.to_s == ".DS_Store"
211
+ next
212
+ elsif child.directory?
213
+ harvest_directory(child,recursive, validated) if recursive
214
+ else
215
+ add_physical_file(child, validated)
216
+ end
217
+ end
218
+ nil
219
+ end
220
+
221
+ # @api internal
222
+ # @param pathname [Pathname, String] The location of the file to be added
223
+ # @param validated [Boolean] if true, path is verified to be descendant of (#base_directory)
224
+ # @return [void] Add a single physical file's data to the array of files in this group.
225
+ # If fixity data was supplied in bag manifests, then utilize that data.
226
+ def add_physical_file(pathname, validated=nil)
227
+ pathname=Pathname.new(pathname).expand_path
228
+ validated ||= is_descendent_of_base?(pathname)
229
+ instance = FileInstance.new.instance_from_file(pathname, @base_directory)
230
+ if @signatures_from_bag && @signatures_from_bag[pathname]
231
+ signature = @signatures_from_bag[pathname]
232
+ unless signature.complete?
233
+ signature = signature.normalized_signature(pathname)
234
+ end
235
+ else
236
+ signature = FileSignature.new.signature_from_file(pathname)
237
+ end
238
+ add_file_instance(signature,instance)
239
+ end
240
+
241
+ end
242
+
243
+ end
244
+