moab-versioning 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/lib/moab.rb +59 -0
  3. data/lib/moab/bagger.rb +289 -0
  4. data/lib/moab/config.rb +21 -0
  5. data/lib/moab/exceptions.rb +18 -0
  6. data/lib/moab/file_group.rb +244 -0
  7. data/lib/moab/file_group_difference.rb +336 -0
  8. data/lib/moab/file_group_difference_subset.rb +45 -0
  9. data/lib/moab/file_instance.rb +82 -0
  10. data/lib/moab/file_instance_difference.rb +54 -0
  11. data/lib/moab/file_inventory.rb +279 -0
  12. data/lib/moab/file_inventory_difference.rb +132 -0
  13. data/lib/moab/file_manifestation.rb +85 -0
  14. data/lib/moab/file_signature.rb +200 -0
  15. data/lib/moab/signature_catalog.rb +195 -0
  16. data/lib/moab/signature_catalog_entry.rb +61 -0
  17. data/lib/moab/storage_object.rb +220 -0
  18. data/lib/moab/storage_object_version.rb +333 -0
  19. data/lib/moab/storage_repository.rb +57 -0
  20. data/lib/moab/storage_services.rb +104 -0
  21. data/lib/moab/verification_result.rb +83 -0
  22. data/lib/moab/version_metadata.rb +38 -0
  23. data/lib/moab/version_metadata_entry.rb +64 -0
  24. data/lib/moab/version_metadata_event.rb +47 -0
  25. data/lib/moab_stanford.rb +18 -0
  26. data/lib/monkey_patches.rb +65 -0
  27. data/lib/serializer.rb +36 -0
  28. data/lib/serializer/manifest.rb +76 -0
  29. data/lib/serializer/serializable.rb +178 -0
  30. data/lib/stanford/active_fedora_object.rb +34 -0
  31. data/lib/stanford/content_inventory.rb +236 -0
  32. data/lib/stanford/dor_metadata.rb +49 -0
  33. data/lib/stanford/storage_repository.rb +46 -0
  34. data/lib/stanford/storage_services.rb +66 -0
  35. data/lib/tasks/yard.rake +34 -0
  36. data/lib/tools/api_doc_generator.rb +396 -0
  37. data/lib/tools/spec_generator.rb +410 -0
  38. data/lib/tools/spec_generator_old.rb +49 -0
  39. metadata +252 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: fe7dbe157c6bc9418f8b336b65431a1266ca4375
4
+ data.tar.gz: 5328ef34ef060962cf9598001c6d146cde7afdc3
5
+ SHA512:
6
+ metadata.gz: 79893b2a0f5b5d5791cee7ef9d26213d2f6876ea32f71a258d6d3030982fa8a6824f5285de20e343775ca1dc4625cbc16d32c793182116f830c7a4065dd5c5ce
7
+ data.tar.gz: e73bae98354c6001887a6a1fa24589919e696596c74ffdaac0b3132b123c761fea0850e5000d3795443af49b696a3e4cbc92567d4ccb7fa5020f1cbfa7aeb191
data/lib/moab.rb ADDED
@@ -0,0 +1,59 @@
1
+ # Moab is a module that provides a distinctive namespace for the collection of classes it contains.
2
+ #
3
+ # ====Data Model
4
+ #
5
+ # * <b>{FileInventory} = container for recording information about a collection of related files</b>
6
+ # * {FileGroup} [1..*] = subset allowing segregation of content and metadata files
7
+ # * {FileManifestation} [1..*] = snapshot of a file's filesystem characteristics
8
+ # * {FileSignature} [1] = file fixity information
9
+ # * {FileInstance} [1..*] = filepath and timestamp of any physical file having that signature
10
+ #
11
+ # * <b>{SignatureCatalog} = lookup table containing a cumulative collection of all files ever ingested</b>
12
+ # * {SignatureCatalogEntry} [1..*] = a row in the lookup table containing storage information about a single file
13
+ # * {FileSignature} [1] = file fixity information
14
+ #
15
+ # * <b>{FileInventoryDifference} = compares two {FileInventory} instances based on file signatures and pathnames</b>
16
+ # * {FileGroupDifference} [1..*] = performs analysis and reports differences between two matching {FileGroup} objects
17
+ # * {FileGroupDifferenceSubset} [1..5] = collects a set of file-level differences of a given change type
18
+ # * {FileInstanceDifference} [1..*] = contains difference information at the file level
19
+ # * {FileSignature} [1..2] = contains the file signature(s) of two file instances being compared
20
+ #
21
+ # * <b>{VersionMetadata} = descriptive information about a digital object's versions</b>
22
+ # * {VersionMetadataEntry} [1..*] = attributes of a digital object version
23
+ # * {VersionMetadataEvent} [1..*] = object version lifecycle events with timestamps
24
+ #
25
+ # * <b>{StorageObject} = represents a digital object's repository storage location and ingest/dissemination methods</b>
26
+ # * {StorageObjectVersion} [1..*] = represents a version subdirectory within an object's home directory
27
+ # * {Bagger} [1] = utility for creating bagit packages for ingest or dissemination
28
+ #
29
+ # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
30
+ # All rights reserved. See {file:LICENSE.rdoc} for details.
31
+ module Moab
32
+ end
33
+
34
+ require 'serializer'
35
+ include Serializer
36
+ require 'confstruct/configuration'
37
+ require 'moab/config'
38
+ require 'moab/file_signature'
39
+ require 'moab/file_instance'
40
+ require 'moab/file_manifestation'
41
+ require 'moab/file_group'
42
+ require 'moab/file_inventory'
43
+ require 'moab/signature_catalog_entry'
44
+ require 'moab/signature_catalog'
45
+ require 'moab/file_instance_difference'
46
+ require 'moab/file_group_difference_subset'
47
+ require 'moab/file_group_difference'
48
+ require 'moab/file_inventory_difference'
49
+ require 'moab/version_metadata_event'
50
+ require 'moab/version_metadata_entry'
51
+ require 'moab/version_metadata'
52
+ require 'moab/bagger'
53
+ require 'moab/storage_object'
54
+ require 'moab/storage_object_version'
55
+ require 'moab/storage_repository'
56
+ require 'moab/storage_services'
57
+ require 'moab/exceptions'
58
+ require 'moab/verification_result'
59
+
@@ -0,0 +1,289 @@
1
+ require 'moab'
2
+ require 'systemu'
3
+
4
+ module Moab
5
+
6
+ # A class used to create a BagIt package from a version inventory and a set of source files.
7
+ # The {#fill_bag} method is called with a package_mode parameter that specifies
8
+ # whether the bag is being created for deposit into the repository or is to contain the output of a version reconstruction.
9
+ # * In <b>:depositor</b> mode, the version inventory is filtered using the digital object's signature catalog so that only new files are included
10
+ # * In <b>:reconstructor</b> mode, the version inventory and signature catalog are used together to regenerate the complete set of files for the version.
11
+ #
12
+ # ====Data Model
13
+ # * {StorageRepository} = represents a digital object repository storage node
14
+ # * {StorageServices} = supports application layer access to the repository's objects, data, and metadata
15
+ # * {StorageObject} = represents a digital object's repository storage location and ingest/dissemination methods
16
+ # * {StorageObjectVersion} [1..*] = represents a version subdirectory within an object's home directory
17
+ # * <b>{Bagger} [1] = utility for creating bagit packages for ingest or dissemination</b>
18
+ #
19
+ # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
20
+ # All rights reserved. See {file:LICENSE.rdoc} for details.
21
+ class Bagger
22
+
23
+ # @param version_inventory [FileInventory] The complete inventory of the files comprising a digital object version
24
+ # @param signature_catalog [SignatureCatalog] The signature catalog, used to specify source paths (in :reconstructor mode),
25
+ # or to filter the version inventory (in :depositor mode)
26
+ # @param bag_pathname [Pathname,String] The location of the Bagit bag to be created
27
+ def initialize(version_inventory, signature_catalog, bag_pathname)
28
+ @version_inventory = version_inventory
29
+ @signature_catalog = signature_catalog
30
+ @bag_pathname = Pathname.new(bag_pathname)
31
+ create_bagit_txt()
32
+ end
33
+
34
+ # @return [FileInventory] The complete inventory of the files comprising a digital object version
35
+ attr_accessor :version_inventory
36
+
37
+ # @return [SignatureCatalog] The signature catalog, used to specify source paths (in :reconstructor mode),
38
+ # or to filter the version inventory (in :depositor mode)
39
+ attr_accessor :signature_catalog
40
+
41
+ # @return [Pathname] The location of the Bagit bag to be created
42
+ attr_accessor :bag_pathname
43
+
44
+ # @return [FileInventory] The actual inventory of the files to be packaged (derived from @version_inventory in {#fill_bag})
45
+ attr_accessor :bag_inventory
46
+
47
+ # @return [Symbol] The operational mode controlling what gets bagged {#fill_bag}
48
+ # and the full path of source files {#fill_payload}
49
+ attr_accessor :package_mode
50
+
51
+ # @return [void] Delete any existing bag data and re-initialize the bag directory
52
+ def reset_bag
53
+ delete_bag
54
+ delete_tarfile
55
+ create_bagit_txt
56
+ end
57
+
58
+ # @api internal
59
+ # @return [void] Generate the bagit.txt tag file
60
+ def create_bagit_txt()
61
+ @bag_pathname.mkpath
62
+ @bag_pathname.join("bagit.txt").open('w') do |f|
63
+ f.puts "Tag-File-Character-Encoding: UTF-8"
64
+ f.puts "BagIt-Version: 0.97"
65
+ end
66
+ end
67
+
68
+ # @return [NilClass] Delete the bagit files
69
+ def delete_bag()
70
+ # make sure this looks like a bag before deleting
71
+ if @bag_pathname.join('bagit.txt').exist?
72
+ if @bag_pathname.join('data').exist?
73
+ @bag_pathname.rmtree
74
+ else
75
+ @bag_pathname.children.each {|file| file.delete}
76
+ @bag_pathname.rmdir
77
+ end
78
+ end
79
+ nil
80
+ end
81
+
82
# Delete the tar file associated with this bag, if it exists.
# @param tar_pathname [Pathname,String,nil] The location of the tar file
#   (default is "<bag name>.tar" in the bag's parent directory, the same default used by #create_tarfile)
# @return [void]
def delete_tarfile(tar_pathname=nil)
  bag_name = @bag_pathname.basename
  bag_parent = @bag_pathname.parent
  # Accept either a String or a Pathname; fall back to the location derived from the bag
  tar_pathname = Pathname.new(tar_pathname || bag_parent.join("#{bag_name}.tar"))
  tar_pathname.delete if tar_pathname.exist?
end
89
+
90
+ # @api external
91
+ # @param package_mode [Symbol] The operational mode controlling what gets bagged and the full path of source files (Bagger#fill_payload)
92
+ # @param source_base_pathname [Pathname] The home location of the source files
93
+ # @return [Bagger] Perform all the operations required to fill the bag payload, write the manifests and tagfiles, and checksum the tagfiles
94
+ # @example {include:file:spec/features/storage/deposit_spec.rb}
95
+ def fill_bag(package_mode, source_base_pathname)
96
+ create_bag_inventory(package_mode)
97
+ fill_payload(source_base_pathname)
98
+ create_tagfiles
99
+ self
100
+ end
101
+
102
+ # @api external
103
+ # @param package_mode [Symbol] The operational mode controlling what gets bagged and the full path of source files (Bagger#fill_payload)
104
+ # @return [FileInventory] Create, write, and return the inventory of the files that will become the payload
105
+ def create_bag_inventory(package_mode)
106
+ @package_mode = package_mode
107
+ @bag_pathname.mkpath
108
+ case package_mode
109
+ when :depositor
110
+ @version_inventory.write_xml_file(@bag_pathname, 'version')
111
+ @bag_inventory = @signature_catalog.version_additions(@version_inventory)
112
+ @bag_inventory.write_xml_file(@bag_pathname, 'additions')
113
+ when :reconstructor
114
+ @bag_inventory = @version_inventory
115
+ @bag_inventory.write_xml_file(@bag_pathname, 'version')
116
+ end
117
+ @bag_inventory
118
+ end
119
+
120
+ # @api internal
121
+ # @param source_base_pathname [Pathname] The home location of the source files
122
+ # @return [void] Fill in the bag's data folder with copies of all files to be packaged for delivery.
123
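+ # This method uses symbolic links (FileUtils.symlink, see {#deposit_group} and {#reconstuct_group}) instead of copying, in order to greatly speed up the process.
123
+ # Because the payload consists of links, the source files must remain in place until the bag is packaged ({#create_tarfile} runs tar with --dereference)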
125
+ def fill_payload(source_base_pathname)
126
+ @bag_inventory.groups.each do |group|
127
+ group_id = group.group_id
128
+ case @package_mode
129
+ when :depositor
130
+ deposit_group(group_id, source_base_pathname.join(group_id))
131
+ when :reconstructor
132
+ reconstuct_group(group_id, source_base_pathname)
133
+ end
134
+ end
135
+ end
136
+
137
# Link all the files listed in the bag inventory for the given group into the bag's data directory.
# @param group_id [String] The name of the data group being copied to the bag
# @param source_dir [Pathname] The location from which files should be copied
# @return [Boolean,nil] true if successful, or nil if the group was not found in the inventory or has no files
def deposit_group(group_id, source_dir)
  group = @bag_inventory.group(group_id)
  # Previously `return nil?`, which invokes Object#nil? on self and so returned false, not the documented nil
  return nil if group.nil? || group.files.empty?
  target_dir = @bag_pathname.join('data', group_id)
  group.path_list.each do |relative_path|
    source = source_dir.join(relative_path)
    target = target_dir.join(relative_path)
    target.parent.mkpath
    # The payload entry is a symbolic link to the source file rather than a copy
    FileUtils.symlink source, target
  end
  true
end
153
+
154
# Link all the files listed in the bag inventory for the given group into the bag's data directory,
# locating each source file through the signature catalog entry that matches the file's signature.
# @param group_id [String] The name of the data group being copied to the bag
# @param storage_object_dir [Pathname] The home location of the object store from which files should be copied
# @return [Boolean,nil] true if successful, or nil if the group was not found in the inventory or has no files
def reconstuct_group(group_id, storage_object_dir)
  group = @bag_inventory.group(group_id)
  # Previously `return nil?`, which invokes Object#nil? on self and so returned false, not the documented nil
  return nil if group.nil? || group.files.empty?
  target_dir = @bag_pathname.join('data', group_id)
  group.files.each do |file|
    catalog_entry = @signature_catalog.signature_hash[file.signature]
    source = storage_object_dir.join(catalog_entry.storage_path)
    # One stored file may appear at several paths in the version; link each instance to the same source
    file.instances.each do |instance|
      target = target_dir.join(instance.path)
      target.parent.mkpath
      FileUtils.symlink source, target
    end
  end
  true
end
173
+
174
+ # @return [Boolean] create BagIt manifests and tag files. Return true if successful
175
+ def create_tagfiles
176
+ create_payload_manifests
177
+ create_bag_info_txt
178
+ create_bagit_txt
179
+ create_tagfile_manifests
180
+ true
181
+ end
182
+
183
+ # @api internal
184
+ # @return [void] Using the checksum information from the inventory, create BagIt manifest files for the payload
185
+ def create_payload_manifests
186
+ manifest_pathname = Hash.new
187
+ manifest_file = Hash.new
188
+ manifest_types = [:md5, :sha1, :sha256]
189
+ manifest_types.each do |type|
190
+ manifest_pathname[type] = @bag_pathname.join("manifest-#{type.to_s}.txt")
191
+ manifest_file[type] = manifest_pathname[type].open('w')
192
+ end
193
+ @bag_inventory.groups.each do |group|
194
+ group.files.each do |file|
195
+ fixity = file.signature.fixity
196
+ file.instances.each do |instance|
197
+ data_path = File.join('data', group.group_id, instance.path)
198
+ manifest_types.each do |type|
199
+ manifest_file[type].puts("#{fixity[type]} #{data_path}") if fixity[type]
200
+ end
201
+ end
202
+ end
203
+ end
204
+ ensure
205
+ manifest_types.each do |type|
206
+ if manifest_file[type]
207
+ manifest_file[type].close
208
+ manifest_pathname[type].delete if
209
+ manifest_pathname[type].exist? and manifest_pathname[type].size == 0
210
+ end
211
+ end
212
+ end
213
+
214
+ # @api internal
215
+ # @return [void] Generate the bag-info.txt tag file
216
+ def create_bag_info_txt
217
+ @bag_pathname.join("bag-info.txt").open('w') do |f|
218
+ f.puts "External-Identifier: #{@bag_inventory.package_id}"
219
+ f.puts "Payload-Oxum: #{@bag_inventory.byte_count}.#{@bag_inventory.file_count}"
220
+ f.puts "Bag-Size: #{@bag_inventory.human_size}"
221
+ end
222
+ end
223
+
224
+ # @api internal
225
+ # @return [void] create BagIt tag manifest files containing checksums for all files in the bag's root directory
226
+ def create_tagfile_manifests()
227
+ manifest_pathname = Hash.new
228
+ manifest_file = Hash.new
229
+ manifest_types = [:md5, :sha1, :sha256]
230
+ manifest_types.each do |type|
231
+ manifest_pathname[type] = @bag_pathname.join("tagmanifest-#{type.to_s}.txt")
232
+ manifest_file[type] = manifest_pathname[type].open('w')
233
+ end
234
+ @bag_pathname.children.each do |file|
235
+ unless file.directory? || file.basename.to_s[0, 11] == 'tagmanifest'
236
+ signature = FileSignature.new.signature_from_file(file)
237
+ fixity = signature.fixity
238
+ manifest_types.each do |type|
239
+ manifest_file[type].puts("#{fixity[type]} #{file.basename}") if fixity[type]
240
+ end
241
+ end
242
+ end
243
+ ensure
244
+ manifest_types.each do |type|
245
+ if manifest_file[type]
246
+ manifest_file[type].close
247
+ manifest_pathname[type].delete if
248
+ manifest_pathname[type].exist? and manifest_pathname[type].size == 0
249
+ end
250
+ end
251
+ end
252
+
253
# Create a tar file containing the bag.
# @param tar_pathname [Pathname,String,nil] The location of the tar file to create
#   (default is "<bag name>.tar" in the bag's parent directory)
# @return [Boolean] true if the tar file was created
# @raise [RuntimeError] if the tar file does not exist after the tar command has been run
def create_tarfile(tar_pathname=nil)
  bag_name = @bag_pathname.basename
  bag_parent = @bag_pathname.parent
  # Normalize to a Pathname so that a String argument does not fail on the #exist? check below
  tar_pathname = Pathname.new(tar_pathname || bag_parent.join("#{bag_name}.tar"))
  # --dereference makes tar archive the files that payload links point to, not the links themselves
  tar_cmd = "cd '#{bag_parent}'; tar --dereference --force-local -cf '#{tar_pathname}' '#{bag_name}'"
  begin
    shell_execute(tar_cmd)
  rescue
    # Retry without --force-local (presumably for tar implementations that reject that option)
    shell_execute(tar_cmd.sub('--force-local', ''))
  end
  raise "Unable to create tarfile #{tar_pathname}" unless tar_pathname.exist?
  true
end
268
+
269
# Executes a system command in a subprocess.
# The method will return stdout from the command if execution was successful.
# The method will raise an exception if execution fails.
# The exception's message will contain the explanation of the failure.
# @param [String] command the command to be executed
# @return [String] stdout from the command if execution was successful
# @raise [RuntimeError] if the command exits with a non-zero status or cannot be run at all
def shell_execute(command)
  status, stdout, stderr = systemu(command)
  raise stderr.to_s if status.exitstatus != 0
  stdout
rescue => e
  # stderr is still nil when systemu itself raised; report that exception's message
  # instead of failing with NoMethodError on nil and hiding the real cause
  detail = stderr.nil? ? e.message : stderr
  msg = "Command failed to execute: [#{command}] caused by <STDERR = #{detail.split($/).join('; ')}>"
  msg << " STDOUT = #{stdout.split($/).join('; ')}" if stdout && stdout.length > 0
  raise msg
end
286
+
287
+ end
288
+
289
+ end
@@ -0,0 +1,21 @@
1
+ require 'moab'
2
+
3
+ module Moab
4
+
5
+ #class Configuration < Confstruct::Configuration
6
+ #
7
+ # def configure(*args, &block)
8
+ # super(*args, &block)
9
+ #
10
+ # # Whatever you want to do after configuration
11
+ # # Something.initialize(self.repository_home)
12
+ # end
13
+ #end
14
+
15
+ # @return [Confstruct::Configuration] the configuration data
16
+ Config = Confstruct::Configuration.new do
17
+ repository_home nil
18
+ path_method :druid_tree
19
+ end
20
+
21
+ end
@@ -0,0 +1,18 @@
1
+ module Moab
2
+ class ObjectNotFoundException < RuntimeError
3
+
4
+ end
5
+
6
+ class FileNotFoundException < RuntimeError
7
+
8
+ end
9
+
10
+ class InvalidMetadataException < RuntimeError
11
+
12
+ end
13
+
14
+ class ValidationException < RuntimeError
15
+
16
+ end
17
+
18
+ end
@@ -0,0 +1,244 @@
1
+ require 'moab'
2
+
3
+ module Moab
4
+
5
+ # A container for a standard subset of a digital object's {FileManifestation} objects
6
+ # Used to segregate depositor content from repository metadata files
7
+ # This is a child element of {FileInventory}, which contains a full example
8
+ #
9
+ # ====Data Model
10
+ # * {FileInventory} = container for recording information about a collection of related files
11
+ # * <b>{FileGroup} [1..*] = subset allowing segregation of content and metadata files</b>
12
+ # * {FileManifestation} [1..*] = snapshot of a file's filesystem characteristics
13
+ # * {FileSignature} [1] = file fixity information
14
+ # * {FileInstance} [1..*] = filepath and timestamp of any physical file having that signature
15
+ #
16
+ # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
17
+ # All rights reserved. See {file:LICENSE.rdoc} for details.
18
+ class FileGroup < Serializable
19
+
20
+ include HappyMapper
21
+
22
+ # The name of the XML element used to serialize this object's data
23
+ tag 'fileGroup'
24
+
25
+ # (see Serializable#initialize)
26
+ def initialize(opts={})
27
+ @signature_hash = OrderedHash.new
28
+ @data_source = ""
29
+ super(opts)
30
+ end
31
+
32
+ # @attribute
33
+ # @return [String] The name of the file group
34
+ attribute :group_id, String, :tag => 'groupId', :key => true
35
+
36
+ # @attribute
37
+ # @return [String] The directory location or other source of this group's file data
38
+ attribute :data_source, String, :tag => 'dataSource'
39
+
40
+ # @attribute
41
+ # @return [Integer] The total number of data files (dynamically calculated)
42
+ attribute :file_count, Integer, :tag => 'fileCount', :on_save => Proc.new {|i| i.to_s}
43
+
44
+ def file_count
45
+ files.inject(0) { |sum, manifestation| sum + manifestation.file_count }
46
+ end
47
+
48
+ # @attribute
49
+ # @return [Integer] The total size (in bytes) of all data files (dynamically calculated)
50
+ attribute :byte_count, Integer, :tag => 'byteCount', :on_save => Proc.new {|i| i.to_s}
51
+
52
+ def byte_count
53
+ files.inject(0) { |sum, manifestation| sum + manifestation.byte_count }
54
+ end
55
+
56
+ # @attribute
57
+ # @return [Integer] The total disk usage (in 1 kB blocks) of all data files (estimating du -k result) (dynamically calculated)
58
+ attribute :block_count, Integer, :tag => 'blockCount', :on_save => Proc.new {|i| i.to_s}
59
+
60
+ def block_count
61
+ files.inject(0) { |sum, manifestation| sum + manifestation.block_count }
62
+ end
63
+
64
+ # @return [Array<String>] The data fields to include in summary reports
65
+ def summary_fields
66
+ %w{group_id file_count byte_count block_count}
67
+ end
68
+
69
+
70
+ # @attribute
71
+ # @return [Array<FileManifestation>] The set of files comprising the group
72
+ has_many :files, FileManifestation, :tag => 'file'
73
+
74
+ def files
75
+ @signature_hash.values
76
+ end
77
+
78
+ # @return [OrderedHash<FileSignature, FileManifestation>] The actual in-memory store for the collection
79
+ # of {FileManifestation} objects that are contained in this file group.
80
+ attr_accessor :signature_hash
81
+
82
+ # @api internal
83
+ # @return [OrderedHash<String,FileSignature>] An index of file paths,
84
+ # used to test for existence of a filename in this file group
85
+ def path_hash
86
+ path_hash = OrderedHash.new
87
+ @signature_hash.each do |signature,manifestation|
88
+ manifestation.instances.each do |instance|
89
+ path_hash[instance.path] = signature
90
+ end
91
+ end
92
+ path_hash
93
+ end
94
+
95
+ # @return [Array<String>] The list of file paths in this group
96
+ def path_list
97
+ files.collect{|file| file.instances.collect{|instance| instance.path}}.flatten
98
+ end
99
+
100
+ # @api internal
101
+ # @param signature_subset [Array<FileSignature>] The signatures used to select the entries to return
102
+ # @return [OrderedHash<String,FileSignature>] A pathname,signature hash containing a subset of the filenames in this file group
103
+ def path_hash_subset(signature_subset)
104
+ path_hash = OrderedHash.new
105
+ signature_subset.each do |signature|
106
+ manifestation = @signature_hash[signature]
107
+ manifestation.instances.each do |instance|
108
+ path_hash[instance.path] = signature
109
+ end
110
+ end
111
+ path_hash
112
+ end
113
+
114
+ # @param manifestiation_array [Array<FileManifestation>] The collection of {FileManifestation} objects
115
+ # that are to be added to this file group. Used by HappyMapper when deserializing a {FileInventory} file
116
+ # Add the array of {FileManifestation} objects to this file group.
117
+ def files=(manifestiation_array)
118
+ manifestiation_array.each do |manifestiation|
119
+ add_file(manifestiation)
120
+ end
121
+ end
122
+
123
+ # @api internal
124
+ # @param manifestation [FileManifestation] The file manifestation to be added
125
+ # @return [void] Add a single {FileManifestation} object to this group
126
+ def add_file(manifestation)
127
+ manifestation.instances.each do |instance|
128
+ add_file_instance(manifestation.signature, instance)
129
+ end
130
+ end
131
+
132
+ # @api internal
133
+ # @param signature [FileSignature] The signature of the file instance to be added
134
+ # @param instance [FileInstance] The pathname and datetime of the file instance to be added
135
+ # @return [void] Add a single {FileSignature},{FileInstance} key/value pair to this group.
136
+ # Data is actually stored in the {#signature_hash}
137
+ def add_file_instance(signature,instance)
138
+ if @signature_hash.has_key?(signature)
139
+ manifestation = @signature_hash[signature]
140
+ else
141
+ manifestation = FileManifestation.new
142
+ manifestation.signature = signature
143
+ @signature_hash[signature] = manifestation
144
+ end
145
+ manifestation.instances << instance
146
+ end
147
+
148
+ # @param path [String] The path of the file to be removed
149
+ # @return [void] Remove a file from the inventory
150
+ # for example, the manifest inventory does not contain a file entry for itself
151
+ def remove_file_having_path(path)
152
+ signature = self.path_hash[path]
153
+ @signature_hash.delete(signature)
154
+ end
155
+
156
+ # @return [Pathname] The full path used as the basis of the relative paths reported
157
+ # in {FileInstance} objects that are children of the {FileManifestation} objects contained in this file group
158
+ attr_accessor :base_directory
159
+
160
+ def base_directory=(basepath)
161
+ @base_directory = Pathname.new(basepath).expand_path
162
+ end
163
+
164
+ # @api internal
165
+ # @param pathname [Pathname] The file path to be tested
166
+ # @return [Boolean] Test whether the given path is contained within the {#base_directory}
167
+ def is_descendent_of_base?(pathname)
168
+ raise("base_directory has not been set") if @base_directory.nil?
169
+ is_descendent = false
170
+ pathname.expand_path.ascend {|ancestor| is_descendent ||= (ancestor == @base_directory)}
171
+ raise("#{pathname} is not a descendent of #{@base_directory}") unless is_descendent
172
+ is_descendent
173
+ end
174
+
175
+ # @param directory [Pathname,String] The directory whose children are to be added to the file group
176
+ # @param signatures_from_bag [Hash<Pathname,Signature>] The fixity data already calculated for the files
177
+ # @param recursive [Boolean] if true, descend into child directories
178
+ # @return [FileGroup] Harvest a directory (using digest hash for fixity data) and add all files to the file group
179
+ def group_from_bagit_subdir(directory, signatures_from_bag, recursive=true)
180
+ @signatures_from_bag = signatures_from_bag
181
+ group_from_directory(directory, recursive)
182
+ end
183
+
184
# @api internal
# Harvest a directory and add all files to the file group.
# If harvesting fails (e.g. the directory does not exist), the error is absorbed and the group
# is returned with only its data_source set to the directory as given.
# @param directory [Pathname,String] The location of the files to harvest
# @param recursive [Boolean] if true, descend into child directories
# @return [FileGroup] this file group
def group_from_directory(directory, recursive=true)
  self.base_directory = directory
  @data_source = @base_directory.to_s
  harvest_directory(directory, recursive)
  self
rescue StandardError # e.g. Errno::ENOENT; no longer `rescue Exception`, so Interrupt/SystemExit propagate
  @data_source = directory.to_s
  self
end
197
+
198
+ # @api internal
199
+ # @param path [Pathname,String] pathname of the directory to be harvested
200
+ # @param recursive [Boolean] if true, also harvest subdirectories
201
+ # @param validated [Boolean] if true, path is verified to be descendant of (#base_directory)
202
+ # @return [void] Traverse a directory tree and add all files to the file group
203
+ # Note that unlike Find.find and Dir.glob, Pathname passes through symbolic links
204
+ # @see http://stackoverflow.com/questions/3974087/how-to-make-rubys-find-find-follow-symlinks
205
+ # @see http://stackoverflow.com/questions/357754/can-i-traverse-symlinked-directories-in-ruby-with-a-glob
206
+ def harvest_directory(path, recursive, validated=nil)
207
+ pathname=Pathname.new(path).expand_path
208
+ validated ||= is_descendent_of_base?(pathname)
209
+ pathname.children.sort.each do |child|
210
+ if child.basename.to_s == ".DS_Store"
211
+ next
212
+ elsif child.directory?
213
+ harvest_directory(child,recursive, validated) if recursive
214
+ else
215
+ add_physical_file(child, validated)
216
+ end
217
+ end
218
+ nil
219
+ end
220
+
221
+ # @api internal
222
+ # @param pathname [Pathname, String] The location of the file to be added
223
+ # @param validated [Boolean] if true, path is verified to be descendant of (#base_directory)
224
+ # @return [void] Add a single physical file's data to the array of files in this group.
225
+ # If fixity data was supplied in bag manifests, then utilize that data.
226
+ def add_physical_file(pathname, validated=nil)
227
+ pathname=Pathname.new(pathname).expand_path
228
+ validated ||= is_descendent_of_base?(pathname)
229
+ instance = FileInstance.new.instance_from_file(pathname, @base_directory)
230
+ if @signatures_from_bag && @signatures_from_bag[pathname]
231
+ signature = @signatures_from_bag[pathname]
232
+ unless signature.complete?
233
+ signature = signature.normalized_signature(pathname)
234
+ end
235
+ else
236
+ signature = FileSignature.new.signature_from_file(pathname)
237
+ end
238
+ add_file_instance(signature,instance)
239
+ end
240
+
241
+ end
242
+
243
+ end
244
+