moab-versioning 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/moab.rb +59 -0
- data/lib/moab/bagger.rb +289 -0
- data/lib/moab/config.rb +21 -0
- data/lib/moab/exceptions.rb +18 -0
- data/lib/moab/file_group.rb +244 -0
- data/lib/moab/file_group_difference.rb +336 -0
- data/lib/moab/file_group_difference_subset.rb +45 -0
- data/lib/moab/file_instance.rb +82 -0
- data/lib/moab/file_instance_difference.rb +54 -0
- data/lib/moab/file_inventory.rb +279 -0
- data/lib/moab/file_inventory_difference.rb +132 -0
- data/lib/moab/file_manifestation.rb +85 -0
- data/lib/moab/file_signature.rb +200 -0
- data/lib/moab/signature_catalog.rb +195 -0
- data/lib/moab/signature_catalog_entry.rb +61 -0
- data/lib/moab/storage_object.rb +220 -0
- data/lib/moab/storage_object_version.rb +333 -0
- data/lib/moab/storage_repository.rb +57 -0
- data/lib/moab/storage_services.rb +104 -0
- data/lib/moab/verification_result.rb +83 -0
- data/lib/moab/version_metadata.rb +38 -0
- data/lib/moab/version_metadata_entry.rb +64 -0
- data/lib/moab/version_metadata_event.rb +47 -0
- data/lib/moab_stanford.rb +18 -0
- data/lib/monkey_patches.rb +65 -0
- data/lib/serializer.rb +36 -0
- data/lib/serializer/manifest.rb +76 -0
- data/lib/serializer/serializable.rb +178 -0
- data/lib/stanford/active_fedora_object.rb +34 -0
- data/lib/stanford/content_inventory.rb +236 -0
- data/lib/stanford/dor_metadata.rb +49 -0
- data/lib/stanford/storage_repository.rb +46 -0
- data/lib/stanford/storage_services.rb +66 -0
- data/lib/tasks/yard.rake +34 -0
- data/lib/tools/api_doc_generator.rb +396 -0
- data/lib/tools/spec_generator.rb +410 -0
- data/lib/tools/spec_generator_old.rb +49 -0
- metadata +252 -0
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'moab'
|
2
|
+
|
3
|
+
module Moab
|
4
|
+
|
5
|
+
# A container for a file signature and all the physical file instances that have that signature
|
6
|
+
# This element has one child {FileSignature} element, and one or more {FileInstance} elements
|
7
|
+
# Regarding the class name, see
|
8
|
+
# * {http://en.wikipedia.org/wiki/Functional_Requirements_for_Bibliographic_Records}
|
9
|
+
# * {http://planets-project.eu/events/copenhagen-2009/pre-reading/docs/Modelling%20Organizational%20Preservation%20Goals_Angela%20Dappert.pdf}
|
10
|
+
#
|
11
|
+
# ====Data Model
|
12
|
+
# * {FileInventory} = container for recording information about a collection of related files
|
13
|
+
# * {FileGroup} [1..*] = subset allowing segregation of content and metadata files.
|
14
|
+
# * <b>{FileManifestation} [1..*] = snapshot of a file's filesystem characteristics</b>
|
15
|
+
# * {FileSignature} [1] = file fixity information
|
16
|
+
# * {FileInstance} [1..*] = filepath and timestamp of any physical file having that signature
|
17
|
+
#
|
18
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
19
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
20
|
+
class FileManifestation < Serializable
|
21
|
+
include HappyMapper
|
22
|
+
|
23
|
+
# The name of the XML element used to serialize this object's data
|
24
|
+
tag 'file'
|
25
|
+
|
26
|
+
# (see Serializable#initialize)
# Starts with an empty list of file instances before handing the options
# to the superclass initializer.
def initialize(opts={})
  @instances = []
  super
end
|
31
|
+
|
32
|
+
# @attribute
|
33
|
+
# @return [FileSignature] The fixity data of the file instance
|
34
|
+
element :signature, FileSignature, :tag => 'fileSignature'
|
35
|
+
|
36
|
+
# @return [FileSignature] The stored signature. If the stored value is an Array,
#   its first element is returned.
def signature
  return @signature unless @signature.is_a?(Array)
  @signature.first
end
|
39
|
+
|
40
|
+
# @param signature [FileSignature, Array<FileSignature>] The signature to store;
#   when an Array is given only its first element is kept
# @return [void]
def signature=(signature)
  @signature = if signature.is_a?(Array)
                 signature.first
               else
                 signature
               end
end
|
43
|
+
|
44
|
+
# @attribute
|
45
|
+
# @return [Array<FileInstance>] The location(s) of the file manifestation's file instances
|
46
|
+
has_many :instances, FileInstance, :tag => 'fileInstance'
|
47
|
+
|
48
|
+
# @api internal
# @return [Array<String>] The file paths of all the child {FileInstance} objects, in instance order
def paths
  instances.map(&:path)
end
|
53
|
+
|
54
|
+
# @api internal
# @return [Integer] The number of {FileInstance} objects in this manifestation
#   (i.e. how many files share this manifestation's signature)
def file_count
  instances.length
end
|
60
|
+
|
61
|
+
# @api internal
# @return [Integer] The total size (in bytes) of all files that share this manifestation's signature,
#   computed as the signature's size multiplied by the number of instances
def byte_count
  bytes_per_file = signature.size.to_i
  instance_total = file_count.to_i
  instance_total * bytes_per_file
end
|
66
|
+
|
67
|
+
# @api internal
# @return [Integer] The total disk usage (in 1 kB blocks) of all files that share this manifestation's signature
#   (estimating du -k result). Each file's size is rounded up to a whole number of blocks.
def block_count
  whole_blocks, leftover_bytes = signature.size.to_i.divmod(1024)
  # a partially filled trailing block still occupies a full block
  blocks_per_file = leftover_bytes.zero? ? whole_blocks : whole_blocks + 1
  file_count * blocks_per_file
end
|
75
|
+
|
76
|
+
# @api internal
# @param other [FileManifestation] The {FileManifestation} object to compare with self
# @return [Boolean] True if {FileManifestation} objects have same content
#   (same signature and same instances). Returns false, rather than raising,
#   when other is nil or does not expose a signature and instances.
def ==(other)
  return false unless other.respond_to?(:signature) && other.respond_to?(:instances)
  (signature == other.signature) && (instances == other.instances)
end
|
82
|
+
|
83
|
+
end
|
84
|
+
|
85
|
+
end
|
@@ -0,0 +1,200 @@
|
|
1
|
+
require 'moab'
|
2
|
+
|
3
|
+
module Moab
|
4
|
+
|
5
|
+
# The fixity properties of a file, used to determine file content equivalence regardless of filename.
|
6
|
+
# Placing this data in a class by itself facilitates using file size together with the MD5 and SHA1 checksums
|
7
|
+
# as a single key when doing comparisons against other file instances. The Moab design assumes that this file signature
|
8
|
+
# is sufficiently unique to act as a comparator for determining file equality and eliminating file redundancy.
|
9
|
+
#
|
10
|
+
# The use of signatures for a compare-by-hash mechanism introduces a minuscule (but non-zero) risk
|
11
|
+
# that two non-identical files will have the same checksum. While this risk is only about 1 in 10^48
|
12
|
+
# when using the SHA1 checksum alone, it can be reduced even further (to about 1 in 10^86)
|
13
|
+
# if we use the MD5 and SHA1 checksums together. And we gain a bit more comfort by including a comparison of file sizes.
|
14
|
+
#
|
15
|
+
# Finally, the "collision" risk is reduced by isolation of each digital object's file pool within an object folder,
|
16
|
+
# instead of in a common storage area shared by the whole repository.
|
17
|
+
#
|
18
|
+
# ====Data Model
|
19
|
+
# * {FileInventory} = container for recording information about a collection of related files
|
20
|
+
# * {FileGroup} [1..*] = subset allowing segregation of content and metadata files
|
21
|
+
# * {FileManifestation} [1..*] = snapshot of a file's filesystem characteristics
|
22
|
+
# * <b>{FileSignature} [1] = file fixity information</b>
|
23
|
+
# * {FileInstance} [1..*] = filepath and timestamp of any physical file having that signature
|
24
|
+
#
|
25
|
+
# * {SignatureCatalog} = lookup table containing a cumulative collection of all files ever ingested
|
26
|
+
# * {SignatureCatalogEntry} [1..*] = a row in the lookup table containing storage information about a single file
|
27
|
+
# * <b>{FileSignature} [1] = file fixity information</b>
|
28
|
+
#
|
29
|
+
# * {FileInventoryDifference} = compares two {FileInventory} instances based on file signatures and pathnames
|
30
|
+
# * {FileGroupDifference} [1..*] = performs analysis and reports differences between two matching {FileGroup} objects
|
31
|
+
# * {FileGroupDifferenceSubset} [1..5] = collects a set of file-level differences of a given change type
|
32
|
+
# * {FileInstanceDifference} [1..*] = contains difference information at the file level
|
33
|
+
# * <b>{FileSignature} [1..2] = contains the file signature(s) of two file instances being compared</b>
|
34
|
+
#
|
35
|
+
# @see http://searchstorage.techtarget.com/feature/The-skinny-on-data-deduplication
|
36
|
+
# @see http://www.ibm.com/developerworks/wikis/download/attachments/106987789/TSMDataDeduplication.pdf
|
37
|
+
# @see https://www.redlegg.com/pdf_file/3_1320410927_HowDataDedupeWorks_WP_100809.pdf
|
38
|
+
# @see http://www.library.yale.edu/iac/DPC/AN_DPC_FixityChecksFinal11.pdf
|
39
|
+
#
|
40
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
41
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
42
|
+
class FileSignature < Serializable
|
43
|
+
|
44
|
+
include HappyMapper
|
45
|
+
|
46
|
+
# The name of the XML element used to serialize this object's data
|
47
|
+
tag 'fileSignature'
|
48
|
+
|
49
|
+
# (see Serializable#initialize)
# Adds no behavior of its own; the options are forwarded unchanged to the superclass.
def initialize(opts={})
  super
end
|
53
|
+
|
54
|
+
# @attribute
|
55
|
+
# @return [Integer] The size of the file in bytes
|
56
|
+
attribute :size, Integer, :on_save => Proc.new { |n| n.to_s }
|
57
|
+
|
58
|
+
# @attribute
|
59
|
+
# @return [String] The MD5 checksum value of the file
|
60
|
+
attribute :md5, String, :on_save => Proc.new { |n| n.nil? ? "" : n.to_s }
|
61
|
+
|
62
|
+
# @attribute
|
63
|
+
# @return [String] The SHA1 checksum value of the file
|
64
|
+
attribute :sha1, String, :on_save => Proc.new { |n| n.nil? ? "" : n.to_s }
|
65
|
+
|
66
|
+
# @attribute
|
67
|
+
# @return [String] The SHA256 checksum value of the file
|
68
|
+
attribute :sha256, String, :on_save => Proc.new { |n| n.nil? ? "" : n.to_s }
|
69
|
+
|
70
|
+
# @param type [Symbol,String] The type of checksum (md5, sha1 or sha256; case-insensitive)
# @param value [String] The checksum value
# @return [void] Set the value of the specified checksum type
# @raise [RuntimeError] if the checksum type is not one of the supported types
def set_checksum(type,value)
  normalized_type = type.to_s.downcase
  if normalized_type == 'md5'
    @md5 = value
  elsif normalized_type == 'sha1'
    @sha1 = value
  elsif normalized_type == 'sha256'
    @sha256 = value
  else
    raise "Unknown checksum type '#{type.to_s}'"
  end
end
|
85
|
+
|
86
|
+
# @return [Hash<Symbol,String>] A hash of the checksum data, keyed by :md5, :sha1 and :sha256
#   (in that order). Checksums that are nil or empty are left out.
def checksums
  present = OrderedHash.new
  [[:md5, @md5], [:sha1, @sha1], [:sha256, @sha256]].each do |type, digest|
    next if digest.nil? || digest.empty?
    present[type] = digest
  end
  present
end
|
95
|
+
|
96
|
+
# @return [Boolean] The signature contains all of the 3 desired checksums
#   (i.e. {#checksums} reports three entries)
def complete?
  available = checksums
  available.size == 3
end
|
100
|
+
|
101
|
+
# @api internal
# @return [Hash<Symbol,String>] A hash of fixity data from this signature object:
#   the size (as a String) under :size, followed by the entries of {#checksums}
def fixity
  data = OrderedHash.new
  data[:size] = @size.to_s
  checksums.each_pair { |type, digest| data[type] = digest }
  data
end
|
109
|
+
|
110
|
+
# @api internal
# @param other [FileSignature] The other file signature being compared to this signature
# @return [Boolean] Returns true if self and other have comparable fixity data:
#   the sizes are equal, the two signatures share at least one checksum type,
#   and every shared checksum type has the same value.
#   Returns false, rather than raising, when other is nil or does not expose
#   a size and checksums.
def eql?(other)
  return false unless other.respond_to?(:size) && other.respond_to?(:checksums)
  return false if size.to_i != other.size.to_i
  self_checksums = checksums
  other_checksums = other.checksums
  # only checksum types present on both sides can be compared
  matching_keys = self_checksums.keys & other_checksums.keys
  return false if matching_keys.empty?
  matching_keys.all? { |key| self_checksums[key] == other_checksums[key] }
end
|
124
|
+
|
125
|
+
# @api internal
# (see #eql?)
# Delegates to {#eql?} so that both equality tests give the same answer.
def ==(other)
  self.eql?(other)
end
|
130
|
+
|
131
|
+
# @api internal
# @return [Integer] The hash-code for this signature, which is simply the file size as an integer.
#   Two file instances with the same content will have the same hash code (and will compare using eql?).
# @note The hash and eql? methods override the methods inherited from Object.
#   These methods ensure that instances of this class can be used as Hash keys.  See
#   * {http://www.paulbutcher.com/2007/10/navigating-the-equality-maze/}
#   * {http://techbot.me/2011/05/ruby-basics-equality-operators-ruby/}
#   Also overridden is {#==} so that equality tests in other contexts will also return the expected result.
def hash
  size_in_bytes = @size
  size_in_bytes.to_i
end
|
142
|
+
|
143
|
+
# @api internal
# @param pathname [Pathname] The location of the file to be digested
# @return [FileSignature] Generate a FileSignature instance containing size and checksums for a physical file.
#   Populates the size, MD5, SHA1 and SHA256 values of self and returns self.
def signature_from_file(pathname)
  @size = pathname.size
  digests = [Digest::MD5.new, Digest::SHA1.new, Digest::SHA2.new(256)]
  # Binary mode: fixity must be computed over the exact bytes on disk, with no
  # newline translation or text-mode end-of-file handling on any platform.
  pathname.open('rb') do |stream|
    # read in 8 kB chunks so that large files are never held in memory at once
    while (buffer = stream.read(8192))
      digests.each { |digest| digest.update(buffer) }
    end
  end
  @md5, @sha1, @sha256 = digests.map { |digest| digest.hexdigest }
  self
end
|
163
|
+
|
164
|
+
# @api internal
# @param pathname [Pathname] The location of the file whose full signature will be returned
# @return [FileSignature] The full signature derived from the file, unless the fixity is inconsistent with current values
# @raise [RuntimeError] if the signature computed from the file is not eql? to this signature
def normalized_signature(pathname)
  sig_from_file = FileSignature.new.signature_from_file(pathname)
  unless eql?(sig_from_file)
    # One or more of the fixity values is inconsistent, so raise an exception
    raise "Signature inconsistent between inventory and file for #{pathname}: #{self.diff(sig_from_file).inspect}"
  end
  # The full signature from file is consistent with current values
  sig_from_file
end
|
177
|
+
|
178
|
+
# @return [Hash<Symbol,Array<String>>] Key is type (e.g. :sha1), value is checksum names (e.g. ['SHA-1', 'SHA1'])
def self.checksum_names_for_type
  names_for_type = OrderedHash.new
  [
    [:md5, ['MD5']],
    [:sha1, ['SHA-1', 'SHA1']],
    [:sha256, ['SHA-256', 'SHA256']]
  ].each { |type, names| names_for_type[type] = names }
  names_for_type
end
|
186
|
+
|
187
|
+
# @return [Hash<String, Symbol>] Key is checksum name (e.g. MD5), value is checksum type (e.g. :md5).
#   This is the inverse of {FileSignature.checksum_names_for_type}.
def self.checksum_type_for_name
  checksum_names_for_type.inject(OrderedHash.new) do |type_for_name, (type, names)|
    names.each { |name| type_for_name[name] = type }
    type_for_name
  end
end
|
197
|
+
|
198
|
+
end
|
199
|
+
|
200
|
+
end
|
@@ -0,0 +1,195 @@
|
|
1
|
+
require 'moab'
|
2
|
+
|
3
|
+
module Moab
|
4
|
+
|
5
|
+
# A digital object's Signature Catalog is derived from a filtered aggregation of the file inventories
|
6
|
+
# of a digital object's set of versions. (see {#update})
|
7
|
+
# It has an entry for every file (identified by {FileSignature}) found in any of the versions,
|
8
|
+
# along with a record of the SDR storage location that was used to preserve a single file instance.
|
9
|
+
# Once this catalog has been populated, it has multiple uses:
|
10
|
+
# * The signature index is used to determine which files of a newly submitted object version
|
11
|
+
# are new additions and which are duplicates of files previously ingested. (See {#version_additions})
|
12
|
+
# (When a new version contains a mixture of added files and files carried over from the previous version
|
13
|
+
# we only need to store the files from the new version that have unique file signatures.)
|
14
|
+
# * Reconstruction of an object version (see {StorageObject#reconstruct_version}) requires a combination
|
15
|
+
# of a full version's {FileInventory} and the SignatureCatalog.
|
16
|
+
# * The catalog can also be used for performing consistency checks between manifest files and storage
|
17
|
+
#
|
18
|
+
# ====Data Model
|
19
|
+
# * <b>{SignatureCatalog} = lookup table containing a cumulative collection of all files ever ingested</b>
|
20
|
+
# * {SignatureCatalogEntry} [1..*] = a row in the lookup table containing storage information about a single file
|
21
|
+
# * {FileSignature} [1] = file fixity information
|
22
|
+
#
|
23
|
+
# @example {include:file:spec/fixtures/derivatives/manifests/v3/signatureCatalog.xml}
|
24
|
+
# @see StorageObject
|
25
|
+
# @see Bagger
|
26
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
27
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
28
|
+
class SignatureCatalog < Manifest
|
29
|
+
include HappyMapper
|
30
|
+
|
31
|
+
# The name of the XML element used to serialize this object's data
|
32
|
+
tag 'signatureCatalog'
|
33
|
+
|
34
|
+
# (see Serializable#initialize)
# Starts with an empty entry list and an empty signature index before
# handing the options to the superclass initializer.
def initialize(opts={})
  @entries = []
  @signature_hash = OrderedHash.new
  super
end
|
40
|
+
|
41
|
+
# @attribute
|
42
|
+
# @return [String] The object ID (druid)
|
43
|
+
attribute :digital_object_id, String, :tag => 'objectId'
|
44
|
+
|
45
|
+
# @attribute
|
46
|
+
# @return [Integer] The ordinal version number
|
47
|
+
attribute :version_id, Integer, :tag => 'versionId', :key => true, :on_save => Proc.new {|n| n.to_s}
|
48
|
+
|
49
|
+
# @return [String] The unique identifier concatenating digital object id with version id
#   (object id, a hyphen, then the version's directory name)
def composite_key
  version_dir = StorageObject.version_dirname(@version_id)
  @digital_object_id + '-' + version_dir
end
|
53
|
+
|
54
|
+
# @attribute
|
55
|
+
# @return [Time] The datetime at which the catalog was updated
|
56
|
+
attribute :catalog_datetime, Time, :tag => 'catalogDatetime', :on_save => Proc.new {|t| t.to_s}
|
57
|
+
|
58
|
+
# @param datetime [Object] The datetime value; it is passed through Time.input before being stored
# @return [void]
def catalog_datetime=(datetime)
  normalized = Time.input(datetime)
  @catalog_datetime = normalized
end
|
61
|
+
|
62
|
+
# @return [Object] The stored catalog datetime after being passed through Time.output
def catalog_datetime
  stored = @catalog_datetime
  Time.output(stored)
end
|
65
|
+
|
66
|
+
# @attribute
|
67
|
+
# @return [Integer] The total number of data files (dynamically calculated)
|
68
|
+
attribute :file_count, Integer, :tag => 'fileCount', :on_save => Proc.new {|t| t.to_s}
|
69
|
+
|
70
|
+
# @return [Integer] The number of entries currently in the catalog
def file_count
  entries.length
end
|
73
|
+
|
74
|
+
# @attribute
|
75
|
+
# @return [Integer] The total size (in bytes) of all data files (dynamically calculated)
|
76
|
+
attribute :byte_count, Integer, :tag => 'byteCount', :on_save => Proc.new {|t| t.to_s}
|
77
|
+
|
78
|
+
# @return [Integer] The sum of the signature sizes (in bytes) of all catalog entries
def byte_count
  total_bytes = 0
  entries.each { |entry| total_bytes += entry.signature.size.to_i }
  total_bytes
end
|
81
|
+
|
82
|
+
# @attribute
|
83
|
+
# @return [Integer] The total disk usage (in 1 kB blocks) of all data files (estimating du -k result) (dynamically calculated)
|
84
|
+
attribute :block_count, Integer, :tag => 'blockCount', :on_save => Proc.new {|t| t.to_s}
|
85
|
+
|
86
|
+
# @return [Integer] The estimated disk usage in 1 kB blocks: each entry's
#   signature size is rounded up to whole blocks and the results are summed
def block_count
  block_size = 1024
  total_blocks = 0
  entries.each do |entry|
    # adding block_size - 1 before the integer division rounds up
    total_blocks += (entry.signature.size.to_i + block_size - 1) / block_size
  end
  total_blocks
end
|
90
|
+
|
91
|
+
# @return [Array<String>] The data fields to include in summary reports
def summary_fields
  [
    'digital_object_id',
    'version_id',
    'catalog_datetime',
    'file_count',
    'byte_count',
    'block_count'
  ]
end
|
95
|
+
|
96
|
+
# @attribute
|
97
|
+
# @return [Array<SignatureCatalogEntry>] The set of data groups comprising the version
|
98
|
+
has_many :entries, SignatureCatalogEntry, :tag => 'entry'
|
99
|
+
|
100
|
+
# @param entry_array [Array<SignatureCatalogEntry>] The entries to add to the catalog
# @return [void] Passes each entry to {#add_entry}. Note that entries already in
#   the catalog are not cleared first.
def entries=(entry_array)
  entry_array.each { |entry| add_entry(entry) }
end
|
105
|
+
|
106
|
+
# @return [OrderedHash] An index having {FileSignature} objects as keys and {SignatureCatalogEntry} objects as values
|
107
|
+
attr_accessor :signature_hash
|
108
|
+
|
109
|
+
# @api internal
# @param entry [SignatureCatalogEntry] The new catalog entry
# @return [void] Add a new entry to the catalog and to the {#signature_hash} index,
#   where it is keyed by the entry's signature
def add_entry(entry)
  index_key = entry.signature
  @signature_hash[index_key] = entry
  entries.push(entry)
end
|
116
|
+
|
117
|
+
# @param [FileSignature] file_signature The signature of the file whose path is sought
# @return [String] The object-relative path of the file having the specified signature
# @raise [FileNotFoundException] if the catalog has no entry for the signature
def catalog_filepath(file_signature)
  catalog_entry = @signature_hash[file_signature]
  if catalog_entry.nil?
    message = "catalog entry not found for #{file_signature.fixity.inspect} in #{@digital_object_id} - #{@version_id}"
    raise FileNotFoundException, message
  end
  catalog_entry.storage_path
end
|
124
|
+
|
125
|
+
# @param group [FileGroup] A group of the files from a file inventory
# @param group_pathname [Pathname] The location of the directory containing the group's files
# @return [void] Inspect and upgrade the group's signature data to include all desired checksums.
#   An incomplete signature is replaced by the matching signature already in the catalog,
#   or, failing that and if a group_pathname was given, by a signature computed from the file itself.
#   Files that are neither in the catalog nor locatable on disk are left unchanged.
# @raise [RuntimeError] if group_pathname is given but does not exist
def normalize_group_signatures(group, group_pathname=nil)
  unless group_pathname.nil?
    group_pathname = Pathname(group_pathname)
    raise "Could not locate #{group_pathname}" unless group_pathname.exist?
  end
  group.files.each do |file|
    next if file.signature.complete?
    # A single index lookup replaces the former has_key? check followed by a
    # linear scan of the whole index; entries are indexed under their own signature.
    catalog_entry = @signature_hash[file.signature]
    if catalog_entry
      file.signature = catalog_entry.signature
    elsif group_pathname
      file_pathname = group_pathname.join(file.instances[0].path)
      file.signature = file.signature.normalized_signature(file_pathname)
    end
  end
end
|
144
|
+
|
145
|
+
# @api external
# @param version_inventory [FileInventory] The complete inventory of the files comprising a digital object version
# @param data_pathname [Pathname, String] The location of the object's data directory.
#   Only consulted when a new file's signature is incomplete and must be computed from the file.
# @return [void] Compares the {FileSignature} entries in the new versions {FileInventory} against the signatures
#   in this catalog and create new {SignatureCatalogEntry} additions to the catalog.
#   Afterwards the catalog's version id is set to that of the inventory and its datetime to the current time.
# @example {include:file:spec/features/catalog/catalog_update_spec.rb}
def update(version_inventory, data_pathname)
  version_inventory.groups.each do |group|
    group.files.each do |file|
      # files whose signature is already catalogued need no new entry
      next if @signature_hash.has_key?(file.signature)
      entry = SignatureCatalogEntry.new
      entry.version_id = version_inventory.version_id
      entry.group_id = group.group_id
      entry.path = file.instances[0].path
      if file.signature.complete?
        entry.signature = file.signature
      else
        # Coerce here (as normalize_group_signatures does) so that a String path works too
        file_pathname = Pathname(data_pathname).join(group.group_id, entry.path)
        entry.signature = file.signature.normalized_signature(file_pathname)
      end
      add_entry(entry)
    end
  end
  @version_id = version_inventory.version_id
  @catalog_datetime = Time.now
end
|
172
|
+
|
173
|
+
# @api external
# @param version_inventory (see #update)
# @return [FileInventory] Returns a filtered copy of the input inventory
#   containing only those files that were added in this version
#   (files whose signature is not yet in the catalog). Groups without additions are omitted.
# @example {include:file:spec/features/catalog/version_additions_spec.rb}
def version_additions(version_inventory)
  additions = FileInventory.new(:type=>'additions')
  additions.copy_ids(version_inventory)
  version_inventory.groups.each do |group|
    group_additions = FileGroup.new(:group_id => group.group_id)
    new_files = group.files.reject { |file| @signature_hash.has_key?(file.signature) }
    new_files.each do |file|
      group_additions.add_file_instance(file.signature, file.instances[0])
    end
    additions.groups << group_additions if group_additions.files.size > 0
  end
  additions
end
|
192
|
+
|
193
|
+
end
|
194
|
+
|
195
|
+
end
|