moab-versioning 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/moab.rb +59 -0
- data/lib/moab/bagger.rb +289 -0
- data/lib/moab/config.rb +21 -0
- data/lib/moab/exceptions.rb +18 -0
- data/lib/moab/file_group.rb +244 -0
- data/lib/moab/file_group_difference.rb +336 -0
- data/lib/moab/file_group_difference_subset.rb +45 -0
- data/lib/moab/file_instance.rb +82 -0
- data/lib/moab/file_instance_difference.rb +54 -0
- data/lib/moab/file_inventory.rb +279 -0
- data/lib/moab/file_inventory_difference.rb +132 -0
- data/lib/moab/file_manifestation.rb +85 -0
- data/lib/moab/file_signature.rb +200 -0
- data/lib/moab/signature_catalog.rb +195 -0
- data/lib/moab/signature_catalog_entry.rb +61 -0
- data/lib/moab/storage_object.rb +220 -0
- data/lib/moab/storage_object_version.rb +333 -0
- data/lib/moab/storage_repository.rb +57 -0
- data/lib/moab/storage_services.rb +104 -0
- data/lib/moab/verification_result.rb +83 -0
- data/lib/moab/version_metadata.rb +38 -0
- data/lib/moab/version_metadata_entry.rb +64 -0
- data/lib/moab/version_metadata_event.rb +47 -0
- data/lib/moab_stanford.rb +18 -0
- data/lib/monkey_patches.rb +65 -0
- data/lib/serializer.rb +36 -0
- data/lib/serializer/manifest.rb +76 -0
- data/lib/serializer/serializable.rb +178 -0
- data/lib/stanford/active_fedora_object.rb +34 -0
- data/lib/stanford/content_inventory.rb +236 -0
- data/lib/stanford/dor_metadata.rb +49 -0
- data/lib/stanford/storage_repository.rb +46 -0
- data/lib/stanford/storage_services.rb +66 -0
- data/lib/tasks/yard.rake +34 -0
- data/lib/tools/api_doc_generator.rb +396 -0
- data/lib/tools/spec_generator.rb +410 -0
- data/lib/tools/spec_generator_old.rb +49 -0
- metadata +252 -0
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'moab'
|
2
|
+
|
3
|
+
module Moab
|
4
|
+
|
5
|
+
# A container for a file signature and all the physical file instances that have that signature
|
6
|
+
# This element has one child {FileSignature} element, and one or more {FileInstance} elements
|
7
|
+
# Regarding the class name, see
|
8
|
+
# * {http://en.wikipedia.org/wiki/Functional_Requirements_for_Bibliographic_Records}
|
9
|
+
# * {http://planets-project.eu/events/copenhagen-2009/pre-reading/docs/Modelling%20Organizational%20Preservation%20Goals_Angela%20Dappert.pdf}
|
10
|
+
#
|
11
|
+
# ====Data Model
|
12
|
+
# * {FileInventory} = container for recording information about a collection of related files
|
13
|
+
# * {FileGroup} [1..*] = subset allow segregation of content and metadata files.
|
14
|
+
# * <b>{FileManifestation} [1..*] = snapshot of a file's filesystem characteristics</b>
|
15
|
+
# * {FileSignature} [1] = file fixity information
|
16
|
+
# * {FileInstance} [1..*] = filepath and timestamp of any physical file having that signature
|
17
|
+
#
|
18
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
19
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
20
|
+
class FileManifestation < Serializable
  include HappyMapper

  # The name of the XML element used to serialize this objects data
  tag 'file'

  # (see Serializable#initialize)
  # The instance list is set to an empty Array before the options are handed to the superclass.
  def initialize(opts={})
    @instances = []
    super(opts)
  end

  # @attribute
  # @return [FileSignature] The fixity data of the file instance
  element :signature, FileSignature, :tag => 'fileSignature'

  # @return [FileSignature] The stored signature; if the stored value is an Array, its first member is returned
  def signature
    @signature.is_a?(Array) ? @signature.first : @signature
  end

  # @param signature [FileSignature, Array<FileSignature>] The signature to store.
  #   If an Array is supplied, only its first member is kept.
  # @return [void]
  def signature=(signature)
    @signature = signature.is_a?(Array) ? signature.first : signature
  end

  # @attribute
  # @return [Array<FileInstance>] The location(s) of the file manifestation's file instances
  has_many :instances, FileInstance, :tag => 'fileInstance'

  # @api internal
  # @return [Array<String>] Create an array from all the file paths of the child {FileInstance} objects
  def paths
    instances.map(&:path)
  end

  # @api internal
  # @return [Integer] The total number of {FileInstance} objects in this manifestation.
  #   (Number of files that share this manifestation's signature)
  def file_count
    instances.size
  end

  # @api internal
  # @return [Integer] The total size (in bytes) of all files that share this manifestation's signature
  #   (number of instances multiplied by the signature's file size)
  def byte_count
    file_count.to_i * signature.size.to_i
  end

  # @api internal
  # @return [Integer] The total disk usage (in 1 kB blocks) of all files that share this manifestation's signature
  #   (estimating du -k result)
  def block_count
    block_size = 1024
    # Integer division rounded up: a partially filled block counts as a whole block
    blocks_per_instance = (signature.size.to_i + block_size - 1) / block_size
    file_count * blocks_per_instance
  end

  # @api internal
  # @param other [FileManifestation] The {FileManifestation} object to compare with self
  # @return [Boolean] True if {FileManifestation} objects have same content.
  #   False (rather than an exception) if other is nil or does not expose a signature and instances.
  def ==(other)
    return false unless other.respond_to?(:signature) && other.respond_to?(:instances)
    (signature == other.signature) && (instances == other.instances)
  end

end
|
84
|
+
|
85
|
+
end
|
@@ -0,0 +1,200 @@
|
|
1
|
+
require 'moab'
|
2
|
+
|
3
|
+
module Moab
|
4
|
+
|
5
|
+
# The fixity properties of a file, used to determine file content equivalence regardless of filename.
|
6
|
+
# Placing this data in a class by itself facilitates using file size together with the MD5 and SHA1 checksums
|
7
|
+
# as a single key when doing comparisons against other file instances. The Moab design assumes that this file signature
|
8
|
+
# is sufficiently unique to act as a comparator for determining file equality and eliminating file redundancy.
|
9
|
+
#
|
10
|
+
# The use of signatures for a compare-by-hash mechanism introduces a minuscule (but non-zero) risk
|
11
|
+
# that two non-identical files will have the same checksum. While this risk is only about 1 in 10^48
|
12
|
+
# when using the SHA1 checksum alone, it can be reduced even further (to about 1 in 10^86)
|
13
|
+
# if we use the MD5 and SHA1 checksums together. And we gain a bit more comfort by including a comparison of file sizes.
|
14
|
+
#
|
15
|
+
# Finally, the "collision" risk is reduced by isolation of each digital object's file pool within an object folder,
|
16
|
+
# instead of in a common storage area shared by the whole repository.
|
17
|
+
#
|
18
|
+
# ====Data Model
|
19
|
+
# * {FileInventory} = container for recording information about a collection of related files
|
20
|
+
# * {FileGroup} [1..*] = subset allow segregation of content and metadata files
|
21
|
+
# * {FileManifestation} [1..*] = snapshot of a file's filesystem characteristics
|
22
|
+
# * <b>{FileSignature} [1] = file fixity information</b>
|
23
|
+
# * {FileInstance} [1..*] = filepath and timestamp of any physical file having that signature
|
24
|
+
#
|
25
|
+
# * {SignatureCatalog} = lookup table containing a cumulative collection of all files ever ingested
|
26
|
+
# * {SignatureCatalogEntry} [1..*] = a row in the lookup table containing storage information about a single file
|
27
|
+
# * <b>{FileSignature} [1] = file fixity information</b>
|
28
|
+
#
|
29
|
+
# * {FileInventoryDifference} = compares two {FileInventory} instances based on file signatures and pathnames
|
30
|
+
# * {FileGroupDifference} [1..*] = performs analysis and reports differences between two matching {FileGroup} objects
|
31
|
+
# * {FileGroupDifferenceSubset} [1..5] = collects a set of file-level differences of a given change type
|
32
|
+
# * {FileInstanceDifference} [1..*] = contains difference information at the file level
|
33
|
+
# * <b>{FileSignature} [1..2] = contains the file signature(s) of two file instances being compared</b>
|
34
|
+
#
|
35
|
+
# @see http://searchstorage.techtarget.com/feature/The-skinny-on-data-deduplication
|
36
|
+
# @see http://www.ibm.com/developerworks/wikis/download/attachments/106987789/TSMDataDeduplication.pdf
|
37
|
+
# @see https://www.redlegg.com/pdf_file/3_1320410927_HowDataDedupeWorks_WP_100809.pdf
|
38
|
+
# @see http://www.library.yale.edu/iac/DPC/AN_DPC_FixityChecksFinal11.pdf
|
39
|
+
#
|
40
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
41
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
42
|
+
class FileSignature < Serializable

  include HappyMapper

  # The name of the XML element used to serialize this objects data
  tag 'fileSignature'

  # (see Serializable#initialize)
  def initialize(opts={})
    super(opts)
  end

  # @attribute
  # @return [Integer] The size of the file in bytes
  attribute :size, Integer, :on_save => Proc.new { |n| n.to_s }

  # @attribute
  # @return [String] The MD5 checksum value of the file
  attribute :md5, String, :on_save => Proc.new { |n| n.nil? ? "" : n.to_s }

  # @attribute
  # @return [String] The SHA1 checksum value of the file
  attribute :sha1, String, :on_save => Proc.new { |n| n.nil? ? "" : n.to_s }

  # @attribute
  # @return [String] The SHA256 checksum value of the file
  attribute :sha256, String, :on_save => Proc.new { |n| n.nil? ? "" : n.to_s }

  # @param type [Symbol,String] The type of checksum (matched case-insensitively)
  # @param value [String] The checksum value
  # @return [void] Set the value of the specified checksum type
  # @raise [RuntimeError] if the type is not one of md5, sha1 or sha256
  def set_checksum(type,value)
    case type.to_s.downcase.to_sym
    when :md5
      @md5 = value
    when :sha1
      @sha1 = value
    when :sha256
      @sha256 = value
    else
      raise "Unknown checksum type '#{type.to_s}'"
    end
  end

  # @return [Hash<Symbol,String>] A hash of the checksum data, in the order md5, sha1, sha256.
  #   Checksum types whose value is nil or empty are omitted.
  def checksums
    checksum_hash = OrderedHash.new
    checksum_hash[:md5] = @md5
    checksum_hash[:sha1] = @sha1
    checksum_hash[:sha256] = @sha256
    checksum_hash.delete_if { |_type, value| value.nil? || value.empty? }
    checksum_hash
  end

  # @return [Boolean] The signature contains all of the 3 desired checksums
  def complete?
    checksums.size == 3
  end

  # @api internal
  # @return [Hash<Symbol,String>] A hash of fixity data from this signature object
  #   (the size as a String, followed by the available checksums)
  def fixity
    fixity_hash = OrderedHash.new
    fixity_hash[:size] = @size.to_s
    fixity_hash.merge!(checksums)
    fixity_hash
  end

  # @api internal
  # @param other [FileSignature] The other file signature being compared to this signature
  # @return [Boolean] Returns true if self and other have comparable fixity data:
  #   the sizes are equal, at least one checksum type is present on both sides,
  #   and every checksum type present on both sides has the same value.
  #   Returns false (rather than raising) if other is nil or does not expose size and checksums.
  def eql?(other)
    return false unless other.respond_to?(:size) && other.respond_to?(:checksums)
    return false if size.to_i != other.size.to_i
    self_checksums = checksums
    other_checksums = other.checksums
    # Only the checksum types that both signatures carry can be compared
    matching_keys = self_checksums.keys & other_checksums.keys
    return false if matching_keys.empty?
    matching_keys.all? { |key| self_checksums[key] == other_checksums[key] }
  end

  # @api internal
  # (see #eql?)
  def ==(other)
    eql?(other)
  end

  # @api internal
  # @return [Integer] Compute a hash-code for the fixity value array.
  #   Two file instances with the same content will have the same hash code (and will compare using eql?).
  # @note The hash and eql? methods override the methods inherited from Object.
  #   These methods ensure that instances of this class can be used as Hash keys.  See
  #   * {http://www.paulbutcher.com/2007/10/navigating-the-equality-maze/}
  #   * {http://techbot.me/2011/05/ruby-basics-equality-operators-ruby/}
  #   Also overridden is {#==} so that equality tests in other contexts will also return the expected result.
  def hash
    # Only the size feeds the hash code. eql? compares just the checksum types that both
    # signatures carry, so two signatures holding different sets of checksums can still be
    # eql? and therefore must produce the same hash code.
    @size.to_i
  end

  # @api internal
  # @param pathname [Pathname] The location of the file to be digested
  # @return [FileSignature] Generate a FileSignature instance containing size and checksums for a physical file.
  #   The receiver's size, md5, sha1 and sha256 values are overwritten and the receiver itself is returned.
  def signature_from_file(pathname)
    @size = pathname.size
    md5_digest = Digest::MD5.new
    sha1_digest = Digest::SHA1.new
    sha256_digest = Digest::SHA2.new(256)
    # Binary mode: the digests must be computed over the file's raw bytes,
    # with no platform newline translation applied
    pathname.open('rb') do |stream|
      # Read in 8 kB chunks so large files are never loaded into memory at once
      while (buffer = stream.read(8192))
        md5_digest.update(buffer)
        sha1_digest.update(buffer)
        sha256_digest.update(buffer)
      end
    end
    @md5 = md5_digest.hexdigest
    @sha1 = sha1_digest.hexdigest
    @sha256 = sha256_digest.hexdigest
    self
  end

  # @api internal
  # @param pathname [Pathname] The location of the file whose full signature will be returned
  # @return [FileSignature] The full signature derived from the file, unless the fixity is inconsistent with current values
  # @raise [RuntimeError] if the signature computed from the file is not eql? to this signature
  def normalized_signature(pathname)
    sig_from_file = FileSignature.new.signature_from_file(pathname)
    # The full signature from file is consistent with current values
    return sig_from_file if eql?(sig_from_file)
    # One or more of the fixity values is inconsistent, so raise an exception
    # NOTE(review): #diff is not defined in this class; presumably inherited from Serializable - confirm
    raise "Signature inconsistent between inventory and file for #{pathname}: #{self.diff(sig_from_file).inspect}"
  end

  # @return [Hash<Symbol,Array<String>>] Key is type (e.g. :sha1), value is checksum names (e.g. ['SHA-1', 'SHA1'])
  def self.checksum_names_for_type
    names_for_type = OrderedHash.new
    names_for_type[:md5] = ['MD5']
    names_for_type[:sha1] = ['SHA-1', 'SHA1']
    names_for_type[:sha256] = ['SHA-256', 'SHA256']
    names_for_type
  end

  # @return [Hash<String, Symbol>] Key is checksum name (e.g. MD5), value is checksum type (e.g. :md5)
  def self.checksum_type_for_name
    type_for_name = OrderedHash.new
    checksum_names_for_type.each do |type, names|
      names.each { |name| type_for_name[name] = type }
    end
    type_for_name
  end

end
|
199
|
+
|
200
|
+
end
|
@@ -0,0 +1,195 @@
|
|
1
|
+
require 'moab'
|
2
|
+
|
3
|
+
module Moab
|
4
|
+
|
5
|
+
# A digital object's Signature Catalog is derived from a filtered aggregation of the file inventories
|
6
|
+
# of a digital object's set of versions. (see {#update})
|
7
|
+
# It has an entry for every file (identified by {FileSignature}) found in any of the versions,
|
8
|
+
# along with a record of the SDR storage location that was used to preserve a single file instance.
|
9
|
+
# Once this catalog has been populated, it has multiple uses:
|
10
|
+
# * The signature index is used to determine which files of a newly submitted object version
|
11
|
+
# are new additions and which are duplicates of files previously ingested. (See {#version_additions})
|
12
|
+
# (When a new version contains a mixture of added files and files carried over from the previous version
|
13
|
+
# we only need to store the files from the new version that have unique file signatures.)
|
14
|
+
# * Reconstruction of an object version (see {StorageObject#reconstruct_version}) requires a combination
|
15
|
+
# of a full version's {FileInventory} and the SignatureCatalog.
|
16
|
+
# * The catalog can also be used for performing consistency checks between manifest files and storage
|
17
|
+
#
|
18
|
+
# ====Data Model
|
19
|
+
# * <b>{SignatureCatalog} = lookup table containing a cumulative collection of all files ever ingested</b>
|
20
|
+
# * {SignatureCatalogEntry} [1..*] = a row in the lookup table containing storage information about a single file
|
21
|
+
# * {FileSignature} [1] = file fixity information
|
22
|
+
#
|
23
|
+
# @example {include:file:spec/fixtures/derivatives/manifests/v3/signatureCatalog.xml}
|
24
|
+
# @see StorageObject
|
25
|
+
# @see Bagger
|
26
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
27
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
28
|
+
class SignatureCatalog < Manifest
  include HappyMapper

  # The name of the XML element used to serialize this objects data
  tag 'signatureCatalog'

  # (see Serializable#initialize)
  # The entry list and the signature index are created empty before the options are handed to the superclass.
  def initialize(opts={})
    @entries = []
    @signature_hash = OrderedHash.new
    super(opts)
  end

  # @attribute
  # @return [String] The object ID (druid)
  attribute :digital_object_id, String, :tag => 'objectId'

  # @attribute
  # @return [Integer] The ordinal version number
  attribute :version_id, Integer, :tag => 'versionId', :key => true, :on_save => Proc.new {|n| n.to_s}

  # @return [String] The unique identifier concatenating digital object id with version id
  def composite_key
    @digital_object_id + '-' + StorageObject.version_dirname(@version_id)
  end

  # @attribute
  # @return [Time] The datetime at which the catalog was updated
  attribute :catalog_datetime, Time, :tag => 'catalogDatetime', :on_save => Proc.new {|t| t.to_s}

  # NOTE(review): Time.input and Time.output are not core Ruby methods;
  # presumably they are extensions defined elsewhere in this gem - confirm
  def catalog_datetime=(datetime)
    @catalog_datetime = Time.input(datetime)
  end

  def catalog_datetime
    Time.output(@catalog_datetime)
  end

  # @attribute
  # @return [Integer] The total number of data files (dynamically calculated)
  attribute :file_count, Integer, :tag => 'fileCount', :on_save => Proc.new {|t| t.to_s}

  def file_count
    entries.size
  end

  # @attribute
  # @return [Integer] The total size (in bytes) of all data files (dynamically calculated)
  attribute :byte_count, Integer, :tag => 'byteCount', :on_save => Proc.new {|t| t.to_s}

  def byte_count
    entries.inject(0) { |sum, entry| sum + entry.signature.size.to_i }
  end

  # @attribute
  # @return [Integer] The total disk usage (in 1 kB blocks) of all data files (estimating du -k result) (dynamically calculated)
  attribute :block_count, Integer, :tag => 'blockCount', :on_save => Proc.new {|t| t.to_s}

  def block_count
    block_size = 1024
    # Per-file integer division rounded up: a partially filled block counts as a whole block
    entries.inject(0) { |sum, entry| sum + (entry.signature.size.to_i + block_size - 1) / block_size }
  end

  # @return [Array<String>] The data fields to include in summary reports
  def summary_fields
    %w{digital_object_id version_id catalog_datetime file_count byte_count block_count}
  end

  # @attribute
  # @return [Array<SignatureCatalogEntry>] The set of data groups comprising the version
  has_many :entries, SignatureCatalogEntry, :tag => 'entry'

  # @param entry_array [Array<SignatureCatalogEntry>] The entries to add.
  #   Each one goes through {#add_entry}, so the {#signature_hash} index stays in step with the entry list.
  #   Entries are appended to, not substituted for, the entries already present.
  def entries=(entry_array)
    entry_array.each { |entry| add_entry(entry) }
  end

  # @return [OrderedHash] An index having {FileSignature} objects as keys and {SignatureCatalogEntry} objects as values
  attr_accessor :signature_hash

  # @api internal
  # @param entry [SignatureCatalogEntry] The new catalog entry
  # @return [void] Add a new entry to the catalog and to the {#signature_hash} index
  def add_entry(entry)
    @signature_hash[entry.signature] = entry
    entries << entry
  end

  # @param [FileSignature] file_signature The signature of the file whose path is sought
  # @return [String] The object-relative path of the file having the specified signature
  # @raise [FileNotFoundException] if the catalog has no entry for the signature
  def catalog_filepath(file_signature)
    catalog_entry = @signature_hash[file_signature]
    raise FileNotFoundException, "catalog entry not found for #{file_signature.fixity.inspect} in #{@digital_object_id} - #{@version_id}" if catalog_entry.nil?
    catalog_entry.storage_path
  end

  # @param group [FileGroup] A group of the files from a file inventory
  # @param group_pathname [Pathname] The location of the directory containing the group's files
  # @return [void] Inspect and upgrade the group's signature data to include all desired checksums.
  #   An incomplete signature is replaced by the matching catalog signature if there is one;
  #   otherwise, when group_pathname is given, by a signature computed from the file itself.
  #   With neither available the signature is left as it is.
  # @raise [RuntimeError] if group_pathname is given but does not exist
  def normalize_group_signatures(group, group_pathname=nil)
    unless group_pathname.nil?
      group_pathname = Pathname(group_pathname)
      raise "Could not locate #{group_pathname}" unless group_pathname.exist?
    end
    group.files.each do |file|
      next if file.signature.complete?
      if @signature_hash.key?(file.signature)
        # Take the catalog's own key object, not the file's incomplete signature that matched it
        file.signature = @signature_hash.find { |catalog_signature, _entry| catalog_signature == file.signature }[0]
      elsif group_pathname
        file_pathname = group_pathname.join(file.instances[0].path)
        file.signature = file.signature.normalized_signature(file_pathname)
      end
    end
  end

  # @api external
  # @param version_inventory [FileInventory] The complete inventory of the files comprising a digital object version
  # @param data_pathname [Pathname, String] The location of the object's data directory.
  #   It is only consulted when a new file's signature is incomplete and must be computed from the file.
  # @return [void] Compares the {FileSignature} entries in the new versions {FileInventory} against the signatures
  #   in this catalog and create new {SignatureCatalogEntry} additions to the catalog
  # @example {include:file:spec/features/catalog/catalog_update_spec.rb}
  def update(version_inventory, data_pathname)
    version_inventory.groups.each do |group|
      group.files.each do |file|
        next if @signature_hash.key?(file.signature)
        entry = SignatureCatalogEntry.new
        entry.version_id = version_inventory.version_id
        entry.group_id = group.group_id
        entry.path = file.instances[0].path
        if file.signature.complete?
          entry.signature = file.signature
        else
          # Coerce here, as normalize_group_signatures does, so a String path is accepted too
          file_pathname = Pathname(data_pathname).join(group.group_id, entry.path)
          entry.signature = file.signature.normalized_signature(file_pathname)
        end
        add_entry(entry)
      end
    end
    @version_id = version_inventory.version_id
    @catalog_datetime = Time.now
  end

  # @api external
  # @param version_inventory (see #update)
  # @return [FileInventory] Returns a filtered copy of the input inventory
  #   containing only those files that were added in this version
  #   (files whose signature is not yet in the catalog). Groups without such files are left out.
  # @example {include:file:spec/features/catalog/version_additions_spec.rb}
  def version_additions(version_inventory)
    additions = FileInventory.new(:type=>'additions')
    additions.copy_ids(version_inventory)
    version_inventory.groups.each do |group|
      group_additions = FileGroup.new(:group_id => group.group_id)
      group.files.each do |file|
        next if @signature_hash.key?(file.signature)
        group_additions.add_file_instance(file.signature, file.instances[0])
      end
      additions.groups << group_additions if group_additions.files.size > 0
    end
    additions
  end

end
|
194
|
+
|
195
|
+
end
|