moab-versioning 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/lib/moab.rb +59 -0
  3. data/lib/moab/bagger.rb +289 -0
  4. data/lib/moab/config.rb +21 -0
  5. data/lib/moab/exceptions.rb +18 -0
  6. data/lib/moab/file_group.rb +244 -0
  7. data/lib/moab/file_group_difference.rb +336 -0
  8. data/lib/moab/file_group_difference_subset.rb +45 -0
  9. data/lib/moab/file_instance.rb +82 -0
  10. data/lib/moab/file_instance_difference.rb +54 -0
  11. data/lib/moab/file_inventory.rb +279 -0
  12. data/lib/moab/file_inventory_difference.rb +132 -0
  13. data/lib/moab/file_manifestation.rb +85 -0
  14. data/lib/moab/file_signature.rb +200 -0
  15. data/lib/moab/signature_catalog.rb +195 -0
  16. data/lib/moab/signature_catalog_entry.rb +61 -0
  17. data/lib/moab/storage_object.rb +220 -0
  18. data/lib/moab/storage_object_version.rb +333 -0
  19. data/lib/moab/storage_repository.rb +57 -0
  20. data/lib/moab/storage_services.rb +104 -0
  21. data/lib/moab/verification_result.rb +83 -0
  22. data/lib/moab/version_metadata.rb +38 -0
  23. data/lib/moab/version_metadata_entry.rb +64 -0
  24. data/lib/moab/version_metadata_event.rb +47 -0
  25. data/lib/moab_stanford.rb +18 -0
  26. data/lib/monkey_patches.rb +65 -0
  27. data/lib/serializer.rb +36 -0
  28. data/lib/serializer/manifest.rb +76 -0
  29. data/lib/serializer/serializable.rb +178 -0
  30. data/lib/stanford/active_fedora_object.rb +34 -0
  31. data/lib/stanford/content_inventory.rb +236 -0
  32. data/lib/stanford/dor_metadata.rb +49 -0
  33. data/lib/stanford/storage_repository.rb +46 -0
  34. data/lib/stanford/storage_services.rb +66 -0
  35. data/lib/tasks/yard.rake +34 -0
  36. data/lib/tools/api_doc_generator.rb +396 -0
  37. data/lib/tools/spec_generator.rb +410 -0
  38. data/lib/tools/spec_generator_old.rb +49 -0
  39. metadata +252 -0
@@ -0,0 +1,336 @@
1
+ require 'moab'
2
+
3
+ module Moab
4
+
5
+ # Performs analysis and reports the differences between two matching {FileGroup} objects.
6
+ # The descending elements of the report hold a detailed breakdown of file-level differences, organized by change type.
7
+ # This stanza is a child element of {FileInventoryDifference}, the documentation of which contains a full example.
8
+ #
9
+ # In order to determine the detailed nature of the differences that are present between the two manifests,
10
+ # this algorithm first compares the sets of file signatures present in the groups being compared,
11
+ # then uses the result of that operation for subsequent analysis of filename correspondences.
12
+ #
13
+ # For the first step, a Ruby Hash is extracted from each of the two groups, with an array of
14
+ # {FileSignature} objects used as hash keys, and the corresponding {FileInstance} arrays as the hash values.
15
+ # The set of keys from the basis hash can be compared against the keys from the other hash using {Array} operators:
16
+ # * <i>matching</i> = basis_array & other_array
17
+ # * <i>basis_only</i> = basis_array - other_array
18
+ # * <i>other_only</i> = other_array - basis_array
19
+ #
20
+ # For the second step of the comparison, the matching and non-matching sets of hash entries
21
+ # are further categorized as follows:
22
+ # * <i>identical</i> = signature and file path is the same in both basis and other file group
23
+ # * <i>renamed</i> = signature is unchanged, but the path has moved
24
+ # * <i>modified</i> = path is present in both groups, but the signature has changed
25
+ # * <i>deleted</i> = signature and path are only in the basis inventory
26
+ # * <i>added</i> = signature and path are only in the other inventory
27
+ #
28
+ # ====Data Model
29
+ # * {FileInventoryDifference} = compares two {FileInventory} instances based on file signatures and pathnames
30
+ # * <b>{FileGroupDifference} [1..*] = performs analysis and reports differences between two matching {FileGroup} objects</b>
31
+ # * {FileGroupDifferenceSubset} [1..5] = collects a set of file-level differences of a given change type
32
+ # * {FileInstanceDifference} [1..*] = contains difference information at the file level
33
+ # * {FileSignature} [1..2] = contains the file signature(s) of two file instances being compared
34
+ #
35
+ # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
36
+ # All rights reserved. See {file:LICENSE.rdoc} for details.
37
+ class FileGroupDifference < Serializable
38
+ include HappyMapper
39
+
40
+ # The name of the XML element used to serialize this object's data
41
+ tag 'fileGroupDifference'
42
+
# (see Serializable#initialize)
# Starts the instance with an empty list of subsets, then passes the options
# on to the parent initializer.
def initialize(opts = {})
  @subsets = []
  super(opts)
end
48
+
49
+ # @attribute
50
+ # @return [String] The name of the file group
51
+ attribute :group_id, String, :tag => 'groupId', :key => true
52
+
53
+ # @attribute
54
+ # @return [Integer] the total number of differences found between the two inventories that were compared (dynamically calculated)
55
+ attribute :difference_count, Integer, :tag => 'differenceCount', :on_save => Proc.new { |i| i.to_s }
56
+
# Total number of file-level changes recorded for this group.
# Only the renamed, modified, deleted and added counters contribute;
# identical files are not differences and are left out.
# A counter that has not been populated yet (nil) is counted as zero, so the
# reader does not raise on an instance whose counters were never set.
# @return [Integer] sum of the renamed, modified, deleted and added counters
def difference_count
  [@renamed, @modified, @deleted, @added].inject(0) do |total, tally|
    total + (tally || 0)
  end
end
60
+
61
+ # @attribute
62
+ # @return [Integer] How many files were unchanged
63
+ attribute :identical, Integer, :on_save => Proc.new { |n| n.to_s }
64
+
65
+ # @attribute
66
+ # @return [Integer] How many files were renamed
67
+ attribute :renamed, Integer, :on_save => Proc.new { |n| n.to_s }
68
+
69
+ # @attribute
70
+ # @return [Integer] How many files were modified
71
+ attribute :modified, Integer, :on_save => Proc.new { |n| n.to_s }
72
+
73
+ # @attribute
74
+ # @return [Integer] How many files were deleted
75
+ attribute :deleted, Integer, :on_save => Proc.new { |n| n.to_s }
76
+
77
+ # @attribute
78
+ # @return [Integer] How many files were added
79
+ attribute :added, Integer, :on_save => Proc.new { |n| n.to_s }
80
+
81
+ # @attribute
82
+ # @return [Array<FileGroupDifferenceSubset>] A set of Arrays (one for each change type),
83
+ # each of which contains a collection of file-level differences having that change type.
84
+ has_many :subsets, FileGroupDifferenceSubset, :tag => 'subset'
85
+
# Look up the subset that reports one particular kind of change.
# @param change [String] the change type to search for
# @return [FileGroupDifferenceSubset, nil] the first subset whose change type equals the
#   requested one, or nil when no subset matches
def subset(change)
  @subsets.find do |candidate|
    candidate.change == change
  end
end
91
+
# @return [Array<String>] The names of the data fields to include in summary reports:
#   the group id, the total difference count, and the five per-change-type counters
def summary_fields
  %w[group_id difference_count identical renamed modified deleted added]
end
96
+
97
+
# @api internal
# Builds a new object that carries only the group id and the five counters of this one;
# the subsets are not passed along.
# @return [FileGroupDifference] Clone just this element for inclusion in a versionMetadata structure
def summary
  counters = {
    :group_id => @group_id,
    :identical => @identical,
    :renamed => @renamed,
    :modified => @modified,
    :deleted => @deleted,
    :added => @added
  }
  FileGroupDifference.new(counters)
end
110
+
111
+
# Walks the subsets and regroups their file paths by change type.
# * :added holds the other_path of every file in the "added" subset
# * :deleted and :modified hold the basis_path of every file in those subsets
# * :copied holds one {:basis => [...], :other => [...]} hash per signature found in the
#   "identical" and "renamed" subsets
# The :added, :deleted and :modified keys are only present when a subset of that type exists.
# @return [Hash<Symbol,Array>] Sets of filenames grouped by change type for use in performing file or metadata operations
def file_deltas
  # The hash to be returned
  result = {}
  # Files whose checksums matched across versions, but may have copies removed, added, or renamed.
  # A signature gets its pair of path lists the first time it is looked up.
  by_signature = Hash.new { |memo, sig| memo[sig] = { :basis => [], :other => [] } }
  @subsets.each do |group|
    case group.change
    when "added"
      result[:added] = group.files.map { |entry| entry.other_path }
    when "deleted"
      result[:deleted] = group.files.map { |entry| entry.basis_path }
    when "modified"
      result[:modified] = group.files.map { |entry| entry.basis_path }
    when "identical"
      group.files.each do |entry|
        lists = by_signature[entry.signatures[0]]
        # An identical file has the same path on both sides, so the basis path is recorded twice
        lists[:basis] << entry.basis_path
        lists[:other] << entry.basis_path
      end
    when "renamed"
      group.files.each do |entry|
        sig = entry.signatures[0]
        old_path = entry.basis_path
        new_path = entry.other_path
        # A renamed entry may lack one of its paths; only paths that are present are recorded,
        # and the signature is not looked up at all when neither path is present
        by_signature[sig][:basis] << old_path unless old_path.nil? || old_path.empty?
        by_signature[sig][:other] << new_path unless new_path.nil? || new_path.empty?
      end
    end
  end
  result[:copied] = by_signature.values
  result
end
144
+
# @api internal
# @param basis_hash [Hash] The first hash being compared
# @param other_hash [Hash] The second hash being compared
# @return [Array] The keys present in both hashes (intersection), in the order they appear in the first hash
def matching_keys(basis_hash, other_hash)
  basis_keys = basis_hash.keys
  other_keys = other_hash.keys
  basis_keys & other_keys
end
152
+
# @api internal
# @param basis_hash [Hash] The first hash being compared
# @param other_hash [Hash] The second hash being compared
# @return [Array] The keys of the first hash that do not occur in the second hash
def basis_only_keys(basis_hash, other_hash)
  basis_keys = basis_hash.keys
  other_keys = other_hash.keys
  basis_keys - other_keys
end
159
+
# @api internal
# @param basis_hash [Hash] The first hash being compared
# @param other_hash [Hash] The second hash being compared
# @return [Array] The keys of the second hash that do not occur in the first hash
def other_only_keys(basis_hash, other_hash)
  basis_keys = basis_hash.keys
  other_keys = other_hash.keys
  other_keys - basis_keys
end
166
+
# @api internal
# Takes the group id from the basis group, then runs the two comparison passes:
# first the signatures found in both groups, then the signatures found in only one of them.
# @param basis_group [FileGroup] The file group that is the basis of the comparison
# @param other_group [FileGroup] The file group that is compared against the basis group
# @return [FileGroupDifference] This object, now holding the differences report for the two groups
def compare_file_groups(basis_group, other_group)
  @group_id = basis_group.group_id

  compare_matching_signatures(basis_group, other_group)
  compare_non_matching_signatures(basis_group, other_group)

  self
end
177
+
# @api internal
# @param basis_group [FileGroup] The file group that is the basis of the comparison
# @param other_group [FileGroup] The file group that is compared against the basis group
# @return [void] For signatures that are present in both groups,
#   report which file instances are identical or renamed
def compare_matching_signatures(basis_group, other_group)
  shared_signatures = matching_keys(basis_group.signature_hash, other_group.signature_hash)

  tabulate_unchanged_files(shared_signatures, basis_group.signature_hash, other_group.signature_hash)
  tabulate_renamed_files(shared_signatures, basis_group.signature_hash, other_group.signature_hash)
end
187
+
# @api internal
# The signatures unique to each group are turned into path-keyed hashes (via
# path_hash_subset on each group), and those hashes are then compared by path.
# @param basis_group [FileGroup] The file group that is the basis of the comparison
# @param other_group [FileGroup] The file group that is compared against the basis group
# @return [void] For signatures that are present in only one or the other group,
#   report which file instances are modified, deleted, or added
def compare_non_matching_signatures(basis_group, other_group)
  signatures_only_in_basis = basis_only_keys(basis_group.signature_hash, other_group.signature_hash)
  signatures_only_in_other = other_only_keys(basis_group.signature_hash, other_group.signature_hash)

  basis_paths = basis_group.path_hash_subset(signatures_only_in_basis)
  other_paths = other_group.path_hash_subset(signatures_only_in_other)

  tabulate_modified_files(basis_paths, other_paths)
  tabulate_deleted_files(basis_paths, other_paths)
  tabulate_added_files(basis_paths, other_paths)
end
201
+
# @api internal
# For every matching signature, each path that occurs in both groups yields one 'identical' entry.
# The resulting subset is appended to the subsets of this object and its size is stored as
# the identical counter.
# @param matching_signatures [Array<FileSignature>] The file signatures of the file manifestations being compared
# @param basis_signature_hash [OrderedHash<FileSignature, FileManifestation>]
#   Signature to file path mapping from the file group that is the basis of the comparison
# @param other_signature_hash [OrderedHash<FileSignature, FileManifestation>]
#   Signature to file path mapping from the file group that is being compared to the basis group
# @return [FileGroupDifferenceSubset]
#   Container for reporting the set of file-level differences of type 'identical'
def tabulate_unchanged_files(matching_signatures, basis_signature_hash, other_signature_hash)
  differences = matching_signatures.flat_map do |signature|
    shared_paths = basis_signature_hash[signature].paths & other_signature_hash[signature].paths
    shared_paths.map do |path|
      entry = FileInstanceDifference.new(:change => 'identical')
      entry.basis_path = path
      # The path did not change, so a placeholder is stored instead of repeating it
      entry.other_path = "same"
      entry.signatures << signature
      entry
    end
  end

  report = FileGroupDifferenceSubset.new(:change => 'identical')
  report.files = differences
  @subsets << report
  @identical = report.count
  report
end
230
+
231
+
# @api internal
# For every matching signature, the paths found only in the basis group are paired by position
# with the paths found only in the other group. When the two lists differ in length, the
# shorter side contributes nil for the unpaired entries.
# The resulting subset is appended to the subsets of this object and its size is stored as
# the renamed counter.
# @param matching_signatures [Array<FileSignature>] The file signatures of the file manifestations being compared
# @param basis_signature_hash [OrderedHash<FileSignature, FileManifestation>]
#   Signature to file path mapping from the file group that is the basis of the comparison
# @param other_signature_hash [OrderedHash<FileSignature, FileManifestation>]
#   Signature to file path mapping from the file group that is being compared to the basis group
# @return [FileGroupDifferenceSubset]
#   Container for reporting the set of file-level differences of type 'renamed'
def tabulate_renamed_files(matching_signatures, basis_signature_hash, other_signature_hash)
  differences = []
  matching_signatures.each do |signature|
    basis_paths = basis_signature_hash[signature].paths
    other_paths = other_signature_hash[signature].paths
    vanished_paths = basis_paths - other_paths
    appeared_paths = other_paths - basis_paths
    pair_count = [vanished_paths.size, appeared_paths.size].max
    pair_count.times do |index|
      entry = FileInstanceDifference.new(:change => 'renamed')
      entry.basis_path = vanished_paths[index]
      entry.other_path = appeared_paths[index]
      entry.signatures << signature
      differences << entry
    end
  end

  report = FileGroupDifferenceSubset.new(:change => 'renamed')
  report.files = differences
  @subsets << report
  @renamed = report.count
  report
end
262
+
263
+
# @api internal
# A path that is a key of both hashes yields one 'modified' entry carrying two signatures:
# the basis signature first, then the other signature.
# The resulting subset is appended to the subsets of this object and its size is stored as
# the modified counter.
# @param basis_path_hash [OrderedHash<String,FileSignature>]
#   The file paths and associated signatures for manifestations appearing only in the basis group
# @param other_path_hash [OrderedHash<String,FileSignature>]
#   The file paths and associated signatures for manifestations appearing only in the other group
# @return [FileGroupDifferenceSubset]
#   Container for reporting the set of file-level differences of type 'modified'
def tabulate_modified_files(basis_path_hash, other_path_hash)
  differences = matching_keys(basis_path_hash, other_path_hash).map do |path|
    entry = FileInstanceDifference.new(:change => 'modified')
    entry.basis_path = path
    # The path did not change, so a placeholder is stored instead of repeating it
    entry.other_path = "same"
    entry.signatures << basis_path_hash[path]
    entry.signatures << other_path_hash[path]
    entry
  end

  report = FileGroupDifferenceSubset.new(:change => 'modified')
  report.files = differences
  @subsets << report
  @modified = report.count
  report
end
287
+
# @api internal
# A path that is a key of the basis hash only yields one 'deleted' entry with an empty other path.
# The resulting subset is appended to the subsets of this object and its size is stored as
# the deleted counter.
# @param basis_path_hash [OrderedHash<String,FileSignature>]
#   The file paths and associated signatures for manifestations appearing only in the basis group
# @param other_path_hash [OrderedHash<String,FileSignature>]
#   The file paths and associated signatures for manifestations appearing only in the other group
# @return [FileGroupDifferenceSubset]
#   Container for reporting the set of file-level differences of type 'deleted'
def tabulate_deleted_files(basis_path_hash, other_path_hash)
  differences = basis_only_keys(basis_path_hash, other_path_hash).map do |path|
    entry = FileInstanceDifference.new(:change => 'deleted')
    entry.basis_path = path
    entry.other_path = ""
    entry.signatures << basis_path_hash[path]
    entry
  end

  report = FileGroupDifferenceSubset.new(:change => 'deleted')
  report.files = differences
  @subsets << report
  @deleted = report.count
  report
end
310
+
# @api internal
# A path that is a key of the other hash only yields one 'added' entry with an empty basis path.
# The resulting subset is appended to the subsets of this object and its size is stored as
# the added counter.
# @param basis_path_hash [OrderedHash<String,FileSignature>]
#   The file paths and associated signatures for manifestations appearing only in the basis group
# @param other_path_hash [OrderedHash<String,FileSignature>]
#   The file paths and associated signatures for manifestations appearing only in the other group
# @return [FileGroupDifferenceSubset]
#   Container for reporting the set of file-level differences of type 'added'
def tabulate_added_files(basis_path_hash, other_path_hash)
  differences = other_only_keys(basis_path_hash, other_path_hash).map do |path|
    entry = FileInstanceDifference.new(:change => 'added')
    entry.basis_path = ""
    entry.other_path = path
    entry.signatures << other_path_hash[path]
    entry
  end

  report = FileGroupDifferenceSubset.new(:change => 'added')
  report.files = differences
  @subsets << report
  @added = report.count
  report
end
333
+
334
+ end
335
+
336
+ end
@@ -0,0 +1,45 @@
1
+ require 'moab'
2
+
3
+ module Moab
4
+
# A container for reporting a set of file-level differences of the type specified by the change attribute
#
# ====Data Model
# * {FileInventoryDifference} = compares two {FileInventory} instances based on file signatures and pathnames
# * {FileGroupDifference} [1..*] = performs analysis and reports differences between two matching {FileGroup} objects
# * <b>{FileGroupDifferenceSubset} [1..5] = collects a set of file-level differences of a given change type</b>
# * {FileInstanceDifference} [1..*] = contains difference information at the file level
# * {FileSignature} [1..2] = contains the file signature(s) of two file instances being compared
#
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
#   All rights reserved. See {file:LICENSE.rdoc} for details.
class FileGroupDifferenceSubset < Serializable
  include HappyMapper

  # The name of the XML element used to serialize this object's data
  tag 'subset'

  # (see Serializable#initialize)
  # Starts the instance with an empty list of files, then passes the options
  # on to the parent initializer.
  def initialize(opts = {})
    @files = []
    super(opts)
  end

  # @attribute
  # @return [String] The type of change (identical|renamed|modified|deleted|added)
  attribute :change, String, :key => true

  # @attribute
  # @return [Integer] How many files were changed
  attribute :count, Integer, :on_save => Proc.new { |n| n.to_s }

  # Defined after the attribute declaration so that this reader is the one in effect:
  # the count is always derived from the current list of files.
  # @return [Integer] the number of entries in files
  def count
    files.size
  end

  # @attribute
  # @return [Array<FileInstanceDifference>] The set of file instances having this type of change
  has_many :files, FileInstanceDifference, :tag => 'file'
end
45
+ end
@@ -0,0 +1,82 @@
1
+ require 'moab'
2
+
3
+ module Moab
4
+
# The file path and last modification date properties of a file
#
# ====Data Model
# * {FileInventory} = container for recording information about a collection of related files
# * {FileGroup} [1..*] = subset allow segregation of content and metadata files
# * {FileManifestation} [1..*] = snapshot of a file's filesystem characteristics
# * {FileSignature} [1] = file fixity information
# * <b>{FileInstance} [1..*] = filepath and timestamp of any physical file having that signature</b>
#
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
#   All rights reserved. See {file:LICENSE.rdoc} for details.
class FileInstance < Serializable
  include HappyMapper

  # The name of the XML element used to serialize this object's data
  tag 'fileInstance'

  # (see Serializable#initialize)
  def initialize(opts = {})
    super(opts)
  end

  # @attribute
  # @return [String] The id is the filename path, relative to the file group's base directory
  attribute :path, String, :key => true

  # @attribute
  # @return [Time] The timestamp recorded for the file; {#instance_from_file} fills it
  #   from the modification time of the physical file
  attribute :datetime, Time, :on_save => Proc.new { |t| t.to_s }

  # Stores the given value after passing it through Time.input.
  # NOTE(review): Time.input is not defined in this file -- presumably one of the
  # project's Time extensions; confirm its accepted input formats there.
  # @param event_datetime [Object] the timestamp value to store
  def datetime=(event_datetime)
    @datetime = Time.input(event_datetime)
  end

  # Returns the stored timestamp after passing it through Time.output.
  # NOTE(review): Time.output is not defined in this file -- presumably one of the
  # project's Time extensions; confirm the returned format there.
  def datetime
    Time.output(@datetime)
  end

  # @api internal
  # @param pathname [Pathname] The location of the physical file
  # @param base_directory [Pathname] The full path used as the basis of the relative paths reported
  # @return [FileInstance] Returns this file instance, filled with the physical file's properties:
  #   the path relative to the base directory and the modification time in ISO 8601 form
  def instance_from_file(pathname, base_directory)
    @path = pathname.expand_path.relative_path_from(base_directory.expand_path).to_s
    # Written directly to the instance variable, bypassing the datetime= writer
    @datetime = pathname.mtime.iso8601
    self
  end

  # @api internal
  # @param other [FileInstance] The other file instance being compared to this instance
  # @return [Boolean] Returns true if self and other have the same path.
  #   An object that has no path method (nil, for example) is never equal, rather than raising.
  def eql?(other)
    other.respond_to?(:path) && path == other.path
  end

  # @api internal
  # (see #eql?)
  def ==(other)
    eql?(other)
  end

  # @api internal
  # @return [Fixnum] Compute a hash-code for the path string.
  #   Two file instances with the same relative path will have the same hash code (and will compare using eql?).
  # @note The hash and eql? methods override the methods inherited from Object.
  #   These methods ensure that instances of this class can be used as Hash keys. See
  #   * {http://www.paulbutcher.com/2007/10/navigating-the-equality-maze/}
  #   * {http://techbot.me/2011/05/ruby-basics-equality-operators-ruby/}
  #   Also overridden is {#==} so that equality tests in other contexts will also return the expected result.
  def hash
    path.hash
  end
end
81
+
82
+ end