moab-versioning 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/lib/moab.rb +59 -0
  3. data/lib/moab/bagger.rb +289 -0
  4. data/lib/moab/config.rb +21 -0
  5. data/lib/moab/exceptions.rb +18 -0
  6. data/lib/moab/file_group.rb +244 -0
  7. data/lib/moab/file_group_difference.rb +336 -0
  8. data/lib/moab/file_group_difference_subset.rb +45 -0
  9. data/lib/moab/file_instance.rb +82 -0
  10. data/lib/moab/file_instance_difference.rb +54 -0
  11. data/lib/moab/file_inventory.rb +279 -0
  12. data/lib/moab/file_inventory_difference.rb +132 -0
  13. data/lib/moab/file_manifestation.rb +85 -0
  14. data/lib/moab/file_signature.rb +200 -0
  15. data/lib/moab/signature_catalog.rb +195 -0
  16. data/lib/moab/signature_catalog_entry.rb +61 -0
  17. data/lib/moab/storage_object.rb +220 -0
  18. data/lib/moab/storage_object_version.rb +333 -0
  19. data/lib/moab/storage_repository.rb +57 -0
  20. data/lib/moab/storage_services.rb +104 -0
  21. data/lib/moab/verification_result.rb +83 -0
  22. data/lib/moab/version_metadata.rb +38 -0
  23. data/lib/moab/version_metadata_entry.rb +64 -0
  24. data/lib/moab/version_metadata_event.rb +47 -0
  25. data/lib/moab_stanford.rb +18 -0
  26. data/lib/monkey_patches.rb +65 -0
  27. data/lib/serializer.rb +36 -0
  28. data/lib/serializer/manifest.rb +76 -0
  29. data/lib/serializer/serializable.rb +178 -0
  30. data/lib/stanford/active_fedora_object.rb +34 -0
  31. data/lib/stanford/content_inventory.rb +236 -0
  32. data/lib/stanford/dor_metadata.rb +49 -0
  33. data/lib/stanford/storage_repository.rb +46 -0
  34. data/lib/stanford/storage_services.rb +66 -0
  35. data/lib/tasks/yard.rake +34 -0
  36. data/lib/tools/api_doc_generator.rb +396 -0
  37. data/lib/tools/spec_generator.rb +410 -0
  38. data/lib/tools/spec_generator_old.rb +49 -0
  39. metadata +252 -0
@@ -0,0 +1,336 @@
1
+ require 'moab'
2
+
3
+ module Moab
4
+
5
+ # Performs analysis and reports the differences between two matching {FileGroup} objects.
6
+ # The descending elements of the report hold a detailed breakdown of file-level differences, organized by change type.
7
+ # This stanza is a child element of {FileInventoryDifference}, the documentation of which contains a full example.
8
+ #
9
+ # In order to determine the detailed nature of the differences that are present between the two manifests,
10
+ # this algorithm first compares the sets of file signatures present in the groups being compared,
11
+ # then uses the result of that operation for subsequent analysis of filename correspondences.
12
+ #
13
+ # For the first step, a Ruby Hash is extracted from each of the two groups, with an array of
14
+ # {FileSignature} objects used as hash keys, and the corresponding {FileInstance} arrays as the hash values.
15
+ # The set of keys from the basis hash can be compared against the keys from the other hash using {Array} operators:
16
+ # * <i>matching</i> = basis_array & other_array
17
+ # * <i>basis_only</i> = basis_array - other_array
18
+ # * <i>other_only</i> = other_array - basis_array
19
+ #
20
+ # For the second step of the comparison, the matching and non-matching sets of hash entries
21
+ # are further categorized as follows:
22
+ # * <i>identical</i> = signature and file path is the same in both basis and other file group
23
+ # * <i>renamed</i> = signature is unchanged, but the path has moved
24
+ # * <i>modified</i> = path is present in both groups, but the signature has changed
25
+ # * <i>deleted</i> = signature and path are only in the basis inventory
26
+ # * <i>added</i> = signature and path are only in the other inventory
27
+ #
28
+ # ====Data Model
29
+ # * {FileInventoryDifference} = compares two {FileInventory} instances based on file signatures and pathnames
30
+ # * <b>{FileGroupDifference} [1..*] = performs analysis and reports differences between two matching {FileGroup} objects</b>
31
+ # * {FileGroupDifferenceSubset} [1..5] = collects a set of file-level differences of a given change type
32
+ # * {FileInstanceDifference} [1..*] = contains difference information at the file level
33
+ # * {FileSignature} [1..2] = contains the file signature(s) of two file instances being compared
34
+ #
35
+ # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
36
+ # All rights reserved. See {file:LICENSE.rdoc} for details.
37
+ class FileGroupDifference < Serializable
38
+ include HappyMapper
39
+
40
+ # The name of the XML element used to serialize this object's data
41
+ tag 'fileGroupDifference'
42
+
43
# (see Serializable#initialize)
#
# Starts the report with an empty list of subsets, then passes the options
# on to the parent class initializer.
#
# @param opts [Hash] options forwarded unchanged to the parent initializer
def initialize(opts = {})
  # Assigned before the parent initializer is invoked.
  @subsets = []
  super(opts)
end
48
+
49
+ # @attribute
50
+ # @return [String] The name of the file group
51
+ attribute :group_id, String, :tag => 'groupId', :key => true
52
+
53
+ # @attribute
54
+ # @return [Integer] the total number of differences found between the two inventories that were compared (dynamically calculated)
55
+ attribute :difference_count, Integer, :tag => 'differenceCount', :on_save => Proc.new { |i| i.to_s }
56
+
57
# Total number of file-level differences, computed on demand from the
# renamed, modified, deleted and added counters.
#
# A counter that has not been assigned yet (still nil) is counted as zero,
# so calling this on a report that has not been fully tabulated returns a
# number instead of raising NoMethodError.
#
# @return [Integer] the sum of the renamed, modified, deleted and added counts
def difference_count
  [@renamed, @modified, @deleted, @added].inject(0) { |total, tally| total + tally.to_i }
end
60
+
61
+ # @attribute
62
+ # @return [Integer] How many files were unchanged
63
+ attribute :identical, Integer, :on_save => Proc.new { |n| n.to_s }
64
+
65
+ # @attribute
66
+ # @return [Integer] How many files were renamed
67
+ attribute :renamed, Integer, :on_save => Proc.new { |n| n.to_s }
68
+
69
+ # @attribute
70
+ # @return [Integer] How many files were modified
71
+ attribute :modified, Integer, :on_save => Proc.new { |n| n.to_s }
72
+
73
+ # @attribute
74
+ # @return [Integer] How many files were deleted
75
+ attribute :deleted, Integer, :on_save => Proc.new { |n| n.to_s }
76
+
77
+ # @attribute
78
+ # @return [Integer] How many files were added
79
+ attribute :added, Integer, :on_save => Proc.new { |n| n.to_s }
80
+
81
+ # @attribute
82
+ # @return [Array<FileGroupDifferenceSubset>] A set of Arrays (one for each change type),
83
+ # each of which contains a collection of file-level differences having that change type.
84
+ has_many :subsets, FileGroupDifferenceSubset, :tag => 'subset'
85
+
86
# Looks up the subset that reports a given change type.
#
# @param change [String] the change type to search for
# @return [FileGroupDifferenceSubset, nil] the first subset whose change
#   attribute equals the argument, or nil when no subset matches
def subset(change)
  @subsets.detect { |candidate| candidate.change == change }
end
91
+
92
# Names of the data fields that belong in summary reports.
#
# @return [Array<String>] the field names, in reporting order
def summary_fields
  ['group_id', 'difference_count', 'identical', 'renamed', 'modified', 'deleted', 'added']
end
96
+
97
+
98
# @api internal
# Creates a new FileGroupDifference that carries only this report's group id
# and its five counters; the subsets are not passed along.
#
# @return [FileGroupDifference] Clone just this element for inclusion in a versionMetadata structure
def summary
  totals = {
    :group_id  => @group_id,
    :identical => @identical,
    :renamed   => @renamed,
    :modified  => @modified,
    :deleted   => @deleted,
    :added     => @added
  }
  FileGroupDifference.new(totals)
end
110
+
111
+
112
# Collects the file names recorded in the subsets, keyed by change type.
#
# The :added, :deleted and :modified keys are only set when a subset of that
# change type exists. The :copied key is always set; it holds one
# {:basis => [...], :other => [...]} hash per file signature that appears in
# the 'identical' or 'renamed' subsets.
#
# @return [Hash<Symbol,Array>] Sets of filenames grouped by change type for use in performing file or metadata operations
def file_deltas()
  # One bucket per signature whose checksums matched across versions; copies may have been removed, added, or renamed
  by_signature = Hash.new { |store, sig| store[sig] = { :basis => [], :other => [] } }
  result = {}
  @subsets.each do |group|
    case group.change
    when "added"
      result[:added] = group.files.map { |entry| entry.other_path }
    when "deleted"
      result[:deleted] = group.files.map { |entry| entry.basis_path }
    when "modified"
      result[:modified] = group.files.map { |entry| entry.basis_path }
    when "identical"
      group.files.each do |entry|
        bucket = by_signature[entry.signatures[0]]
        # An identical file has the same path on both sides, so the basis path is recorded twice
        bucket[:basis] << entry.basis_path
        bucket[:other] << entry.basis_path
      end
    when "renamed"
      group.files.each do |entry|
        bucket = by_signature[entry.signatures[0]]
        source = entry.basis_path
        target = entry.other_path
        # A nil or empty path on one side is skipped for that side
        bucket[:basis] << source unless source.nil? || source.empty?
        bucket[:other] << target unless target.nil? || target.empty?
      end
    end
  end
  result[:copied] = by_signature.values
  result
end
144
+
145
# @api internal
# Intersects the key sets of two hashes.
#
# @param basis_hash [Hash] The first hash being compared
# @param other_hash [Hash] The second hash being compared
# @return [Array] Compare the keys of two hashes and return the intersection
def matching_keys(basis_hash, other_hash)
  basis_keys = basis_hash.keys
  other_keys = other_hash.keys
  basis_keys & other_keys
end
152
+
153
# @api internal
# Subtracts the second hash's keys from the first hash's keys.
#
# @param basis_hash [Hash] The first hash being compared
# @param other_hash [Hash] The second hash being compared
# @return [Array] Compare the keys of two hashes and return the keys unique to the first hash
def basis_only_keys(basis_hash, other_hash)
  basis_keys = basis_hash.keys
  other_keys = other_hash.keys
  basis_keys - other_keys
end
159
+
160
# @api internal
# Subtracts the first hash's keys from the second hash's keys.
#
# @param basis_hash [Hash] The first hash being compared
# @param other_hash [Hash] The second hash being compared
# @return [Array] Compare the keys of two hashes and return the keys unique to the second hash
def other_only_keys(basis_hash, other_hash)
  basis_keys = basis_hash.keys
  other_keys = other_hash.keys
  other_keys - basis_keys
end
166
+
167
# @api internal
# Records the basis group's id on this report, then runs the
# matching-signature comparison followed by the non-matching-signature
# comparison on the same pair of groups.
#
# @param basis_group [FileGroup] The file group that is the basis of the comparison
# @param other_group [FileGroup] The file group that is compared against the basis group
# @return [FileGroupDifference] Compare two file groups and return a differences report
def compare_file_groups(basis_group, other_group)
  @group_id = basis_group.group_id
  groups = [basis_group, other_group]
  compare_matching_signatures(*groups)
  compare_non_matching_signatures(*groups)
  self
end
177
+
178
# @api internal
# Finds the signatures present in both groups, then tabulates the unchanged
# files and the renamed files for those signatures, in that order.
#
# @param basis_group [FileGroup] The file group that is the basis of the comparison
# @param other_group [FileGroup] The file group that is compared against the basis group
# @return [void] For signatures that are present in both groups,
#   report which file instances are identical or renamed
def compare_matching_signatures(basis_group, other_group)
  basis_signatures = basis_group.signature_hash
  other_signatures = other_group.signature_hash
  shared_signatures = matching_keys(basis_signatures, other_signatures)
  tabulate_unchanged_files(shared_signatures, basis_signatures, other_signatures)
  tabulate_renamed_files(shared_signatures, basis_signatures, other_signatures)
end
187
+
188
# @api internal
# Determines which signatures occur in only one of the two groups, asks each
# group for the path hash of its own unmatched signatures, and then
# tabulates modified, deleted and added files from those two path hashes.
#
# @param basis_group [FileGroup] The file group that is the basis of the comparison
# @param other_group [FileGroup] The file group that is compared against the basis group
# @return [void] For signatures that are present in only one or the other group,
#   report which file instances are modified, deleted, or added
def compare_non_matching_signatures(basis_group, other_group)
  basis_signatures = basis_group.signature_hash
  other_signatures = other_group.signature_hash
  unmatched_in_basis = basis_only_keys(basis_signatures, other_signatures)
  unmatched_in_other = other_only_keys(basis_signatures, other_signatures)
  basis_paths = basis_group.path_hash_subset(unmatched_in_basis)
  other_paths = other_group.path_hash_subset(unmatched_in_other)
  tabulate_modified_files(basis_paths, other_paths)
  tabulate_deleted_files(basis_paths, other_paths)
  tabulate_added_files(basis_paths, other_paths)
end
201
+
202
# @api internal
# For every matching signature, creates one 'identical' difference entry per
# path that both groups list for that signature. The entries are wrapped in
# an 'identical' subset, which is appended to the report's subsets, and the
# subset's count is stored as the identical counter.
#
# @param matching_signatures [Array<FileSignature>] The file signature of the file manifestations being compared
# @param basis_signature_hash [OrderedHash<FileSignature, FileManifestation>]
#   Signature to file path mapping from the file group that is the basis of the comparison
# @param other_signature_hash [OrderedHash<FileSignature, FileManifestation>]
#   Signature to file path mapping from the file group that is the being compared to the basis group
# @return [FileGroupDifferenceSubset]
#   Container for reporting the set of file-level differences of type 'identical'
def tabulate_unchanged_files(matching_signatures, basis_signature_hash, other_signature_hash)
  entries = []
  matching_signatures.each do |sig|
    shared_paths = basis_signature_hash[sig].paths & other_signature_hash[sig].paths
    entries.concat(
      shared_paths.map do |shared_path|
        entry = FileInstanceDifference.new(:change => 'identical')
        entry.basis_path = shared_path
        # The path is unchanged, so the other side is recorded as the literal "same"
        entry.other_path = "same"
        entry.signatures << sig
        entry
      end
    )
  end
  report = FileGroupDifferenceSubset.new(:change => 'identical')
  report.files = entries
  @subsets << report
  @identical = report.count
  report
end
230
+
231
+
232
# @api internal
# For every matching signature, pairs the paths found only in the basis
# group with the paths found only in the other group, position by position,
# and creates one 'renamed' difference entry per pair. When one side has
# fewer paths, the missing path of the pair is nil. The entries are wrapped
# in a 'renamed' subset, which is appended to the report's subsets, and the
# subset's count is stored as the renamed counter.
#
# @param matching_signatures [Array<FileSignature>] The file signature of the file manifestations being compared
# @param basis_signature_hash [OrderedHash<FileSignature, FileManifestation>]
#   Signature to file path mapping from the file group that is the basis of the comparison
# @param other_signature_hash [OrderedHash<FileSignature, FileManifestation>]
#   Signature to file path mapping from the file group that is the being compared to the basis group
# @return [FileGroupDifferenceSubset]
#   Container for reporting the set of file-level differences of type 'renamed'
def tabulate_renamed_files(matching_signatures, basis_signature_hash, other_signature_hash)
  entries = []
  matching_signatures.each do |sig|
    paths_in_basis = basis_signature_hash[sig].paths
    paths_in_other = other_signature_hash[sig].paths
    old_names = paths_in_basis - paths_in_other
    new_names = paths_in_other - paths_in_basis
    pair_count = [old_names.size, new_names.size].max
    pair_count.times do |index|
      entry = FileInstanceDifference.new(:change => 'renamed')
      entry.basis_path = old_names[index]
      entry.other_path = new_names[index]
      entry.signatures << sig
      entries << entry
    end
  end
  report = FileGroupDifferenceSubset.new(:change => 'renamed')
  report.files = entries
  @subsets << report
  @renamed = report.count
  report
end
262
+
263
+
264
# @api internal
# Creates one 'modified' difference entry for every path that is a key in
# both path hashes, recording the basis signature followed by the other
# signature. The entries are wrapped in a 'modified' subset, which is
# appended to the report's subsets, and the subset's count is stored as the
# modified counter.
#
# @param basis_path_hash [OrderedHash<String,FileSignature>]
#   The file paths and associated signatures for manifestations appearing only in the basis group
# @param other_path_hash [OrderedHash<String,FileSignature>]
#   The file paths and associated signatures for manifestations appearing only in the other group
# @return [FileGroupDifferenceSubset]
#   Container for reporting the set of file-level differences of type 'modified'
def tabulate_modified_files(basis_path_hash, other_path_hash)
  entries = matching_keys(basis_path_hash, other_path_hash).map do |shared_path|
    entry = FileInstanceDifference.new(:change => 'modified')
    entry.basis_path = shared_path
    # The path is unchanged, so the other side is recorded as the literal "same"
    entry.other_path = "same"
    entry.signatures << basis_path_hash[shared_path]
    entry.signatures << other_path_hash[shared_path]
    entry
  end
  report = FileGroupDifferenceSubset.new(:change => 'modified')
  report.files = entries
  @subsets << report
  @modified = report.count
  report
end
287
+
288
# @api internal
# Creates one 'deleted' difference entry for every path that is a key only
# in the basis path hash, with an empty string as the other path. The
# entries are wrapped in a 'deleted' subset, which is appended to the
# report's subsets, and the subset's count is stored as the deleted counter.
#
# @param basis_path_hash [OrderedHash<String,FileSignature>]
#   The file paths and associated signatures for manifestations appearing only in the basis group
# @param other_path_hash [OrderedHash<String,FileSignature>]
#   The file paths and associated signatures for manifestations appearing only in the other group
# @return [FileGroupDifferenceSubset]
#   Container for reporting the set of file-level differences of type 'deleted'
def tabulate_deleted_files(basis_path_hash, other_path_hash)
  entries = basis_only_keys(basis_path_hash, other_path_hash).map do |removed_path|
    entry = FileInstanceDifference.new(:change => 'deleted')
    entry.basis_path = removed_path
    entry.other_path = ""
    entry.signatures << basis_path_hash[removed_path]
    entry
  end
  report = FileGroupDifferenceSubset.new(:change => 'deleted')
  report.files = entries
  @subsets << report
  @deleted = report.count
  report
end
310
+
311
# @api internal
# Creates one 'added' difference entry for every path that is a key only in
# the other path hash, with an empty string as the basis path. The entries
# are wrapped in an 'added' subset, which is appended to the report's
# subsets, and the subset's count is stored as the added counter.
#
# @param basis_path_hash [OrderedHash<String,FileSignature>]
#   The file paths and associated signatures for manifestations appearing only in the basis group
# @param other_path_hash [OrderedHash<String,FileSignature>]
#   The file paths and associated signatures for manifestations appearing only in the other group
# @return [FileGroupDifferenceSubset]
#   Container for reporting the set of file-level differences of type 'added'
def tabulate_added_files(basis_path_hash, other_path_hash)
  entries = other_only_keys(basis_path_hash, other_path_hash).map do |new_path|
    entry = FileInstanceDifference.new(:change => 'added')
    entry.basis_path = ""
    entry.other_path = new_path
    entry.signatures << other_path_hash[new_path]
    entry
  end
  report = FileGroupDifferenceSubset.new(:change => 'added')
  report.files = entries
  @subsets << report
  @added = report.count
  report
end
333
+
334
+ end
335
+
336
+ end
@@ -0,0 +1,45 @@
1
+ require 'moab'
2
+
3
+ module Moab
4
+
5
+ # A container for reporting a set of file-level differences of the type specified by the change attribute
6
+ #
7
+ # ====Data Model
8
+ # * {FileInventoryDifference} = compares two {FileInventory} instances based on file signatures and pathnames
9
+ # * {FileGroupDifference} [1..*] = performs analysis and reports differences between two matching {FileGroup} objects
10
+ # * <b>{FileGroupDifferenceSubset} [1..5] = collects a set of file-level differences of a given change type</b>
11
+ # * {FileInstanceDifference} [1..*] = contains difference information at the file level
12
+ # * {FileSignature} [1..2] = contains the file signature(s) of two file instances being compared
13
+ #
14
+ # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
15
+ # All rights reserved. See {file:LICENSE.rdoc} for details.
16
+ class FileGroupDifferenceSubset < Serializable
17
+ include HappyMapper
18
+
19
+ # The name of the XML element used to serialize this object's data
20
+ tag 'subset'
21
+
22
# (see Serializable#initialize)
#
# Starts the subset with an empty list of files, then passes the options on
# to the parent class initializer.
#
# @param opts [Hash] options forwarded unchanged to the parent initializer
def initialize(opts = {})
  # Assigned before the parent initializer is invoked.
  @files = []
  super(opts)
end
27
+
28
+ # @attribute
29
+ # @return [String] The type of change (identical|renamed|modified|deleted|added)
30
+ attribute :change, String, :key => true
31
+
32
+ # @attribute
33
+ # @return [Integer] How many files were changed
34
+ attribute :count, Integer, :on_save => Proc.new { |n| n.to_s }
35
+
36
# Number of file-level differences held by this subset, computed on demand
# from the files collection.
#
# @return [Integer] the number of entries in the files collection
def count
  files.length
end
39
+
40
+ # @attribute
41
+ # @return [Array<FileInstanceDifference>] The set of file instances having this type of change
42
+ has_many :files, FileInstanceDifference, :tag=>'file'
43
+
44
+ end
45
+ end
@@ -0,0 +1,82 @@
1
+ require 'moab'
2
+
3
+ module Moab
4
+
5
+ # The file path and last modification date properties of a file
6
+ #
7
+ # ====Data Model
8
+ # * {FileInventory} = container for recording information about a collection of related files
9
+ # * {FileGroup} [1..*] = subset allow segregation of content and metadata files
10
+ # * {FileManifestation} [1..*] = snapshot of a file's filesystem characteristics
11
+ # * {FileSignature} [1] = file fixity information
12
+ # * <b>{FileInstance} [1..*] = filepath and timestamp of any physical file having that signature</b>
13
+ #
14
+ # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
15
+ # All rights reserved. See {file:LICENSE.rdoc} for details.
16
+ class FileInstance < Serializable
17
+
18
+ include HappyMapper
19
+
20
+ # The name of the XML element used to serialize this object's data
21
+ tag 'fileInstance'
22
+
23
# (see Serializable#initialize)
#
# Adds no behavior of its own: the options are handed straight to the
# parent class initializer.
#
# @param opts [Hash] options forwarded unchanged to the parent initializer
def initialize(opts = {})
  # A bare super forwards this method's own argument (opts) to the parent.
  super
end
27
+
28
+ # @attribute
29
+ # @return [String] The id is the filename path, relative to the file group's base directory
30
+ attribute :path, String, :key => true
31
+
32
+ # @attribute
33
+ # @return [Time] The last modification date and time of the file
34
+ attribute :datetime, Time, :on_save => Proc.new {|t| t.to_s}
35
+
36
# Stores the timestamp after passing it through Time.input.
# NOTE(review): Time.input is not defined in this file; it presumably
# normalizes String/Time values -- confirm against its definition.
#
# @param event_datetime [Object] the value handed to Time.input
# @return [Object] whatever Time.input returned
def datetime=(event_datetime)
  normalized = Time.input(event_datetime)
  @datetime = normalized
end
39
+
40
# Reads the stored timestamp by passing it through Time.output.
# NOTE(review): Time.output is not defined in this file; it presumably
# formats the stored value for reporting -- confirm against its definition.
#
# @return [Object] whatever Time.output returns for the stored timestamp
def datetime
  stored = @datetime
  Time.output(stored)
end
43
+
44
+
45
# @api internal
# Fills in this instance from a file on disk: the path is stored relative to
# the base directory (both are expanded to absolute paths first), and the
# timestamp is the file's modification time as an ISO 8601 string.
#
# @param pathname [Pathname] The location of the physical file
# @param base_directory [Pathname] The full path used as the basis of the relative paths reported
# @return [FileInstance] Returns a file instance containing a physical file's properties
def instance_from_file(pathname, base_directory)
  absolute_file = pathname.expand_path
  absolute_base = base_directory.expand_path
  @path = absolute_file.relative_path_from(absolute_base).to_s
  # Assigned directly to the instance variable, not through the datetime= writer
  @datetime = pathname.mtime.iso8601
  self
end
54
+
55
# @api internal
# Two file instances are considered equal when their paths are equal.
# Any object that does not expose a public path (for example nil) is
# treated as not equal, instead of raising NoMethodError.
#
# @param other [FileInstance] The other file instance being compared to this instance
# @return [Boolean] Returns true if self and other have the same path.
def eql?(other)
  # Guard first so comparisons against nil or unrelated objects return false
  other.respond_to?(:path) && path == other.path
end
61
+
62
# @api internal
# Equality operator; it gives the same answer as {#eql?} by delegating to it.
#
# @param other [FileInstance] The other file instance being compared to this instance
# @return [Boolean] the result of eql? for the same argument
def ==(other)
  self.eql?(other)
end
67
+
68
# @api internal
# @return [Fixnum] Compute a hash-code for the path string.
#   Two file instances with the same relative path will have the same hash code (and will compare using eql?).
# @note The hash and eql? methods override the methods inherited from Object.
#   These methods ensure that instances of this class can be used as Hash keys. See
#   * {http://www.paulbutcher.com/2007/10/navigating-the-equality-maze/}
#   * {http://techbot.me/2011/05/ruby-basics-equality-operators-ruby/}
#   Also overridden is {#==} so that equality tests in other contexts will also return the expected result.
def hash
  # The hash code is taken from the path alone
  key = path
  key.hash
end
79
+
80
+ end
81
+
82
+ end