moab-versioning 4.2.0 → 4.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,6 @@
1
1
  require 'moab'
2
2
 
3
3
  module Moab
4
-
5
4
  # A container for recording difference information at the file level
6
5
  # * If there was no change, the change type is set to <i>identical</i>
7
6
  # * If the signature is unchanged, but the path has moved, the change type is set to <i>renamed</i>
@@ -21,15 +20,14 @@ module Moab
21
20
  #
22
21
  # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
23
22
  # All rights reserved. See {file:LICENSE.rdoc} for details.
24
- class FileInstanceDifference < Serializer::Serializable
25
-
23
+ class FileInstanceDifference < Serializer::Serializable
26
24
  include HappyMapper
27
25
 
28
26
  # The name of the XML element used to serialize this object's data
29
27
  tag 'file'
30
28
 
31
29
  # (see Serializable#initialize)
32
- def initialize(opts={})
30
+ def initialize(opts = {})
33
31
  @signatures = Array.new
34
32
  super(opts)
35
33
  end
@@ -49,7 +47,5 @@ module Moab
49
47
  # @attribute
50
48
  # @return [Array<FileSignature>] The fixity data of the file manifestation(s) (plural if change was a content modification)
51
49
  has_many :signatures, FileSignature, :tag => 'fileSignature'
52
-
53
50
  end
54
-
55
51
  end
@@ -1,7 +1,6 @@
1
1
  require 'moab'
2
2
 
3
3
  module Moab
4
-
5
4
  # A structured container for recording information about a collection of related files.
6
5
  #
7
6
  # The <b>scope</b> of the file collection depends on inventory type:
@@ -29,14 +28,13 @@ module Moab
29
28
  # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
30
29
  # All rights reserved. See {file:LICENSE.rdoc} for details.
31
30
  class FileInventory < Serializer::Manifest
32
-
33
31
  include HappyMapper
34
32
 
35
33
  # The name of the XML element used to serialize this object's data
36
34
  tag 'fileInventory'
37
35
 
38
36
  # (see Serializable#initialize)
39
- def initialize(opts={})
37
+ def initialize(opts = {})
40
38
  @groups = Array.new
41
39
  @inventory_datetime = Time.now
42
40
  super(opts)
@@ -106,7 +104,7 @@ module Moab
106
104
 
107
105
  # @param non_empty [Boolean] if true, return group_id's only for groups having files
108
106
  # @return [Array<String>] group identifiers contained in this file inventory
109
- def group_ids(non_empty=nil)
107
+ def group_ids(non_empty = nil)
110
108
  my_groups = non_empty ? self.non_empty_groups : groups
111
109
  my_groups.map { |g| g.group_id }
112
110
  end
@@ -114,7 +112,7 @@ module Moab
114
112
  # @param [String] group_id The identifier of the group to be selected
115
113
  # @return [FileGroup] The file group in this inventory for the specified group_id
116
114
  def group(group_id)
117
- groups.find { |group| group.group_id == group_id}
115
+ groups.find { |group| group.group_id == group_id }
118
116
  end
119
117
 
120
118
  # @param group_id [String] File group identifier (e.g. data, metadata, manifests)
@@ -183,7 +181,7 @@ module Moab
183
181
  # if nil, then the directory is assumed to contain both content and metadata subdirectories
184
182
  # @return [FileInventory] Traverse a directory and return an inventory of the files it contains
185
183
  # @example {include:file:spec/features/inventory/harvest_inventory_spec.rb}
186
- def inventory_from_directory(data_dir, group_id=nil)
184
+ def inventory_from_directory(data_dir, group_id = nil)
187
185
  if group_id
188
186
  groups << FileGroup.new(group_id: group_id).group_from_directory(data_dir)
189
187
  else
@@ -202,7 +200,7 @@ module Moab
202
200
  signatures_from_bag = signatures_from_bagit_manifests(bag_pathname)
203
201
  bag_data_subdirs = bag_pathname.join('data').children
204
202
  bag_data_subdirs.each do |subdir|
205
- groups << FileGroup.new(:group_id=>subdir.basename.to_s).group_from_bagit_subdir(subdir, signatures_from_bag)
203
+ groups << FileGroup.new(:group_id => subdir.basename.to_s).group_from_bagit_subdir(subdir, signatures_from_bag)
206
204
  end
207
205
  self
208
206
  end
@@ -219,7 +217,7 @@ module Moab
219
217
  if manifest_pathname[type].exist?
220
218
  manifest_pathname[type].each_line do |line|
221
219
  line.chomp!
222
- checksum,data_path = line.split(/\s+\**/,2)
220
+ checksum, data_path = line.split(/\s+\**/, 2)
223
221
  if checksum && data_path
224
222
  file_pathname = bag_pathname.join(data_path)
225
223
  signature = signatures[file_pathname]
@@ -251,18 +249,18 @@ module Moab
251
249
  # @api internal
252
250
  # @param type [String] Specifies the type of inventory, and thus the filename used for storage
253
251
  # @return [String] The standard name for the serialized inventory file of the given type
254
- def self.xml_filename(type=nil)
252
+ def self.xml_filename(type = nil)
255
253
  case type
256
- when "version"
257
- 'versionInventory.xml'
258
- when "additions"
259
- 'versionAdditions.xml'
260
- when "manifests"
261
- 'manifestInventory.xml'
262
- when "directory"
263
- 'directoryInventory.xml'
264
- else
265
- raise ArgumentError, "unknown inventory type: #{type}"
254
+ when "version"
255
+ 'versionInventory.xml'
256
+ when "additions"
257
+ 'versionAdditions.xml'
258
+ when "manifests"
259
+ 'manifestInventory.xml'
260
+ when "directory"
261
+ 'directoryInventory.xml'
262
+ else
263
+ raise ArgumentError, "unknown inventory type: #{type}"
266
264
  end
267
265
  end
268
266
 
@@ -271,11 +269,9 @@ module Moab
271
269
  # @param type [String] The inventory type, which governs the filename used for serialization
272
270
  # @return [void] write the {FileInventory} instance to a file
273
271
  # @example {include:file:spec/features/inventory/write_inventory_xml_spec.rb}
274
- def write_xml_file(parent_dir, type=nil)
272
+ def write_xml_file(parent_dir, type = nil)
275
273
  type = @type if type.nil?
276
274
  self.class.write_xml_file(self, parent_dir, type)
277
275
  end
278
-
279
276
  end
280
-
281
277
  end
@@ -1,7 +1,6 @@
1
1
  require 'moab'
2
2
 
3
3
  module Moab
4
-
5
4
  # Compares two {FileInventory} instances based primarily on file signatures and secondarily on file pathnames.
6
5
  # Although the usual use will be to compare the content of 2 different temporal versions of the same object,
7
6
  # it can also be used to verify an inventory document against an inventory harvested from a directory.
@@ -18,14 +17,13 @@ module Moab
18
17
  # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
19
18
  # All rights reserved. See {file:LICENSE.rdoc} for details.
20
19
  class FileInventoryDifference < Serializer::Manifest
21
-
22
20
  include HappyMapper
23
21
 
24
22
  # The name of the XML element used to serialize this object's data
25
23
  tag 'fileInventoryDifference'
26
24
 
27
25
  # (see Serializable#initialize)
28
- def initialize(opts={})
26
+ def initialize(opts = {})
29
27
  @group_differences = Array.new
30
28
  super(opts)
31
29
  end
@@ -36,7 +34,7 @@ module Moab
36
34
 
37
35
  # @attribute
38
36
  # @return [Integer] the number of differences found between the two inventories that were compared (dynamically calculated)
39
- attribute :difference_count, Integer, :tag=> 'differenceCount',:on_save => Proc.new {|i| i.to_s}
37
+ attribute :difference_count, Integer, :tag => 'differenceCount', :on_save => Proc.new { |i| i.to_s }
40
38
 
41
39
  def difference_count
42
40
  @group_differences.inject(0) { |sum, group| sum + group.difference_count }
@@ -74,7 +72,7 @@ module Moab
74
72
  # @param [String] group_id The identifier of the group to be selected
75
73
  # @return [FileGroupDifference] The subset of this report for the specified group_id (or nil if not found)
76
74
  def group_difference(group_id)
77
- @group_differences.find{ |group_difference| group_difference.group_id == group_id}
75
+ @group_differences.find { |group_difference| group_difference.group_id == group_id }
78
76
  end
79
77
 
80
78
  # @api external
@@ -126,7 +124,5 @@ module Moab
126
124
  end
127
125
  inv_diff
128
126
  end
129
-
130
127
  end
131
-
132
128
  end
@@ -1,7 +1,6 @@
1
1
  require 'moab'
2
2
 
3
3
  module Moab
4
-
5
4
  # A container for a file signature and all the physical file instances that have that signature
6
5
  # This element has one child {FileSignature} element, and one or more {FileInstance} elements
7
6
  # Regarding the class name, see
@@ -24,7 +23,7 @@ module Moab
24
23
  tag 'file'
25
24
 
26
25
  # (see Serializable#initialize)
27
- def initialize(opts={})
26
+ def initialize(opts = {})
28
27
  @instances = Array.new
29
28
  super(opts)
30
29
  end
@@ -48,7 +47,7 @@ module Moab
48
47
  # @api internal
49
48
  # @return [Array<String>] Create an array from all the file paths of the child {FileInstance} objects
50
49
  def paths
51
- instances.collect { |i| i.path}
50
+ instances.collect { |i| i.path }
52
51
  end
53
52
 
54
53
  # @api internal
@@ -69,7 +68,7 @@ module Moab
69
68
  # (estimating du -k result)
70
69
  def block_count
71
70
  block_size = 1024
72
- instance_blocks = (signature.size.to_i + block_size - 1)/block_size
71
+ instance_blocks = (signature.size.to_i + block_size - 1) / block_size
73
72
  file_count * instance_blocks
74
73
  end
75
74
 
@@ -80,7 +79,5 @@ module Moab
80
79
  return false unless (other.respond_to?(:signature) && other.respond_to?(:instances)) # Cannot equal an incomparable type!
81
80
  (self.signature == other.signature) && (self.instances == other.instances)
82
81
  end
83
-
84
82
  end
85
-
86
83
  end
@@ -1,7 +1,6 @@
1
1
  require 'moab'
2
2
 
3
3
  module Moab
4
-
5
4
  # The fixity properties of a file, used to determine file content equivalence regardless of filename.
6
5
  # Placing this data in a class by itself facilitates using file size together with the MD5 and SHA1 checksums
7
6
  # as a single key when doing comparisons against other file instances. The Moab design assumes that this file signature
@@ -40,17 +39,11 @@ module Moab
40
39
  # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
41
40
  # All rights reserved. See {file:LICENSE.rdoc} for details.
42
41
  class FileSignature < Serializer::Serializable
43
-
44
42
  include HappyMapper
45
43
 
46
44
  # The name of the XML element used to serialize this object's data
47
45
  tag 'fileSignature'
48
46
 
49
- # (see Serializable#initialize)
50
- def initialize(opts={})
51
- super(opts)
52
- end
53
-
54
47
  # @attribute
55
48
  # @return [Integer] The size of the file in bytes
56
49
  attribute :size, Integer, :on_save => Proc.new { |n| n.to_s }
@@ -67,19 +60,47 @@ module Moab
67
60
  # @return [String] The SHA256 checksum value of the file
68
61
  attribute :sha256, String, :on_save => Proc.new { |n| n.nil? ? "" : n.to_s }
69
62
 
63
+ KNOWN_ALGOS = {
64
+ md5: proc { Digest::MD5.new },
65
+ sha1: proc { Digest::SHA1.new },
66
+ sha256: proc { Digest::SHA2.new(256) }
67
+ }.freeze
68
+
69
+ def self.active_algos
70
+ Moab::Config.checksum_algos
71
+ end
72
+
73
+ # Reads the file once for ALL (requested) algorithms, not once per algorithm.
74
+ # @param [Pathname] pathname
75
+ # @param [Array<Symbol>] algos_to_use one or more keys of KNOWN_ALGOS to be computed
76
+ # @return [Moab::FileSignature] object populated with (requested) checksums
77
+ def self.from_file(pathname, algos_to_use = active_algos)
78
+ raise 'Unrecognized algorithm requested' unless algos_to_use.all? { |a| KNOWN_ALGOS.include?(a) }
79
+
80
+ signatures = algos_to_use.map { |k| [k, KNOWN_ALGOS[k].call] }.to_h
81
+
82
+ pathname.open("r") do |stream|
83
+ while (buffer = stream.read(8192))
84
+ signatures.each_value { |digest| digest.update(buffer) }
85
+ end
86
+ end
87
+
88
+ new(signatures.map { |k, digest| [k, digest.hexdigest] }.to_h.merge(size: pathname.size))
89
+ end
90
+
70
91
  # @param type [Symbol,String] The type of checksum
71
92
  # @param value [String] The checksum value
72
93
  # @return [void] Set the value of the specified checksum type
73
- def set_checksum(type,value)
94
+ def set_checksum(type, value)
74
95
  case type.to_s.downcase.to_sym
75
- when :md5
76
- @md5 = value
77
- when :sha1
78
- @sha1 = value
79
- when :sha256
80
- @sha256 = value
81
- else
82
- raise ArgumentError, "Unknown checksum type '#{type}'"
96
+ when :md5
97
+ @md5 = value
98
+ when :sha1
99
+ @sha1 = value
100
+ when :sha256
101
+ @sha256 = value
102
+ else
103
+ raise ArgumentError, "Unknown checksum type '#{type}'"
83
104
  end
84
105
  end
85
106
 
@@ -141,24 +162,17 @@ module Moab
141
162
  @size.to_i
142
163
  end
143
164
 
144
- # @api internal
165
+ # @deprecated
166
+ # This method is a holdover from an earlier version. Use the class method .from_file going forward.
167
+ # @api external
145
168
  # @param pathname [Pathname] The location of the file to be digested
146
169
  # @return [FileSignature] Generate a FileSignature instance containing size and checksums for a physical file
147
170
  def signature_from_file(pathname)
148
- @size = pathname.size
149
- md5_digest = Digest::MD5.new
150
- sha1_digest = Digest::SHA1.new
151
- sha256_digest = Digest::SHA2.new(256)
152
- pathname.open("r") do |stream|
153
- while buffer = stream.read(8192)
154
- md5_digest.update(buffer)
155
- sha1_digest.update(buffer)
156
- sha256_digest.update(buffer)
157
- end
158
- end
159
- @md5 = md5_digest.hexdigest
160
- @sha1 = sha1_digest.hexdigest
161
- @sha256 = sha256_digest.hexdigest
171
+ file_signature = self.class.from_file(pathname)
172
+ self.size = file_signature.size
173
+ self.md5 = file_signature.md5
174
+ self.sha1 = file_signature.sha1
175
+ self.sha256 = file_signature.sha256
162
176
  self
163
177
  end
164
178
 
@@ -195,7 +209,5 @@ module Moab
195
209
  end
196
210
  type_for_name
197
211
  end
198
-
199
212
  end
200
-
201
213
  end
@@ -1,7 +1,6 @@
1
1
  require 'moab'
2
2
 
3
3
  module Moab
4
-
5
4
  # A digital object's Signature Catalog is derived from a filtered aggregation of the file inventories
6
5
  # of a digital object's set of versions. (see {#update})
7
6
  # It has an entry for every file (identified by {FileSignature}) found in any of the versions,
@@ -32,7 +31,7 @@ module Moab
32
31
  tag 'signatureCatalog'
33
32
 
34
33
  # (see Serializable#initialize)
35
- def initialize(opts={})
34
+ def initialize(opts = {})
36
35
  @entries = Array.new
37
36
  @signature_hash = Hash.new
38
37
  super(opts)
@@ -44,7 +43,7 @@ module Moab
44
43
 
45
44
  # @attribute
46
45
  # @return [Integer] The ordinal version number
47
- attribute :version_id, Integer, :tag => 'versionId', :key => true, :on_save => Proc.new {|n| n.to_s}
46
+ attribute :version_id, Integer, :tag => 'versionId', :key => true, :on_save => Proc.new { |n| n.to_s }
48
47
 
49
48
  # @return [String] The unique identifier concatenating digital object id with version id
50
49
  def composite_key
@@ -56,7 +55,7 @@ module Moab
56
55
  attribute :catalog_datetime, Time, :tag => 'catalogDatetime'
57
56
 
58
57
  def catalog_datetime=(datetime)
59
- @catalog_datetime=Moab::UtcTime.input(datetime)
58
+ @catalog_datetime = Moab::UtcTime.input(datetime)
60
59
  end
61
60
 
62
61
  def catalog_datetime
@@ -65,7 +64,7 @@ module Moab
65
64
 
66
65
  # @attribute
67
66
  # @return [Integer] The total number of data files (dynamically calculated)
68
- attribute :file_count, Integer, :tag => 'fileCount', :on_save => Proc.new {|t| t.to_s}
67
+ attribute :file_count, Integer, :tag => 'fileCount', :on_save => Proc.new { |t| t.to_s }
69
68
 
70
69
  def file_count
71
70
  entries.size
@@ -73,7 +72,7 @@ module Moab
73
72
 
74
73
  # @attribute
75
74
  # @return [Integer] The total size (in bytes) of all data files (dynamically calculated)
76
- attribute :byte_count, Integer, :tag => 'byteCount', :on_save => Proc.new {|t| t.to_s}
75
+ attribute :byte_count, Integer, :tag => 'byteCount', :on_save => Proc.new { |t| t.to_s }
77
76
 
78
77
  def byte_count
79
78
  entries.inject(0) { |sum, entry| sum + entry.signature.size.to_i }
@@ -81,11 +80,11 @@ module Moab
81
80
 
82
81
  # @attribute
83
82
  # @return [Integer] The total disk usage (in 1 kB blocks) of all data files (estimating du -k result) (dynamically calculated)
84
- attribute :block_count, Integer, :tag => 'blockCount', :on_save => Proc.new {|t| t.to_s}
83
+ attribute :block_count, Integer, :tag => 'blockCount', :on_save => Proc.new { |t| t.to_s }
85
84
 
86
85
  def block_count
87
- block_size=1024
88
- entries.inject(0) { |sum, entry| sum + (entry.signature.size.to_i + block_size - 1)/block_size }
86
+ block_size = 1024
87
+ entries.inject(0) { |sum, entry| sum + (entry.signature.size.to_i + block_size - 1) / block_size }
89
88
  end
90
89
 
91
90
  # @return [Array<String>] The data fields to include in summary reports
@@ -128,8 +127,8 @@ module Moab
128
127
  # @param group [FileGroup] A group of the files from a file inventory
129
128
  # @param group_pathname [Pathname] The location of the directory containing the group's files
130
129
  # @return [void] Inspect and upgrade the group's signature data to include all desired checksums
131
- def normalize_group_signatures(group, group_pathname=nil)
132
- unless group_pathname.nil?
130
+ def normalize_group_signatures(group, group_pathname = nil)
131
+ unless group_pathname.nil?
133
132
  group_pathname = Pathname(group_pathname)
134
133
  raise "Could not locate #{group_pathname}" unless group_pathname.exist?
135
134
  end
@@ -162,7 +161,7 @@ module Moab
162
161
  if file.signature.complete?
163
162
  entry.signature = file.signature
164
163
  else
165
- file_pathname = data_pathname.join(group.group_id,entry.path)
164
+ file_pathname = data_pathname.join(group.group_id, entry.path)
166
165
  entry.signature = file.signature.normalized_signature(file_pathname)
167
166
  end
168
167
  add_entry(entry)
@@ -179,20 +178,18 @@ module Moab
179
178
  # containing only those files that were added in this version
180
179
  # @example {include:file:spec/features/catalog/version_additions_spec.rb}
181
180
  def version_additions(version_inventory)
182
- version_additions = FileInventory.new(:type=>'additions')
181
+ version_additions = FileInventory.new(:type => 'additions')
183
182
  version_additions.copy_ids(version_inventory)
184
183
  version_inventory.groups.each do |group|
185
184
  group_addtions = FileGroup.new(:group_id => group.group_id)
186
185
  group.files.each do |file|
187
186
  unless @signature_hash.has_key?(file.signature)
188
- group_addtions.add_file_instance(file.signature,file.instances[0])
187
+ group_addtions.add_file_instance(file.signature, file.instances[0])
189
188
  end
190
189
  end
191
190
  version_additions.groups << group_addtions if group_addtions.files.size > 0
192
191
  end
193
192
  version_additions
194
193
  end
195
-
196
194
  end
197
-
198
195
  end