moab-versioning 4.2.0 → 4.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,6 @@
1
1
  require 'moab'
2
2
 
3
3
  module Moab
4
-
5
4
  # A container for recording difference information at the file level
6
5
  # * If there was no change, the change type is set to <i>identical</i>
7
6
  # * If the signature is unchanged, but the path has moved, the change type is set to <i>renamed</i>
@@ -21,15 +20,14 @@ module Moab
21
20
  #
22
21
  # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
23
22
  # All rights reserved. See {file:LICENSE.rdoc} for details.
24
- class FileInstanceDifference < Serializer::Serializable
25
-
23
+ class FileInstanceDifference < Serializer::Serializable
26
24
  include HappyMapper
27
25
 
28
26
  # The name of the XML element used to serialize this object's data
29
27
  tag 'file'
30
28
 
31
29
  # (see Serializable#initialize)
32
- def initialize(opts={})
30
+ def initialize(opts = {})
33
31
  @signatures = Array.new
34
32
  super(opts)
35
33
  end
@@ -49,7 +47,5 @@ module Moab
49
47
  # @attribute
50
48
  # @return [Array<FileSignature>] The fixity data of the file manifestation(s) (plural if change was a content modification)
51
49
  has_many :signatures, FileSignature, :tag => 'fileSignature'
52
-
53
50
  end
54
-
55
51
  end
@@ -1,7 +1,6 @@
1
1
  require 'moab'
2
2
 
3
3
  module Moab
4
-
5
4
  # A structured container for recording information about a collection of related files.
6
5
  #
7
6
  # The <b>scope</b> of the file collection depends on inventory type:
@@ -29,14 +28,13 @@ module Moab
29
28
  # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
30
29
  # All rights reserved. See {file:LICENSE.rdoc} for details.
31
30
  class FileInventory < Serializer::Manifest
32
-
33
31
  include HappyMapper
34
32
 
35
33
  # The name of the XML element used to serialize this object's data
36
34
  tag 'fileInventory'
37
35
 
38
36
  # (see Serializable#initialize)
39
- def initialize(opts={})
37
+ def initialize(opts = {})
40
38
  @groups = Array.new
41
39
  @inventory_datetime = Time.now
42
40
  super(opts)
@@ -106,7 +104,7 @@ module Moab
106
104
 
107
105
  # @param non_empty [Boolean] if true, return group_id's only for groups having files
108
106
  # @return [Array<String>] group identifiers contained in this file inventory
109
- def group_ids(non_empty=nil)
107
+ def group_ids(non_empty = nil)
110
108
  my_groups = non_empty ? self.non_empty_groups : groups
111
109
  my_groups.map { |g| g.group_id }
112
110
  end
@@ -114,7 +112,7 @@ module Moab
114
112
  # @param [String] group_id The identifier of the group to be selected
115
113
  # @return [FileGroup] The file group in this inventory for the specified group_id
116
114
  def group(group_id)
117
- groups.find { |group| group.group_id == group_id}
115
+ groups.find { |group| group.group_id == group_id }
118
116
  end
119
117
 
120
118
  # @param group_id [String] File group identifier (e.g. data, metadata, manifests)
@@ -183,7 +181,7 @@ module Moab
183
181
  # if nil, then the directory is assumed to contain both content and metadata subdirectories
184
182
  # @return [FileInventory] Traverse a directory and return an inventory of the files it contains
185
183
  # @example {include:file:spec/features/inventory/harvest_inventory_spec.rb}
186
- def inventory_from_directory(data_dir, group_id=nil)
184
+ def inventory_from_directory(data_dir, group_id = nil)
187
185
  if group_id
188
186
  groups << FileGroup.new(group_id: group_id).group_from_directory(data_dir)
189
187
  else
@@ -202,7 +200,7 @@ module Moab
202
200
  signatures_from_bag = signatures_from_bagit_manifests(bag_pathname)
203
201
  bag_data_subdirs = bag_pathname.join('data').children
204
202
  bag_data_subdirs.each do |subdir|
205
- groups << FileGroup.new(:group_id=>subdir.basename.to_s).group_from_bagit_subdir(subdir, signatures_from_bag)
203
+ groups << FileGroup.new(:group_id => subdir.basename.to_s).group_from_bagit_subdir(subdir, signatures_from_bag)
206
204
  end
207
205
  self
208
206
  end
@@ -219,7 +217,7 @@ module Moab
219
217
  if manifest_pathname[type].exist?
220
218
  manifest_pathname[type].each_line do |line|
221
219
  line.chomp!
222
- checksum,data_path = line.split(/\s+\**/,2)
220
+ checksum, data_path = line.split(/\s+\**/, 2)
223
221
  if checksum && data_path
224
222
  file_pathname = bag_pathname.join(data_path)
225
223
  signature = signatures[file_pathname]
@@ -251,18 +249,18 @@ module Moab
251
249
  # @api internal
252
250
  # @param type [String] Specifies the type of inventory, and thus the filename used for storage
253
251
  # @return [String] The standard name for the serialized inventory file of the given type
254
- def self.xml_filename(type=nil)
252
+ def self.xml_filename(type = nil)
255
253
  case type
256
- when "version"
257
- 'versionInventory.xml'
258
- when "additions"
259
- 'versionAdditions.xml'
260
- when "manifests"
261
- 'manifestInventory.xml'
262
- when "directory"
263
- 'directoryInventory.xml'
264
- else
265
- raise ArgumentError, "unknown inventory type: #{type}"
254
+ when "version"
255
+ 'versionInventory.xml'
256
+ when "additions"
257
+ 'versionAdditions.xml'
258
+ when "manifests"
259
+ 'manifestInventory.xml'
260
+ when "directory"
261
+ 'directoryInventory.xml'
262
+ else
263
+ raise ArgumentError, "unknown inventory type: #{type}"
266
264
  end
267
265
  end
268
266
 
@@ -271,11 +269,9 @@ module Moab
271
269
  # @param type [String] The inventory type, which governs the filename used for serialization
272
270
  # @return [void] write the {FileInventory} instance to a file
273
271
  # @example {include:file:spec/features/inventory/write_inventory_xml_spec.rb}
274
- def write_xml_file(parent_dir, type=nil)
272
+ def write_xml_file(parent_dir, type = nil)
275
273
  type = @type if type.nil?
276
274
  self.class.write_xml_file(self, parent_dir, type)
277
275
  end
278
-
279
276
  end
280
-
281
277
  end
@@ -1,7 +1,6 @@
1
1
  require 'moab'
2
2
 
3
3
  module Moab
4
-
5
4
  # Compares two {FileInventory} instances based primarily on file signatures and secondarily on file pathnames.
6
5
  # Although the usual use will be to compare the content of 2 different temporal versions of the same object,
7
6
  # it can also be used to verify an inventory document against an inventory harvested from a directory.
@@ -18,14 +17,13 @@ module Moab
18
17
  # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
19
18
  # All rights reserved. See {file:LICENSE.rdoc} for details.
20
19
  class FileInventoryDifference < Serializer::Manifest
21
-
22
20
  include HappyMapper
23
21
 
24
22
  # The name of the XML element used to serialize this object's data
25
23
  tag 'fileInventoryDifference'
26
24
 
27
25
  # (see Serializable#initialize)
28
- def initialize(opts={})
26
+ def initialize(opts = {})
29
27
  @group_differences = Array.new
30
28
  super(opts)
31
29
  end
@@ -36,7 +34,7 @@ module Moab
36
34
 
37
35
  # @attribute
38
36
  # @return [Integer] the number of differences found between the two inventories that were compared (dynamically calculated)
39
- attribute :difference_count, Integer, :tag=> 'differenceCount',:on_save => Proc.new {|i| i.to_s}
37
+ attribute :difference_count, Integer, :tag => 'differenceCount', :on_save => Proc.new { |i| i.to_s }
40
38
 
41
39
  def difference_count
42
40
  @group_differences.inject(0) { |sum, group| sum + group.difference_count }
@@ -74,7 +72,7 @@ module Moab
74
72
  # @param [String] group_id The identifier of the group to be selected
75
73
  # @return [FileGroupDifference] The subset of this report for the specified group_id (or nil if not found)
76
74
  def group_difference(group_id)
77
- @group_differences.find{ |group_difference| group_difference.group_id == group_id}
75
+ @group_differences.find { |group_difference| group_difference.group_id == group_id }
78
76
  end
79
77
 
80
78
  # @api external
@@ -126,7 +124,5 @@ module Moab
126
124
  end
127
125
  inv_diff
128
126
  end
129
-
130
127
  end
131
-
132
128
  end
@@ -1,7 +1,6 @@
1
1
  require 'moab'
2
2
 
3
3
  module Moab
4
-
5
4
  # A container for a file signature and all the physical file instances that have that signature
6
5
  # This element has one child {FileSignature} element, and one or more {FileInstance} elements
7
6
  # Regarding the class name, see
@@ -24,7 +23,7 @@ module Moab
24
23
  tag 'file'
25
24
 
26
25
  # (see Serializable#initialize)
27
- def initialize(opts={})
26
+ def initialize(opts = {})
28
27
  @instances = Array.new
29
28
  super(opts)
30
29
  end
@@ -48,7 +47,7 @@ module Moab
48
47
  # @api internal
49
48
  # @return [Array<String>] Create an array from all the file paths of the child {FileInstance} objects
50
49
  def paths
51
- instances.collect { |i| i.path}
50
+ instances.collect { |i| i.path }
52
51
  end
53
52
 
54
53
  # @api internal
@@ -69,7 +68,7 @@ module Moab
69
68
  # (estimating du -k result)
70
69
  def block_count
71
70
  block_size = 1024
72
- instance_blocks = (signature.size.to_i + block_size - 1)/block_size
71
+ instance_blocks = (signature.size.to_i + block_size - 1) / block_size
73
72
  file_count * instance_blocks
74
73
  end
75
74
 
@@ -80,7 +79,5 @@ module Moab
80
79
  return false unless (other.respond_to?(:signature) && other.respond_to?(:instances)) # Cannot equal an incomparable type!
81
80
  (self.signature == other.signature) && (self.instances == other.instances)
82
81
  end
83
-
84
82
  end
85
-
86
83
  end
@@ -1,7 +1,6 @@
1
1
  require 'moab'
2
2
 
3
3
  module Moab
4
-
5
4
  # The fixity properties of a file, used to determine file content equivalence regardless of filename.
6
5
  # Placing this data in a class by itself facilitates using file size together with the MD5 and SHA1 checksums
7
6
  # as a single key when doing comparisons against other file instances. The Moab design assumes that this file signature
@@ -40,17 +39,11 @@ module Moab
40
39
  # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
41
40
  # All rights reserved. See {file:LICENSE.rdoc} for details.
42
41
  class FileSignature < Serializer::Serializable
43
-
44
42
  include HappyMapper
45
43
 
46
44
  # The name of the XML element used to serialize this object's data
47
45
  tag 'fileSignature'
48
46
 
49
- # (see Serializable#initialize)
50
- def initialize(opts={})
51
- super(opts)
52
- end
53
-
54
47
  # @attribute
55
48
  # @return [Integer] The size of the file in bytes
56
49
  attribute :size, Integer, :on_save => Proc.new { |n| n.to_s }
@@ -67,19 +60,47 @@ module Moab
67
60
  # @return [String] The SHA256 checksum value of the file
68
61
  attribute :sha256, String, :on_save => Proc.new { |n| n.nil? ? "" : n.to_s }
69
62
 
63
+ KNOWN_ALGOS = {
64
+ md5: proc { Digest::MD5.new },
65
+ sha1: proc { Digest::SHA1.new },
66
+ sha256: proc { Digest::SHA2.new(256) }
67
+ }.freeze
68
+
69
+ def self.active_algos
70
+ Moab::Config.checksum_algos
71
+ end
72
+
73
+ # Reads the file once for ALL (requested) algorithms, not once per algorithm.
74
+ # @param [Pathname] pathname
75
+ # @param [Array<Symbol>] algos_to_use one or more keys of KNOWN_ALGOS to be computed
76
+ # @return [Moab::FileSignature] object populated with (requested) checksums
77
+ def self.from_file(pathname, algos_to_use = active_algos)
78
+ raise 'Unrecognized algorithm requested' unless algos_to_use.all? { |a| KNOWN_ALGOS.include?(a) }
79
+
80
+ signatures = algos_to_use.map { |k| [k, KNOWN_ALGOS[k].call] }.to_h
81
+
82
+ pathname.open("r") do |stream|
83
+ while (buffer = stream.read(8192))
84
+ signatures.each_value { |digest| digest.update(buffer) }
85
+ end
86
+ end
87
+
88
+ new(signatures.map { |k, digest| [k, digest.hexdigest] }.to_h.merge(size: pathname.size))
89
+ end
90
+
70
91
  # @param type [Symbol,String] The type of checksum
71
92
  # @param value [String] The checksum value
72
93
  # @return [void] Set the value of the specified checksum type
73
- def set_checksum(type,value)
94
+ def set_checksum(type, value)
74
95
  case type.to_s.downcase.to_sym
75
- when :md5
76
- @md5 = value
77
- when :sha1
78
- @sha1 = value
79
- when :sha256
80
- @sha256 = value
81
- else
82
- raise ArgumentError, "Unknown checksum type '#{type}'"
96
+ when :md5
97
+ @md5 = value
98
+ when :sha1
99
+ @sha1 = value
100
+ when :sha256
101
+ @sha256 = value
102
+ else
103
+ raise ArgumentError, "Unknown checksum type '#{type}'"
83
104
  end
84
105
  end
85
106
 
@@ -141,24 +162,17 @@ module Moab
141
162
  @size.to_i
142
163
  end
143
164
 
144
- # @api internal
165
+ # @deprecated
166
+ # This method is a holdover from an earlier version. Use the class method .from_file going forward.
167
+ # @api external
145
168
  # @param pathname [Pathname] The location of the file to be digested
146
169
  # @return [FileSignature] Generate a FileSignature instance containing size and checksums for a physical file
147
170
  def signature_from_file(pathname)
148
- @size = pathname.size
149
- md5_digest = Digest::MD5.new
150
- sha1_digest = Digest::SHA1.new
151
- sha256_digest = Digest::SHA2.new(256)
152
- pathname.open("r") do |stream|
153
- while buffer = stream.read(8192)
154
- md5_digest.update(buffer)
155
- sha1_digest.update(buffer)
156
- sha256_digest.update(buffer)
157
- end
158
- end
159
- @md5 = md5_digest.hexdigest
160
- @sha1 = sha1_digest.hexdigest
161
- @sha256 = sha256_digest.hexdigest
171
+ file_signature = self.class.from_file(pathname)
172
+ self.size = file_signature.size
173
+ self.md5 = file_signature.md5
174
+ self.sha1 = file_signature.sha1
175
+ self.sha256 = file_signature.sha256
162
176
  self
163
177
  end
164
178
 
@@ -195,7 +209,5 @@ module Moab
195
209
  end
196
210
  type_for_name
197
211
  end
198
-
199
212
  end
200
-
201
213
  end
@@ -1,7 +1,6 @@
1
1
  require 'moab'
2
2
 
3
3
  module Moab
4
-
5
4
  # A digital object's Signature Catalog is derived from a filtered aggregation of the file inventories
6
5
  # of a digital object's set of versions. (see {#update})
7
6
  # It has an entry for every file (identified by {FileSignature}) found in any of the versions,
@@ -32,7 +31,7 @@ module Moab
32
31
  tag 'signatureCatalog'
33
32
 
34
33
  # (see Serializable#initialize)
35
- def initialize(opts={})
34
+ def initialize(opts = {})
36
35
  @entries = Array.new
37
36
  @signature_hash = Hash.new
38
37
  super(opts)
@@ -44,7 +43,7 @@ module Moab
44
43
 
45
44
  # @attribute
46
45
  # @return [Integer] The ordinal version number
47
- attribute :version_id, Integer, :tag => 'versionId', :key => true, :on_save => Proc.new {|n| n.to_s}
46
+ attribute :version_id, Integer, :tag => 'versionId', :key => true, :on_save => Proc.new { |n| n.to_s }
48
47
 
49
48
  # @return [String] The unique identifier concatenating digital object id with version id
50
49
  def composite_key
@@ -56,7 +55,7 @@ module Moab
56
55
  attribute :catalog_datetime, Time, :tag => 'catalogDatetime'
57
56
 
58
57
  def catalog_datetime=(datetime)
59
- @catalog_datetime=Moab::UtcTime.input(datetime)
58
+ @catalog_datetime = Moab::UtcTime.input(datetime)
60
59
  end
61
60
 
62
61
  def catalog_datetime
@@ -65,7 +64,7 @@ module Moab
65
64
 
66
65
  # @attribute
67
66
  # @return [Integer] The total number of data files (dynamically calculated)
68
- attribute :file_count, Integer, :tag => 'fileCount', :on_save => Proc.new {|t| t.to_s}
67
+ attribute :file_count, Integer, :tag => 'fileCount', :on_save => Proc.new { |t| t.to_s }
69
68
 
70
69
  def file_count
71
70
  entries.size
@@ -73,7 +72,7 @@ module Moab
73
72
 
74
73
  # @attribute
75
74
  # @return [Integer] The total size (in bytes) of all data files (dynamically calculated)
76
- attribute :byte_count, Integer, :tag => 'byteCount', :on_save => Proc.new {|t| t.to_s}
75
+ attribute :byte_count, Integer, :tag => 'byteCount', :on_save => Proc.new { |t| t.to_s }
77
76
 
78
77
  def byte_count
79
78
  entries.inject(0) { |sum, entry| sum + entry.signature.size.to_i }
@@ -81,11 +80,11 @@ module Moab
81
80
 
82
81
  # @attribute
83
82
  # @return [Integer] The total disk usage (in 1 kB blocks) of all data files (estimating du -k result) (dynamically calculated)
84
- attribute :block_count, Integer, :tag => 'blockCount', :on_save => Proc.new {|t| t.to_s}
83
+ attribute :block_count, Integer, :tag => 'blockCount', :on_save => Proc.new { |t| t.to_s }
85
84
 
86
85
  def block_count
87
- block_size=1024
88
- entries.inject(0) { |sum, entry| sum + (entry.signature.size.to_i + block_size - 1)/block_size }
86
+ block_size = 1024
87
+ entries.inject(0) { |sum, entry| sum + (entry.signature.size.to_i + block_size - 1) / block_size }
89
88
  end
90
89
 
91
90
  # @return [Array<String>] The data fields to include in summary reports
@@ -128,8 +127,8 @@ module Moab
128
127
  # @param group [FileGroup] A group of the files from a file inventory
129
128
  # @param group_pathname [Pathname] The location of the directory containing the group's files
130
129
  # @return [void] Inspect and upgrade the group's signature data to include all desired checksums
131
- def normalize_group_signatures(group, group_pathname=nil)
132
- unless group_pathname.nil?
130
+ def normalize_group_signatures(group, group_pathname = nil)
131
+ unless group_pathname.nil?
133
132
  group_pathname = Pathname(group_pathname)
134
133
  raise "Could not locate #{group_pathname}" unless group_pathname.exist?
135
134
  end
@@ -162,7 +161,7 @@ module Moab
162
161
  if file.signature.complete?
163
162
  entry.signature = file.signature
164
163
  else
165
- file_pathname = data_pathname.join(group.group_id,entry.path)
164
+ file_pathname = data_pathname.join(group.group_id, entry.path)
166
165
  entry.signature = file.signature.normalized_signature(file_pathname)
167
166
  end
168
167
  add_entry(entry)
@@ -179,20 +178,18 @@ module Moab
179
178
  # containing only those files that were added in this version
180
179
  # @example {include:file:spec/features/catalog/version_additions_spec.rb}
181
180
  def version_additions(version_inventory)
182
- version_additions = FileInventory.new(:type=>'additions')
181
+ version_additions = FileInventory.new(:type => 'additions')
183
182
  version_additions.copy_ids(version_inventory)
184
183
  version_inventory.groups.each do |group|
185
184
  group_addtions = FileGroup.new(:group_id => group.group_id)
186
185
  group.files.each do |file|
187
186
  unless @signature_hash.has_key?(file.signature)
188
- group_addtions.add_file_instance(file.signature,file.instances[0])
187
+ group_addtions.add_file_instance(file.signature, file.instances[0])
189
188
  end
190
189
  end
191
190
  version_additions.groups << group_addtions if group_addtions.files.size > 0
192
191
  end
193
192
  version_additions
194
193
  end
195
-
196
194
  end
197
-
198
195
  end