moab-versioning 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/lib/moab.rb +59 -0
  3. data/lib/moab/bagger.rb +289 -0
  4. data/lib/moab/config.rb +21 -0
  5. data/lib/moab/exceptions.rb +18 -0
  6. data/lib/moab/file_group.rb +244 -0
  7. data/lib/moab/file_group_difference.rb +336 -0
  8. data/lib/moab/file_group_difference_subset.rb +45 -0
  9. data/lib/moab/file_instance.rb +82 -0
  10. data/lib/moab/file_instance_difference.rb +54 -0
  11. data/lib/moab/file_inventory.rb +279 -0
  12. data/lib/moab/file_inventory_difference.rb +132 -0
  13. data/lib/moab/file_manifestation.rb +85 -0
  14. data/lib/moab/file_signature.rb +200 -0
  15. data/lib/moab/signature_catalog.rb +195 -0
  16. data/lib/moab/signature_catalog_entry.rb +61 -0
  17. data/lib/moab/storage_object.rb +220 -0
  18. data/lib/moab/storage_object_version.rb +333 -0
  19. data/lib/moab/storage_repository.rb +57 -0
  20. data/lib/moab/storage_services.rb +104 -0
  21. data/lib/moab/verification_result.rb +83 -0
  22. data/lib/moab/version_metadata.rb +38 -0
  23. data/lib/moab/version_metadata_entry.rb +64 -0
  24. data/lib/moab/version_metadata_event.rb +47 -0
  25. data/lib/moab_stanford.rb +18 -0
  26. data/lib/monkey_patches.rb +65 -0
  27. data/lib/serializer.rb +36 -0
  28. data/lib/serializer/manifest.rb +76 -0
  29. data/lib/serializer/serializable.rb +178 -0
  30. data/lib/stanford/active_fedora_object.rb +34 -0
  31. data/lib/stanford/content_inventory.rb +236 -0
  32. data/lib/stanford/dor_metadata.rb +49 -0
  33. data/lib/stanford/storage_repository.rb +46 -0
  34. data/lib/stanford/storage_services.rb +66 -0
  35. data/lib/tasks/yard.rake +34 -0
  36. data/lib/tools/api_doc_generator.rb +396 -0
  37. data/lib/tools/spec_generator.rb +410 -0
  38. data/lib/tools/spec_generator_old.rb +49 -0
  39. metadata +252 -0
data/lib/serializer.rb ADDED
@@ -0,0 +1,36 @@
1
+ # Serializer is a module containing classes whose methods facilitate serialization
2
+ # of data fields to various formats. To obtain those benefits, a dependent class
3
+ # should inherit from {Serializable} or {Manifest}
4
+ # depending on whether XML serialization is required.
5
+ #
6
+ # ====Data Model
7
+ # * <b>{Serializable} = utility methods to facilitate serialization to Hash, JSON, or YAML</b>
8
+ # * {Manifest} = adds methods for marshalling/unmarshalling data to a persistent XML file format
9
+ #
10
+ # @see https://github.com/jnunemaker/happymapper
11
+ # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
12
+ # All rights reserved. See {file:LICENSE.rdoc} for details.
13
+ module Serializer
14
+ end
15
+
16
+ require 'nokogiri'
17
+ require 'happymapper'
18
+ if RUBY_VERSION < '1.9'
19
+ require 'hashery/ordered_hash'
20
+ include Hashery
21
+ else
22
+ require 'psych'
23
+ OrderedHash = Hash
24
+ end
25
+ require 'json'
26
+ require 'json/pure'
27
+ require 'pathname'
28
+ require 'fileutils'
29
+ require 'time'
30
+ require 'digest/md5'
31
+ require 'digest/sha1'
32
+
33
+ require 'monkey_patches'
34
+ require 'serializer/serializable'
35
+ require 'serializer/manifest'
36
+
@@ -0,0 +1,76 @@
1
+ module Serializer
2
+
3
+ # Subclass of {Serializable} that adds methods for marshalling/unmarshalling data
4
+ # to a persistent XML file format.
5
+ #
6
+ # ====Data Model
7
+ # * {Serializable} = utility methods to facilitate serialization to Hash, JSON, or YAML
8
+ # * <b>{Manifest} = subclass adds methods for marshalling/unmarshalling data to XML file format</b>
9
+ #
10
+ # @see Serializable
11
+ # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
12
+ # All rights reserved. See {file:LICENSE.rdoc} for details.
13
+ class Manifest < Serializable
14
+
15
+ include HappyMapper
16
+
17
# @api internal
# @param filename [String] Optional filename if one wishes to override the default filename
# @return [String] The supplied filename when one is given; otherwise the default filename, which is
#   the unqualified class name with its first letter lower-cased and '.xml' appended
def self.xml_filename(filename=nil)
  return filename if filename
  # strip any enclosing module names, e.g. "Moab::FileInventory" => "FileInventory"
  base_name = self.name.split(/::/).last
  base_name[0, 1].downcase + base_name[1..-1] + '.xml'
end
28
+
29
# @api internal
# @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
# @param filename [String] Optional filename if one wishes to override the default filename
# @return [Pathname] The location of the xml file (parent_dir joined with the xml filename)
def self.xml_pathname(parent_dir, filename=nil)
  directory = Pathname.new(parent_dir)
  directory.join(self.xml_filename(filename))
end
36
+
37
# @api external
# @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
# @param filename [String] Optional filename if one wishes to override the default filename
# @return [Boolean] Returns true if the xml file exists
def self.xml_pathname_exist?(parent_dir, filename=nil)
  xml_path = self.xml_pathname(parent_dir, filename)
  xml_path.exist?
end
44
+
45
# @api external
# @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
# @param filename [String] Optional filename if one wishes to override the default filename
# @return [Serializable] Read the xml file and return the parsed XML
# @example {include:file:spec/features/serializer/read_xml_spec.rb}
def self.read_xml_file(parent_dir, filename=nil)
  xml_text = self.xml_pathname(parent_dir, filename).read
  self.parse(xml_text)
end
53
+
54
# @api external
# @param xml_object [Serializable] The in-memory object to be written; must respond to to_xml
# @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
#   (created, along with any missing ancestors, if it does not exist yet)
# @param filename [String] Optional filename if one wishes to override the default filename
# @return [void] Serialize the in-memory object to a xml file instance
def self.write_xml_file(xml_object, parent_dir, filename=nil)
  # parent_dir may be a plain String, which has no #mkpath, so normalize to a Pathname first
  Pathname.new(parent_dir).mkpath
  self.xml_pathname(parent_dir, filename).open('w') { |f| f << xml_object.to_xml }
  nil
end
64
+
65
# @api external
# @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
# @param filename [String] Optional filename if one wishes to override the default filename
# @return [void] Serialize this in-memory object to a xml file instance
#   (delegates to the class-level write_xml_file)
# @example {include:file:spec/features/serializer/write_xml_spec.rb}
def write_xml_file(parent_dir, filename=nil)
  manifest_class = self.class
  manifest_class.write_xml_file(self, parent_dir, filename)
end
73
+
74
+ end
75
+
76
+ end
@@ -0,0 +1,178 @@
1
+ module Serializer
2
+
3
+ # Some utility methods to facilitate serialization of data fields to Hash, JSON, or YAML shared by all subclasses.
4
+ # This class assumes that HappyMapper is used for declaration of fields to be serialized.
5
+ #
6
+ # ====Data Model
7
+ # * <b>{Serializable} = utility methods to facilitate serialization to Hash, JSON, or YAML</b>
8
+ # * {Manifest} = adds methods for marshalling/unmarshalling data to a persistent XML file format
9
+ #
10
+ # @see https://github.com/jnunemaker/happymapper
11
+ # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
12
+ # All rights reserved. See {file:LICENSE.rdoc} for details.
13
+ class Serializable
14
+
15
+ include HappyMapper
16
+
17
# A flexible initializer based on the DataMapper "create factory" design pattern.
# Each key/value pair is stored in the instance variable named after the key.
# @see http://datamapper.org/docs/create_and_destroy.html
# @see Serializable#initialize
# @param opts [Hash<Symbol,Object>] a hash containing any number of symbol => value pairs.
#   The symbols should correspond to attributes declared using HappyMapper syntax
#   (the key :test is also accepted)
# @raise [RuntimeError] if a key is neither a declared variable name nor :test
def initialize(opts={})
  opts.each do |name, value|
    recognized = variable_names.include?(name.to_s) || name == :test
    raise "#{name} is not a variable name in #{self.class.name}" unless recognized
    instance_variable_set("@#{name}", value)
  end
end
31
+
32
# @api internal
# @return [Array] A list of HappyMapper xml attribute and element nodes declared for the class
#   (attributes first, then elements)
# @note The text_node enhancement added by unhappymapper (which enables elements having both
#   attributes and a text value) is not being used, so text nodes are not included in the list.
def variables
  declaring_class = self.class
  declaring_class.attributes + declaring_class.elements
end
46
+
47
# @api internal
# @return [Array] Extract the names of the variables, in declaration order
def variable_names
  variables.map(&:name)
end
52
+
53
# @api internal
# @return [String] Determine which attribute was marked as an object instance key.
#   Keys are indicated by option :key=true when declaring the object's variables.
#   This follows the same convention as used by DataMapper.
#   Returns nil when no attribute is marked as a key. The first match wins, and the answer
#   (including nil) is cached on the instance after the first call.
# @see http://datamapper.org/docs/properties.html
def key_name
  return @key_name if defined?(@key_name)
  @key_name = nil
  key_attribute = self.class.attributes.find { |attribute| attribute.options[:key] }
  @key_name = key_attribute.name if key_attribute
  @key_name
end
70
+
71
# @api internal
# @return [String] For the current object instance, return the string to use as a hash key:
#   the value of the attribute named by key_name, or nil when the class declares no key attribute
def key
  return nil unless key_name
  send(key_name)
end
77
+
78
# @api internal
# @param array [Array] The array to be converted to a hash
# @param summary [Boolean] Passed through to the to_hash method of each member that has one
# @return [OrderedHash] Generate a hash from an array of objects.
#   If the array member has a field tagged as a key, that field will be used as the hash.key.
#   Otherwise the index position of the array member will be used as the key.
#   Members responding to to_hash are stored in converted form, others are stored as-is.
def array_to_hash(array, summary=false)
  result = OrderedHash.new
  array.each_with_index do |member, position|
    hash_key = position
    hash_key = member.key if member.respond_to?(:key) && member.key
    result[hash_key] = member.respond_to?(:to_hash) ? member.to_hash(summary) : member
  end
  result
end
92
+
93
# @api internal
# @return [OrderedHash] Recursively generate an OrderedHash containing the object's properties.
#   Each entry is keyed by the variable's :tag option when present, otherwise by its name.
# @param summary [Boolean] Controls the depth and detail of recursion: when true, only the variables
#   named in summary_fields are included. The flag is passed on to array members, while a nested
#   Serializable value is always converted with the default (full) to_hash.
def to_hash(summary=false)
  result = OrderedHash.new
  selected = if summary
               variables.select { |candidate| summary_fields.include?(candidate.name) }
             else
               variables
             end
  selected.each do |variable|
    label = variable.options[:tag] || variable.name.to_s
    value = self.send(variable.name)
    result[label] = case value
                    when Array then array_to_hash(value, summary)
                    when Serializable then value.to_hash
                    else value
                    end
  end
  result
end
113
+
114
# @return [OrderedHash] The summarized form of the object's properties, i.e. to_hash(true)
def summary
  to_hash(true)
end
118
+
119
# @api internal
# @param other [Serializable] The other object being compared
# @return [OrderedHash] Generate a hash containing the differences between two objects of the same type.
#   Differing values are tagged with the two objects' keys (other's value first, then this object's).
#   The generic tags :old and :new are used instead when either key is missing or when both keys are equal.
# @raise [RuntimeError] if the two objects are not of the same class
def diff(other)
  raise "Cannot compare different classes" if self.class != other.class
  left = other.to_hash
  right = self.to_hash
  ltag = other.key
  rtag = self.key
  # Identical tags would collide as hash keys in deep_diff, silently dropping the left-hand value,
  # so fall back to the generic tags in that case as well as when a key is missing
  if ltag.nil? || rtag.nil? || ltag == rtag
    ltag = :old
    rtag = :new
  end
  Serializable.deep_diff(ltag, left, rtag, right)
end
135
+
136
# @api internal
# @param hashes [Array<Hash>] The hashes to be compared, with optional name tags:
#   either (left, right) or (left_tag, left, right_tag, right). Without tags, :left and :right are used.
# @return [OrderedHash] Generate a hash containing the differences between two hashes
#   (recursively descend parallel trees of hashes). Keys whose values are equal are omitted;
#   differing non-hash values are reported as { left_tag => left_value, right_tag => right_value }
# @raise [RuntimeError] if the number of arguments is neither 2 nor 4
# @see https://gist.github.com/146844
def self.deep_diff(*hashes)
  changes = OrderedHash.new
  ltag, left, rtag, right =
    case hashes.length
    when 4 then hashes
    when 2 then [:left, hashes[0], :right, hashes[1]]
    else raise "wrong number of arguments (expected 2 or 4)"
    end
  (left.keys | right.keys).each do |k|
    old_value = left[k]
    new_value = right[k]
    next unless old_value != new_value
    changes[k] = if old_value.is_a?(Hash) && new_value.is_a?(Hash)
                   deep_diff(ltag, old_value, rtag, new_value)
                 else
                   OrderedHash[ltag, old_value, rtag, new_value]
                 end
  end
  changes
end
162
+
163
# @api internal
# @param summary [Boolean] Passed through to to_hash
# @return [String] Generate JSON output (pretty-printed) from a hash of the object's variables
def to_json(summary=false)
  JSON.pretty_generate(self.to_hash(summary))
end
169
+
170
# @api internal
# @param summary [Boolean] Passed through to to_hash
# @return [String] Generate YAML output from a hash of the object's variables
def to_yaml(summary=false)
  properties = self.to_hash(summary)
  properties.to_yaml
end
175
+
176
+ end
177
+
178
+ end
@@ -0,0 +1,34 @@
1
+ require 'moab_stanford'
2
+
3
+ module Stanford
4
+
5
# Utility Class for extracting content or other information from a Fedora Instance
#
# ====Data Model
# * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
#   * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
#   * <b>{ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance</b>
#
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
#   All rights reserved.  See {file:LICENSE.rdoc} for details.
class ActiveFedoraObject

  # @return [Object] The Active Fedora representation of the Fedora Object
  attr_accessor :fedora_object

  # Wraps the given Active Fedora object
  # @param fedora_object [Object] The Active Fedora representation of the Fedora Object
  def initialize(fedora_object)
    @fedora_object = fedora_object
  end

  # @api external
  # @param ds_id [String] The datastream identifier
  # @return [String] The content of the specified datastream
  def get_datastream_content(ds_id)
    datastream = @fedora_object.datastreams[ds_id]
    datastream.content
  end

end
33
+
34
+ end
@@ -0,0 +1,236 @@
1
+ require 'moab_stanford'
2
+
3
+ module Stanford
4
+
5
+ # Stanford-specific utility methods for transforming contentMetadata to versionInventory and doing comparisons
6
+ #
7
+ # ====Data Model
8
+ # * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
9
+ # * <b>{ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons</b>
10
+ # * {ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance
11
+ #
12
+ # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
13
+ # All rights reserved. See {file:LICENSE.rdoc} for details.
14
+ class ContentInventory
15
+
16
# @param content_metadata [String] The content metadata to be transformed into a versionInventory
# @param object_id [String] The identifier of the digital object
# @param subset [String] Specifies which subset of files to list (all|preserve|publish|shelve)
# @param version_id [Integer] The ID of the version whose content metadata is to be transformed
# @return [FileInventory] The versionInventory equivalent of the contentMetadata
#   if the supplied content_metadata is blank or empty, then a skeletal FileInventory will be returned
def inventory_from_cm(content_metadata, object_id, subset, version_id=nil)
  # The contentMetadata datastream is not required for ingest, since some object types,
  # such as collection or APO do not require one.
  # Many of these objects have contentMetadata with no child elements, such as this:
  #    <contentMetadata objectId="bd608mj3166" type="file"/>
  # but there are also objects that have no datastream of this name at all
  FileInventory.new(:type=>"version", :digital_object_id=>object_id, :version_id=>version_id).tap do |inventory|
    content_group = group_from_cm(content_metadata, subset)
    inventory.groups << content_group
  end
end
32
+
33
# @api external
# @param content_metadata [String] The contentMetadata as a string
# @param subset [String] Specifies which subset of files to list (all|preserve|publish|shelve);
#   matched case-insensitively
# @return [FileGroup] The {FileGroup} object generated from a contentMetadata instance
# @raise [RuntimeError] if subset is not one of the recognized values
# @example {include:file:spec/features/stanford/content_metadata_read_spec.rb}
def group_from_cm(content_metadata, subset)
  ng_doc = Nokogiri::XML(content_metadata)
  validate_content_metadata(ng_doc)
  # XPath query selecting the <file> elements for each supported disposition subset
  queries = {
    'preserve' => "//file[@preserve='yes']",
    'publish'  => "//file[@publish='yes']",
    'shelve'   => "//file[@shelve='yes']",
    'all'      => "//file"
  }
  query = queries[subset.to_s.downcase]
  raise "Unknown disposition subset (#{subset})" unless query
  nodeset = ng_doc.xpath(query)
  content_group = FileGroup.new(:group_id=>'content', :data_source => "contentMetadata-#{subset}")
  nodeset.each do |file_node|
    content_group.add_file_instance(generate_signature(file_node), generate_instance(file_node))
  end
  content_group
end
61
+
62
# @api internal
# @param node [Nokogiri::XML::Node] The XML node containing file information
# @return [FileSignature] The {FileSignature} object generated from the XML data:
#   size comes from the node's size attribute, checksums from its <checksum> children.
#   Checksum types are matched case-insensitively; unrecognized types are ignored.
def generate_signature(node)
  signature = FileSignature.new()
  signature.size = node.attributes['size'].content
  # signature setter to use for each recognized (upper-cased) checksum type name
  setters = {
    'MD5' => :md5=,
    'SHA1' => :sha1=, 'SHA-1' => :sha1=,
    'SHA256' => :sha256=, 'SHA-256' => :sha256=
  }
  node.xpath('checksum').each do |checksum_node|
    setter = setters[checksum_node.attributes['type'].content.upcase]
    signature.send(setter, checksum_node.text) if setter
  end
  signature
end
81
+
82
# @api internal
# @param node (see #generate_signature)
# @return [FileInstance] The {FileInstance} object generated from the XML data:
#   path comes from the node's id attribute, datetime from its datetime attribute
#   (nil when the node has no datetime attribute)
def generate_instance(node)
  instance = FileInstance.new()
  instance.path = node.attributes['id'].content
  # datetime is optional: check for the attribute explicitly rather than rescuing every error
  datetime_attribute = node.attributes['datetime']
  instance.datetime = datetime_attribute ? datetime_attribute.content : nil
  instance
end
91
+
92
# @api external
# @param file_group [FileGroup] The {FileGroup} object used as the data source
# @param object_id [String] The identifier written to the objectId attribute of the contentMetadata element
# @param version_id [Integer] The version ID used to build the id ("version-N") of the single resource element
# @return [String] The contentMetadata instance generated from the FileGroup.
#   Every file instance becomes a file element flagged shelve/publish/preserve = 'yes', carrying
#   a checksum element for each of MD5, SHA-1 and SHA-256 that is present in the signature's fixity data
# @example {include:file:spec/features/stanford/content_metadata_write_spec.rb}
def generate_content_metadata(file_group, object_id, version_id)
  # checksum type label paired with the signature/fixity field that supplies its value
  checksum_fields = [["MD5", :md5], ["SHA-1", :sha1], ["SHA-256", :sha256]]
  builder = Nokogiri::XML::Builder.new do |xml|
    xml.contentMetadata(:type=>"sample", :objectId=>object_id) do
      xml.resource(:type=>"version", :sequence=>"1", :id=>"version-#{version_id}") do
        file_group.files.each do |file_manifestation|
          signature = file_manifestation.signature
          file_manifestation.instances.each do |instance|
            xml.file(
                :id=>instance.path,
                :size=>signature.size,
                :datetime=>instance.datetime,
                :shelve=>'yes',
                :publish=>'yes',
                :preserve=>'yes') do
              fixity = signature.fixity
              checksum_fields.each do |label, field|
                xml.checksum(:type=>label) { xml.text signature.send(field) } if fixity[field]
              end
            end
          end
        end
      end
    end
  end
  builder.to_xml
end
122
+
123
# @param content_metadata [String,Nokogiri::XML::Document] The contentMetadata as a string or XML doc
# @return [Boolean] True if contentMetadata has essential file attributes, else raise exception
# @raise [Moab::InvalidMetadataException] carrying the first problem found, followed by " ..."
def validate_content_metadata(content_metadata)
  problems = validate_content_metadata_details(content_metadata)
  return true unless problems.size > 0
  raise Moab::InvalidMetadataException, problems[0]+" ..."
end
130
+
131
# @param content_metadata [String, Pathname, Nokogiri::XML::Document] The contentMetadata as a string,
#   as the pathname of a file containing it, or as an XML doc
# @return [Array<String>] List of problems found: one message for each file element that lacks an
#   id attribute, a size attribute, an MD5 checksum or a SHA-1 checksum (empty if nothing is missing)
# @raise [Moab::InvalidMetadataException] if content_metadata is not one of the supported types
def validate_content_metadata_details(content_metadata)
  result = []
  content_metadata_doc =
      case content_metadata.class.name
        when "String"
          Nokogiri::XML(content_metadata)
        when "Pathname"
          Nokogiri::XML(content_metadata.read)
        when "Nokogiri::XML::Document"
          content_metadata
        else
          raise Moab::InvalidMetadataException, "Content Metadata is in unrecognized format"
      end
  # each_with_index supplies the node position directly, avoiding a nodeset search per file
  content_metadata_doc.xpath("//file").each_with_index do |file_node, position|
    missing = ['id', 'size', 'md5', 'sha1']
    missing.delete('id') if file_node.has_attribute?('id')
    missing.delete('size') if file_node.has_attribute?('size')
    file_node.xpath('checksum').each do |checksum_node|
      # a <checksum> without a type attribute is treated as an unrecognized type
      # (so md5/sha1 stay reported as missing) instead of aborting the validation
      case checksum_node['type'].to_s.upcase
        when 'MD5'
          missing.delete('md5')
        when 'SHA1', 'SHA-1'
          missing.delete('sha1')
      end
    end
    if missing.include?('id')
      # no id to report, so identify the file element by its position
      result << "File node #{position} is missing #{missing.join(',')}"
    elsif missing.size > 0
      id = file_node['id']
      result << "File node having id='#{id}' is missing #{missing.join(',')}"
    end
  end
  result
end
169
+
170
# @param content_metadata [String] The contentMetadata as a string
# @param content_group [FileGroup] The {FileGroup} object used as the fixity data source
# @return [String] Returns a remediated copy of the contentMetadata with fixity data filled in.
#   Returns nil when content_metadata is nil, and the unmodified content_metadata when
#   content_group is nil or contains no files.
# @see http://blog.slashpoundbang.com/post/1454850669/how-to-pretty-print-xml-with-nokogiri
def remediate_content_metadata(content_metadata, content_group)
  return nil if content_metadata.nil?
  return content_metadata if content_group.nil? || content_group.files.size < 1
  signature_for_path = content_group.path_hash
  # checksum lookup tables, kept in instance variables for use by remediate_checksum_nodes
  @type_for_name = FileSignature.checksum_type_for_name
  @names_for_type = FileSignature.checksum_names_for_type
  ng_doc = Nokogiri::XML(content_metadata, &:noblanks)
  ng_doc.xpath("//file").each do |file_node|
    signature = signature_for_path[file_node['id']]
    remediate_file_size(file_node, signature)
    remediate_checksum_nodes(file_node, signature)
  end
  ng_doc.to_xml(:indent => 2)
end
190
+
191
# @param [Nokogiri::XML::Element] file_node the File stanza being remediated
# @param [FileSignature] signature the fixity data for the file from the FileGroup
# @return [void] update the file size attribute if missing, raise exception if inconsistent
# @raise [RuntimeError] if the node already has a size that differs from the signature's size
def remediate_file_size(file_node, signature)
  declared_size = file_node['size']
  # sizes are compared in their string form, because XML attribute values are strings
  expected_size = signature.size.to_s
  if declared_size.nil? || declared_size.empty?
    file_node['size'] = expected_size
  elsif declared_size != expected_size
    raise "Inconsistent size for #{file_node['id']}: #{declared_size} != #{expected_size}"
  end
end
202
+
203
# @param [Nokogiri::XML::Element] file_node the File stanza being remediated
# @param [FileSignature] signature the fixity data for the file from the FileGroup
# @return [void] update the file's checksum elements if data missing, raise exception if inconsistent
# @raise [RuntimeError] if an existing checksum value differs from the signature's value for that type
def remediate_checksum_nodes(file_node, signature)
  # The lookup tables are normally cached by remediate_content_metadata; load them here as well
  # so that this method also works when it is called on its own
  @type_for_name ||= FileSignature.checksum_type_for_name
  @names_for_type ||= FileSignature.checksum_names_for_type
  # collect <checksum> elements for checksum types that are already present
  checksum_nodes = OrderedHash.new
  file_node.xpath('checksum').each do |checksum_node|
    type = @type_for_name[checksum_node['type']]
    checksum_nodes[type] = checksum_node
  end
  # add new <checksum> elements for the other checksum types that were missing
  @names_for_type.each do |type, names|
    unless checksum_nodes.has_key?(type)
      checksum_node = Nokogiri::XML::Element.new('checksum',file_node.document)
      # label the new element with the first name listed for the type
      checksum_node['type'] = names[0]
      file_node << checksum_node
      checksum_nodes[type] = checksum_node
    end
  end
  # make sure the <checksum> element has a content value
  checksum_nodes.each do |type,checksum_node|
    cm_checksum = checksum_node.content
    sig_checksum = signature.checksums[type]
    if cm_checksum.nil? or cm_checksum.empty?
      checksum_node.content = sig_checksum
    elsif cm_checksum != sig_checksum
      raise "Inconsistent #{type.to_s} for #{file_node['id']}: #{cm_checksum} != #{sig_checksum}"
    end
  end
end
233
+
234
+ end
235
+
236
+ end