moab-versioning 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/lib/moab.rb +59 -0
  3. data/lib/moab/bagger.rb +289 -0
  4. data/lib/moab/config.rb +21 -0
  5. data/lib/moab/exceptions.rb +18 -0
  6. data/lib/moab/file_group.rb +244 -0
  7. data/lib/moab/file_group_difference.rb +336 -0
  8. data/lib/moab/file_group_difference_subset.rb +45 -0
  9. data/lib/moab/file_instance.rb +82 -0
  10. data/lib/moab/file_instance_difference.rb +54 -0
  11. data/lib/moab/file_inventory.rb +279 -0
  12. data/lib/moab/file_inventory_difference.rb +132 -0
  13. data/lib/moab/file_manifestation.rb +85 -0
  14. data/lib/moab/file_signature.rb +200 -0
  15. data/lib/moab/signature_catalog.rb +195 -0
  16. data/lib/moab/signature_catalog_entry.rb +61 -0
  17. data/lib/moab/storage_object.rb +220 -0
  18. data/lib/moab/storage_object_version.rb +333 -0
  19. data/lib/moab/storage_repository.rb +57 -0
  20. data/lib/moab/storage_services.rb +104 -0
  21. data/lib/moab/verification_result.rb +83 -0
  22. data/lib/moab/version_metadata.rb +38 -0
  23. data/lib/moab/version_metadata_entry.rb +64 -0
  24. data/lib/moab/version_metadata_event.rb +47 -0
  25. data/lib/moab_stanford.rb +18 -0
  26. data/lib/monkey_patches.rb +65 -0
  27. data/lib/serializer.rb +36 -0
  28. data/lib/serializer/manifest.rb +76 -0
  29. data/lib/serializer/serializable.rb +178 -0
  30. data/lib/stanford/active_fedora_object.rb +34 -0
  31. data/lib/stanford/content_inventory.rb +236 -0
  32. data/lib/stanford/dor_metadata.rb +49 -0
  33. data/lib/stanford/storage_repository.rb +46 -0
  34. data/lib/stanford/storage_services.rb +66 -0
  35. data/lib/tasks/yard.rake +34 -0
  36. data/lib/tools/api_doc_generator.rb +396 -0
  37. data/lib/tools/spec_generator.rb +410 -0
  38. data/lib/tools/spec_generator_old.rb +49 -0
  39. metadata +252 -0
data/lib/serializer.rb ADDED
@@ -0,0 +1,36 @@
1
+ # Serializer is a module containing classes whose methods facilitate serialization
2
+ # of data fields to various formats. To obtain those benefits, a dependent class
3
+ # should inherit from {Serializable} or {Manifest}
4
+ # depending on whether XML serialization is required.
5
+ #
6
+ # ====Data Model
7
+ # * <b>{Serializable} = utility methods to facilitate serialization to Hash, JSON, or YAML</b>
8
+ # * {Manifest} = adds methods for marshalling/unmarshalling data to a persistent XML file format
9
+ #
10
+ # @see https://github.com/jnunemaker/happymapper
11
+ # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
12
+ # All rights reserved. See {file:LICENSE.rdoc} for details.
13
+ module Serializer
14
+ end
15
+
16
+ require 'nokogiri'
17
+ require 'happymapper'
18
+ if RUBY_VERSION < '1.9'
19
+ require 'hashery/ordered_hash'
20
+ include Hashery
21
+ else
22
+ require 'psych'
23
+ OrderedHash = Hash
24
+ end
25
+ require 'json'
26
+ require 'json/pure'
27
+ require 'pathname'
28
+ require 'fileutils'
29
+ require 'time'
30
+ require 'digest/md5'
31
+ require 'digest/sha1'
32
+
33
+ require 'monkey_patches'
34
+ require 'serializer/serializable'
35
+ require 'serializer/manifest'
36
+
@@ -0,0 +1,76 @@
1
module Serializer

  # Subclass of {Serializable} that adds methods for marshalling/unmarshalling data
  # to a persistent XML file format.
  #
  # ====Data Model
  # * {Serializable} = utility methods to facilitate serialization to Hash, JSON, or YAML
  # * <b>{Manifest} = subclass adds methods for marshalling/unmarshalling data to XML file format</b>
  #
  # @see Serializable
  # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
  #   All rights reserved.  See {file:LICENSE.rdoc} for details.
  class Manifest < Serializable

    include HappyMapper

    # @api internal
    # @param filename [String] Optional filename if one wishes to override the default filename
    # @return [String] The supplied filename when one is given; otherwise the last segment of the
    #   class name with its first character downcased and '.xml' appended
    def self.xml_filename(filename=nil)
      return filename if filename
      cname = self.name.split(/::/).last
      cname[0, 1].downcase + cname[1..-1] + '.xml'
    end

    # @api internal
    # @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
    # @param filename [String] Optional filename if one wishes to override the default filename
    # @return [Pathname] The location of the xml file (parent_dir joined with {xml_filename})
    def self.xml_pathname(parent_dir, filename=nil)
      Pathname.new(parent_dir).join(self.xml_filename(filename))
    end

    # @api external
    # @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
    # @param filename [String] Optional filename if one wishes to override the default filename
    # @return [Boolean] Returns true if the xml file exists
    def self.xml_pathname_exist?(parent_dir, filename=nil)
      self.xml_pathname(parent_dir, filename).exist?
    end

    # @api external
    # @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
    # @param filename [String] Optional filename if one wishes to override the default filename
    # @return [Serializable] Read the xml file and return the result of passing its text to the class's parse method
    # @example {include:file:spec/features/serializer/read_xml_spec.rb}
    def self.read_xml_file(parent_dir, filename=nil)
      self.parse(self.xml_pathname(parent_dir, filename).read)
    end

    # @api external
    # @param xml_object [Serializable] The in-memory object to be written; must respond to to_xml
    # @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
    # @param filename [String] Optional filename if one wishes to override the default filename
    # @return [void] Serialize the in-memory object to a xml file instance.
    #   The parent directory (and any missing ancestors) is created first.
    def self.write_xml_file(xml_object, parent_dir, filename=nil)
      # parent_dir is documented as Pathname or String; a bare String has no #mkpath,
      # so normalize to a Pathname before creating the directory.
      Pathname.new(parent_dir).mkpath
      self.xml_pathname(parent_dir, filename).open('w') { |f| f << xml_object.to_xml }
      nil
    end

    # @api external
    # @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
    # @param filename [String] Optional filename if one wishes to override the default filename
    # @return [void] Serialize this object to a xml file instance (delegates to the class-level method)
    # @example {include:file:spec/features/serializer/write_xml_spec.rb}
    def write_xml_file(parent_dir, filename=nil)
      self.class.write_xml_file(self, parent_dir, filename)
    end

  end

end
@@ -0,0 +1,178 @@
1
module Serializer

  # Utility methods, shared by all subclasses, that facilitate serialization of data fields
  # to Hash, JSON, or YAML.
  # This class assumes that HappyMapper is used for declaration of fields to be serialized.
  #
  # ====Data Model
  # * <b>{Serializable} = utility methods to facilitate serialization to Hash, JSON, or YAML</b>
  # * {Manifest} = adds methods for marshalling/unmarshalling data to a persistent XML file format
  #
  # @see https://github.com/jnunemaker/happymapper
  # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
  #   All rights reserved.  See {file:LICENSE.rdoc} for details.
  class Serializable

    include HappyMapper

    # A flexible initializer based on the DataMapper "create factory" design pattern.
    # @see http://datamapper.org/docs/create_and_destroy.html
    # @see Serializable#initialize
    # @param opts [Hash<Symbol,Object>] a hash containing any number of symbol => value pairs.
    #   The symbols should correspond to attributes declared using HappyMapper syntax.
    #   The key :test is always accepted. Any other unrecognized key raises a RuntimeError.
    def initialize(opts={})
      opts.each do |key, value|
        unless variable_names.include?(key.to_s) || key == :test
          raise "#{key} is not a variable name in #{self.class.name}"
        end
        instance_variable_set("@#{key}", value)
      end
    end

    # @api internal
    # @return [Array] A list of HappyMapper xml attribute and element nodes declared for the class
    def variables
      # The text_node enhancement added by unhappymapper (elements having both attributes
      # and a text value) is not being used, so only attributes and elements are listed.
      self.class.attributes + self.class.elements
    end

    # @api internal
    # @return [Array] Extract the names of the variables
    def variable_names
      variables.map { |declared| declared.name }
    end

    # @api internal
    # @return [String] Determine which attribute was marked as an object instance key.
    #   Keys are indicated by option :key=true when declaring the object's variables.
    #   This follows the same convention as used by DataMapper.
    #   The answer (nil when no attribute is marked) is cached on the instance after the first call.
    # @see http://datamapper.org/docs/properties.html
    def key_name
      unless defined?(@key_name)
        key_attribute = self.class.attributes.find { |attribute| attribute.options[:key] }
        @key_name = key_attribute ? key_attribute.name : nil
      end
      @key_name
    end

    # @api internal
    # @return [String] For the current object instance, return the string to use as a hash key
    #   (nil if no attribute was marked as the key)
    def key
      accessor = key_name
      accessor ? self.send(accessor) : nil
    end

    # @api internal
    # @param array [Array] The array to be converted to a hash
    # @param summary [Boolean] Passed through to each member's to_hash
    # @return [OrderedHash] Generate a hash from an array of objects.
    #   If the array member has a field tagged as a key, that field will be used as the hash.key.
    #   Otherwise the index position of the array member will be used as the key
    def array_to_hash(array, summary=false)
      item_hash = OrderedHash.new
      array.each_with_index do |item, index|
        ikey = index
        ikey = item.key if item.respond_to?(:key) && item.key
        item_hash[ikey] = item.respond_to?(:to_hash) ? item.to_hash(summary) : item
      end
      item_hash
    end

    # @api internal
    # @param summary [Boolean] Controls the depth and detail of recursion. When true, only the
    #   variables named by summary_fields are included (summary_fields is not defined in this
    #   class -- presumably supplied by subclasses; verify before calling with summary=true)
    # @return [OrderedHash] Recursively generate an OrderedHash containing the object's properties
    def to_hash(summary=false)
      selected = variables
      selected = selected.select { |v| summary_fields.include?(v.name) } if summary
      oh = OrderedHash.new
      selected.each do |variable|
        label = variable.options[:tag] || variable.name.to_s
        value = self.send(variable.name)
        if value.is_a?(Array)
          oh[label] = array_to_hash(value, summary)
        elsif value.is_a?(Serializable)
          # nested objects are always expanded in full (the summary flag is not forwarded)
          oh[label] = value.to_hash
        else
          oh[label] = value
        end
      end
      oh
    end

    # @return [OrderedHash] Calls to_hash(summary=true)
    def summary
      to_hash(true)
    end

    # @api internal
    # @param other [Serializable] The other object being compared
    # @return [OrderedHash] Generate a hash containing the differences between two objects of the same type.
    #   The two sides are tagged with the objects' keys, or with :old / :new when either key is nil.
    def diff(other)
      raise "Cannot compare different classes" unless self.class == other.class
      left = other.to_hash
      right = self.to_hash
      ltag, rtag = (self.key.nil? || other.key.nil?) ? [:old, :new] : [other.key, self.key]
      Serializable.deep_diff(ltag, left, rtag, right)
    end

    # @api internal
    # @param hashes [Array<Hash>] The hashes to be compared, with optional name tags:
    #   either (left, right) or (ltag, left, rtag, right)
    # @return [OrderedHash] Generate a hash containing the differences between two hashes
    #   (recursively descend parallel trees of hashes)
    # @see https://gist.github.com/146844
    def self.deep_diff(*hashes)
      differences = OrderedHash.new
      ltag, left, rtag, right =
        case hashes.length
        when 4 then hashes
        when 2 then [:left, hashes[0], :right, hashes[1]]
        else raise "wrong number of arguments (expected 2 or 4)"
        end
      (left.keys | right.keys).each do |k|
        lvalue = left[k]
        rvalue = right[k]
        next unless lvalue != rvalue
        differences[k] =
          if lvalue.is_a?(Hash) && rvalue.is_a?(Hash)
            deep_diff(ltag, lvalue, rtag, rvalue)
          else
            OrderedHash[ltag, lvalue, rtag, rvalue]
          end
      end
      differences
    end

    # @api internal
    # @param summary [Boolean] Passed through to to_hash
    # @return [String] Generate JSON output from a hash of the object's variables
    def to_json(summary=false)
      JSON.pretty_generate(self.to_hash(summary))
    end

    # @api internal
    # @param summary [Boolean] Passed through to to_hash
    # @return [String] Generate YAML output from a hash of the object's variables
    def to_yaml(summary=false)
      hash = self.to_hash(summary)
      hash.to_yaml
    end

  end

end
@@ -0,0 +1,34 @@
1
+ require 'moab_stanford'
2
+
3
module Stanford

  # Utility Class for extracting content or other information from a Fedora Instance
  #
  # ====Data Model
  # * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
  #   * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
  #   * <b>{ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance</b>
  #
  # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
  #   All rights reserved.  See {file:LICENSE.rdoc} for details.
  class ActiveFedoraObject

    # @return [Object] The Active Fedora representation of the Fedora Object
    attr_accessor :fedora_object

    # @param fedora_object [Object] The Active Fedora representation of the Fedora Object
    # @return [Stanford::ActiveFedoraObject] Create a wrapper around the supplied Fedora object
    def initialize(fedora_object)
      @fedora_object = fedora_object
    end

    # @api external
    # @param ds_id [String] The datastream identifier
    # @return [String] The content of the specified datastream
    def get_datastream_content(ds_id)
      datastream = @fedora_object.datastreams[ds_id]
      datastream.content
    end

  end

end
@@ -0,0 +1,236 @@
1
require 'pathname'
require 'moab_stanford'
2
+
3
module Stanford

  # Stanford-specific utility methods for transforming contentMetadata to versionInventory and doing comparisons
  #
  # ====Data Model
  # * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
  #   * <b>{ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons</b>
  #   * {ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance
  #
  # @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
  #   All rights reserved.  See {file:LICENSE.rdoc} for details.
  class ContentInventory

    # @param content_metadata [String] The content metadata to be transformed into a versionInventory
    # @param object_id [String] The identifier of the digital object
    # @param subset [String] Specifies which subset of files to list (all|preserve|publish|shelve)
    # @param version_id [Integer] The ID of the version whose content metadata is to be transformed
    # @return [FileInventory] The versionInventory equivalent of the contentMetadata
    #   if the supplied content_metadata is blank or empty, then a skeletal FileInventory will be returned
    def inventory_from_cm(content_metadata, object_id, subset, version_id=nil)
      # The contentMetadata datastream is not required for ingest, since some object types, such as collection or APO do not require one.
      # Many of these objects have contentMetadata with no child elements, such as this:
      #    <contentMetadata objectId="bd608mj3166" type="file"/>
      # but there are also objects that have no datastream of this name at all
      cm_inventory = FileInventory.new(:type=>"version", :digital_object_id=>object_id, :version_id=>version_id)
      content_group = group_from_cm(content_metadata, subset)
      cm_inventory.groups << content_group
      cm_inventory
    end

    # @api external
    # @param content_metadata [String] The contentMetadata as a string
    # @param subset [String] Specifies which subset of files to list (all|preserve|publish|shelve);
    #   matched case-insensitively
    # @return [FileGroup] The {FileGroup} object generated from a contentMetadata instance
    # @raise [Moab::InvalidMetadataException] if a file node lacks id, size, md5 or sha1
    # @raise [RuntimeError] if the subset is not one of the recognized values
    # @example {include:file:spec/features/stanford/content_metadata_read_spec.rb}
    def group_from_cm(content_metadata, subset)
      ng_doc = Nokogiri::XML(content_metadata)
      validate_content_metadata(ng_doc)
      nodeset = case subset.to_s.downcase
                  when 'preserve'
                    ng_doc.xpath("//file[@preserve='yes']")
                  when 'publish'
                    ng_doc.xpath("//file[@publish='yes']")
                  when 'shelve'
                    ng_doc.xpath("//file[@shelve='yes']")
                  when 'all'
                    ng_doc.xpath("//file")
                  else
                    raise "Unknown disposition subset (#{subset})"
                end
      content_group = FileGroup.new(:group_id=>'content', :data_source => "contentMetadata-#{subset}")
      nodeset.each do |file_node|
        signature = generate_signature(file_node)
        instance = generate_instance(file_node)
        content_group.add_file_instance(signature, instance)
      end
      content_group
    end

    # @api internal
    # @param node [Nokogiri::XML::Node] The XML node containing file information
    # @return [FileSignature] The {FileSignature} object generated from the XML data.
    #   Checksum elements whose type is missing or unrecognized are ignored.
    def generate_signature(node)
      signature = FileSignature.new()
      signature.size = node.attributes['size'].content
      node.xpath('checksum').each do |checksum_node|
        # node['type'] is nil when the attribute is absent; to_s keeps that from raising
        case checksum_node['type'].to_s.upcase
          when 'MD5'
            signature.md5 = checksum_node.text
          when 'SHA1', 'SHA-1'
            signature.sha1 = checksum_node.text
          when 'SHA256', 'SHA-256'
            signature.sha256 = checksum_node.text
        end
      end
      signature
    end

    # @api internal
    # @param node (see #generate_signature)
    # @return [FileInstance] The {FileInstance} object generated from the XML data
    def generate_instance(node)
      instance = FileInstance.new()
      instance.path = node.attributes['id'].content
      # datetime is optional: deliberately best-effort, any failure leaves it unset
      instance.datetime = node.attributes['datetime'].content rescue nil
      instance
    end

    # @api external
    # @param file_group [FileGroup] The {FileGroup} object used as the data source
    # @param object_id [String] The identifier written to the objectId attribute
    # @param version_id [Integer] The version number used to build the resource id ("version-N")
    # @return [String] The contentMetadata instance generated from the FileGroup
    # @example {include:file:spec/features/stanford/content_metadata_write_spec.rb}
    def generate_content_metadata(file_group, object_id, version_id)
      cm = Nokogiri::XML::Builder.new do |xml|
        xml.contentMetadata(:type=>"sample", :objectId=>object_id) {
          xml.resource(:type=>"version", :sequence=>"1", :id=>"version-#{version_id}") {
            file_group.files.each do |file_manifestation|
              signature = file_manifestation.signature
              file_manifestation.instances.each do |instance|
                xml.file(
                    :id=>instance.path,
                    :size=>signature.size,
                    :datetime=>instance.datetime,
                    :shelve=>'yes',
                    :publish=>'yes',
                    :preserve=>'yes') {
                  fixity = signature.fixity
                  xml.checksum(:type=>"MD5") {xml.text signature.md5 } if fixity[:md5]
                  xml.checksum(:type=>"SHA-1") {xml.text signature.sha1} if fixity[:sha1]
                  xml.checksum(:type=>"SHA-256") {xml.text signature.sha256} if fixity[:sha256]
                }
              end
            end
          }
        }
      end
      cm.to_xml
    end

    # @param content_metadata [String,Pathname,Nokogiri::XML::Document] The contentMetadata as a string,
    #   file location, or XML doc
    # @return [Boolean] True if contentMetadata has essential file attributes, else raise exception
    # @raise [Moab::InvalidMetadataException] carrying the first problem found, followed by " ..."
    def validate_content_metadata(content_metadata)
      result = validate_content_metadata_details(content_metadata)
      raise Moab::InvalidMetadataException, result[0]+" ..." unless result.empty?
      true
    end

    # @param content_metadata [String,Pathname,Nokogiri::XML::Document] The contentMetadata as a string,
    #   file location, or XML doc (subclasses of these types are accepted)
    # @return [Array<String>] List of problems found, one entry per file node that is missing
    #   any of: id, size, an MD5 checksum, a SHA-1 checksum
    # @raise [Moab::InvalidMetadataException] if content_metadata is of any other type
    def validate_content_metadata_details(content_metadata)
      content_metadata_doc =
          case content_metadata
            when String
              Nokogiri::XML(content_metadata)
            when Pathname
              Nokogiri::XML(content_metadata.read)
            when Nokogiri::XML::Document
              content_metadata
            else
              raise Moab::InvalidMetadataException, "Content Metadata is in unrecognized format"
          end
      result = []
      content_metadata_doc.xpath("//file").each_with_index do |file_node, position|
        missing = ['id', 'size', 'md5', 'sha1']
        missing.delete('id') if file_node.has_attribute?('id')
        missing.delete('size') if file_node.has_attribute?('size')
        file_node.xpath('checksum').each do |checksum_node|
          # a checksum element without a type attribute simply satisfies nothing
          case checksum_node['type'].to_s.upcase
            when 'MD5'
              missing.delete('md5')
            when 'SHA1', 'SHA-1'
              missing.delete('sha1')
          end
        end
        if missing.include?('id')
          # without an id the node can only be identified by its position in the document
          result << "File node #{position} is missing #{missing.join(',')}"
        elsif !missing.empty?
          result << "File node having id='#{file_node['id']}' is missing #{missing.join(',')}"
        end
      end
      result
    end

    # @param content_metadata [String] The contentMetadata as a string
    # @param content_group [FileGroup] The {FileGroup} object used as the fixity data source
    # @return [String] Returns a remediated copy of the contentMetadata with fixity data filled in.
    #   Returns nil if content_metadata is nil, and the input unchanged if the group is nil or has no files.
    # @raise [RuntimeError] if a file listed in the contentMetadata has no fixity data in the group,
    #   or if its size or a checksum is inconsistent with the group's data
    # @see http://blog.slashpoundbang.com/post/1454850669/how-to-pretty-print-xml-with-nokogiri
    def remediate_content_metadata(content_metadata, content_group)
      return nil if content_metadata.nil?
      return content_metadata if content_group.nil? || content_group.files.size < 1
      signature_for_path = content_group.path_hash
      @type_for_name = FileSignature.checksum_type_for_name
      @names_for_type = FileSignature.checksum_names_for_type
      ng_doc = Nokogiri::XML(content_metadata) { |x| x.noblanks }
      ng_doc.xpath("//file").each do |file_node|
        filepath = file_node['id']
        signature = signature_for_path[filepath]
        # fail with a meaningful message instead of a NoMethodError on nil further down
        raise "No fixity data found for #{filepath} in the content group" if signature.nil?
        remediate_file_size(file_node, signature)
        remediate_checksum_nodes(file_node, signature)
      end
      ng_doc.to_xml(:indent => 2)
    end

    # @param [Nokogiri::XML::Element] file_node the File stanza being remediated
    # @param [FileSignature] signature the fixity data for the file from the FileGroup
    # @return [void] update the file size attribute if missing, raise exception if inconsistent
    # @raise [RuntimeError] if the size attribute is present and differs from the signature's size
    def remediate_file_size(file_node, signature)
      file_size = file_node['size']
      if file_size.nil? || file_size.empty?
        file_node['size'] = signature.size.to_s
      elsif file_size != signature.size.to_s
        raise "Inconsistent size for #{file_node['id']}: #{file_size} != #{signature.size.to_s}"
      end
    end

    # @param [Nokogiri::XML::Element] file_node the File stanza being remediated
    # @param [FileSignature] signature the fixity data for the file from the FileGroup
    # @return [void] update the file's checksum elements if data missing, raise exception if inconsistent
    # @raise [RuntimeError] if an existing checksum value differs from the signature's value
    def remediate_checksum_nodes(file_node, signature)
      # the lookup tables are normally set by remediate_content_metadata;
      # load them here as well so this method also works when called on its own
      @type_for_name ||= FileSignature.checksum_type_for_name
      @names_for_type ||= FileSignature.checksum_names_for_type
      # collect <checksum> elements for checksum types that are already present
      checksum_nodes = OrderedHash.new
      file_node.xpath('checksum').each do |checksum_node|
        type = @type_for_name[checksum_node['type']]
        checksum_nodes[type] = checksum_node
      end
      # add new <checksum> elements for the other checksum types that were missing
      @names_for_type.each do |type, names|
        unless checksum_nodes.has_key?(type)
          checksum_node = Nokogiri::XML::Element.new('checksum', file_node.document)
          checksum_node['type'] = names[0]
          file_node << checksum_node
          checksum_nodes[type] = checksum_node
        end
      end
      # make sure the <checksum> element has a content value
      checksum_nodes.each do |type, checksum_node|
        cm_checksum = checksum_node.content
        sig_checksum = signature.checksums[type]
        if cm_checksum.nil? || cm_checksum.empty?
          checksum_node.content = sig_checksum
        elsif cm_checksum != sig_checksum
          raise "Inconsistent #{type.to_s} for #{file_node['id']}: #{cm_checksum} != #{sig_checksum}"
        end
      end
    end

  end

end