moab-versioning 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/moab.rb +59 -0
- data/lib/moab/bagger.rb +289 -0
- data/lib/moab/config.rb +21 -0
- data/lib/moab/exceptions.rb +18 -0
- data/lib/moab/file_group.rb +244 -0
- data/lib/moab/file_group_difference.rb +336 -0
- data/lib/moab/file_group_difference_subset.rb +45 -0
- data/lib/moab/file_instance.rb +82 -0
- data/lib/moab/file_instance_difference.rb +54 -0
- data/lib/moab/file_inventory.rb +279 -0
- data/lib/moab/file_inventory_difference.rb +132 -0
- data/lib/moab/file_manifestation.rb +85 -0
- data/lib/moab/file_signature.rb +200 -0
- data/lib/moab/signature_catalog.rb +195 -0
- data/lib/moab/signature_catalog_entry.rb +61 -0
- data/lib/moab/storage_object.rb +220 -0
- data/lib/moab/storage_object_version.rb +333 -0
- data/lib/moab/storage_repository.rb +57 -0
- data/lib/moab/storage_services.rb +104 -0
- data/lib/moab/verification_result.rb +83 -0
- data/lib/moab/version_metadata.rb +38 -0
- data/lib/moab/version_metadata_entry.rb +64 -0
- data/lib/moab/version_metadata_event.rb +47 -0
- data/lib/moab_stanford.rb +18 -0
- data/lib/monkey_patches.rb +65 -0
- data/lib/serializer.rb +36 -0
- data/lib/serializer/manifest.rb +76 -0
- data/lib/serializer/serializable.rb +178 -0
- data/lib/stanford/active_fedora_object.rb +34 -0
- data/lib/stanford/content_inventory.rb +236 -0
- data/lib/stanford/dor_metadata.rb +49 -0
- data/lib/stanford/storage_repository.rb +46 -0
- data/lib/stanford/storage_services.rb +66 -0
- data/lib/tasks/yard.rake +34 -0
- data/lib/tools/api_doc_generator.rb +396 -0
- data/lib/tools/spec_generator.rb +410 -0
- data/lib/tools/spec_generator_old.rb +49 -0
- metadata +252 -0
data/lib/serializer.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# Serializer is a module containing classes whose methods facilitate serialization
|
2
|
+
# of data fields to various formats. To obtain those benefits, a dependent class
|
3
|
+
# should inherit from {Serializable} or {Manifest}
|
4
|
+
# depending on whether XML serialization is required.
|
5
|
+
#
|
6
|
+
# ====Data Model
|
7
|
+
# * <b>{Serializable} = utility methods to facilitate serialization to Hash, JSON, or YAML</b>
|
8
|
+
# * {Manifest} = adds methods for marshalling/unmarshalling data to a persistent XML file format
|
9
|
+
#
|
10
|
+
# @see https://github.com/jnunemaker/happymapper
|
11
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
12
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
13
|
+
module Serializer
|
14
|
+
end
|
15
|
+
|
16
|
+
require 'nokogiri'
|
17
|
+
require 'happymapper'
|
18
|
+
if RUBY_VERSION < '1.9'
|
19
|
+
require 'hashery/ordered_hash'
|
20
|
+
include Hashery
|
21
|
+
else
|
22
|
+
require 'psych'
|
23
|
+
OrderedHash = Hash
|
24
|
+
end
|
25
|
+
require 'json'
|
26
|
+
require 'json/pure'
|
27
|
+
require 'pathname'
|
28
|
+
require 'fileutils'
|
29
|
+
require 'time'
|
30
|
+
require 'digest/md5'
|
31
|
+
require 'digest/sha1'
|
32
|
+
|
33
|
+
require 'monkey_patches'
|
34
|
+
require 'serializer/serializable'
|
35
|
+
require 'serializer/manifest'
|
36
|
+
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module Serializer
|
2
|
+
|
3
|
+
# Subclass of {Serializable} that adds methods for marshalling/unmarshalling data
|
4
|
+
# to a persistent XML file format.
|
5
|
+
#
|
6
|
+
# ====Data Model
|
7
|
+
# * {Serializable} = utility methods to facilitate serialization to Hash, JSON, or YAML
|
8
|
+
# * <b>{Manifest} = subclass adds methods for marshalling/unmarshalling data to XML file format</b>
|
9
|
+
#
|
10
|
+
# @see Serializable
|
11
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
12
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
13
|
+
class Manifest < Serializable
|
14
|
+
|
15
|
+
include HappyMapper
|
16
|
+
|
17
|
+
# @api internal
|
18
|
+
# @param filename [String] Optional filename if one wishes to override the default filename
|
19
|
+
# @return [String] Returns the standard filename (derived from the class name) to be used for serializing an object
|
20
|
+
def self.xml_filename(filename=nil)
  # An explicitly supplied filename always wins over the derived default.
  return filename if filename
  # Default: the unqualified class name with its first letter lower-cased,
  # plus ".xml" (e.g. Outer::SampleManifest => "sampleManifest.xml").
  cname = self.name.split(/::/).last
  cname[0, 1].downcase + cname[1..-1] + '.xml'
end
|
28
|
+
|
29
|
+
# @api internal
|
30
|
+
# @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
|
31
|
+
# @param filename [String] Optional filename if one wishes to override the default filename
|
32
|
+
# @return [Pathname] The location of the xml file
|
33
|
+
def self.xml_pathname(parent_dir, filename=nil)
  # Pathname.new accepts either a String or a Pathname, so both documented
  # argument types end up as a Pathname before the filename is appended.
  directory = Pathname.new(parent_dir)
  directory.join(xml_filename(filename))
end
|
36
|
+
|
37
|
+
# @api external
|
38
|
+
# @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
|
39
|
+
# @param filename [String] Optional filename if one wishes to override the default filename
|
40
|
+
# @return [Boolean] Returns true if the xml file exists
|
41
|
+
def self.xml_pathname_exist?(parent_dir, filename=nil)
  # Resolve the location via xml_pathname, then ask the filesystem.
  xml_pathname(parent_dir, filename).exist?
end
|
44
|
+
|
45
|
+
# @api external
|
46
|
+
# @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
|
47
|
+
# @param filename [String] Optional filename if one wishes to override the default filename
|
48
|
+
# @return [Serializable] Read the xml file and return the parsed XML
|
49
|
+
# @example {include:file:spec/features/serializer/read_xml_spec.rb}
|
50
|
+
def self.read_xml_file(parent_dir, filename=nil)
  # Read the whole file into memory, then hand the text to the class-level
  # parse method (presumably supplied by the HappyMapper mixin -- confirm).
  xml_text = xml_pathname(parent_dir, filename).read
  parse(xml_text)
end
|
53
|
+
|
54
|
+
# @api external
|
55
|
+
# @param xml_object [Serializable]
|
56
|
+
# @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
|
57
|
+
# @param filename [String] Optional filename if one wishes to override the default filename
|
58
|
+
# @return [void] Serialize the in-memory object to an XML file instance
|
59
|
+
def self.write_xml_file(xml_object, parent_dir, filename=nil)
  # parent_dir is documented as Pathname or String. String has no #mkpath,
  # so coerce to Pathname before creating the directory tree.
  Pathname.new(parent_dir).mkpath
  # Mode 'w' truncates any existing file; the block form closes the handle.
  self.xml_pathname(parent_dir, filename).open('w') { |f| f << xml_object.to_xml }
  nil
end
|
64
|
+
|
65
|
+
# @api external
|
66
|
+
# @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
|
67
|
+
# @param filename [String] Optional filename if one wishes to override the default filename
|
68
|
+
# @return [void] Serialize the in-memory object to an XML file instance
|
69
|
+
# @example {include:file:spec/features/serializer/write_xml_spec.rb}
|
70
|
+
def write_xml_file(parent_dir, filename=nil)
  # Instance-level convenience: delegate to the class-level writer,
  # passing this object as the thing to serialize.
  self.class.write_xml_file(self, parent_dir, filename)
end
|
73
|
+
|
74
|
+
end
|
75
|
+
|
76
|
+
end
|
@@ -0,0 +1,178 @@
|
|
1
|
+
module Serializer
|
2
|
+
|
3
|
+
# Some utility methods to facilitate serialization of data fields to Hash, JSON, or YAML shared by all subclasses.
|
4
|
+
# This class assumes that HappyMapper is used for declaration of fields to be serialized.
|
5
|
+
#
|
6
|
+
# ====Data Model
|
7
|
+
# * <b>{Serializable} = utility methods to facilitate serialization to Hash, JSON, or YAML</b>
|
8
|
+
# * {Manifest} = adds methods for marshalling/unmarshalling data to a persistent XML file format
|
9
|
+
#
|
10
|
+
# @see https://github.com/jnunemaker/happymapper
|
11
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
12
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
13
|
+
class Serializable
|
14
|
+
|
15
|
+
include HappyMapper
|
16
|
+
|
17
|
+
# A flexible initializer based on the DataMapper "create factory" design pattern.
|
18
|
+
# @see http://datamapper.org/docs/create_and_destroy.html
|
19
|
+
# @see Serializable#initialize
|
20
|
+
# @param opts [Hash<Symbol,Object>] a hash containing any number of symbol => value pairs.
|
21
|
+
# The symbols should correspond to attributes declared using HappyMapper syntax
|
22
|
+
def initialize(opts={})
  # The declared field names do not change while iterating, so look them up once.
  declared_names = variable_names
  opts.each do |key, value|
    # Only declared fields may be set; the :test key is accepted even when
    # it is not a declared field.
    unless declared_names.include?(key.to_s) || key == :test
      raise "#{key} is not a variable name in #{self.class.name}"
    end
    instance_variable_set("@#{key}", value)
  end
end
|
31
|
+
|
32
|
+
# @api internal
|
33
|
+
# @return [Array] A list of HappyMapper xml attribute, element and text nodes declared for the class
|
34
|
+
def variables
  # Attributes first, then elements, as declared on the class.
  # The text_node enhancement added by unhappymapper (elements having both
  # attributes and a text value) is not being used, so it is not included here.
  self.class.attributes + self.class.elements
end
|
46
|
+
|
47
|
+
# @api internal
|
48
|
+
# @return [Array] Extract the names of the variables
|
49
|
+
def variable_names
  # One name per declared variable, in declaration order.
  variables.map { |variable| variable.name }
end
|
52
|
+
|
53
|
+
# @api internal
|
54
|
+
# @return [String] Determine which attribute was marked as an object instance key.
|
55
|
+
# Keys are indicated by option :key=true when declaring the object's variables.
|
56
|
+
# This follows the same convention as used by DataMapper
|
57
|
+
# @see http://datamapper.org/docs/properties.html
|
58
|
+
def key_name
  # Memoized with defined? rather than ||= so that a nil result (no key
  # attribute declared) is also cached and the search runs only once.
  unless defined?(@key_name)
    # The first attribute declared with a truthy :key option wins.
    key_attribute = self.class.attributes.find { |attribute| attribute.options[:key] }
    @key_name = key_attribute ? key_attribute.name : nil
  end
  @key_name
end
|
70
|
+
|
71
|
+
# @api internal
|
72
|
+
# @return [String] For the current object instance, return the string to use as a hash key
|
73
|
+
def key
  # Without a declared key attribute there is nothing to look up.
  name = key_name
  name ? send(name) : nil
end
|
77
|
+
|
78
|
+
# @api internal
|
79
|
+
# @param array [Array] The array to be converted to a hash
|
80
|
+
# @return [OrderedHash] Generate a hash from an array of objects.
|
81
|
+
# If the array member has a field tagged as a key, that field will be used as the hash.key.
|
82
|
+
# Otherwise the index position of the array member will be used as the key
|
83
|
+
def array_to_hash(array,summary=false)
  item_hash = OrderedHash.new
  array.each_with_index do |item, index|
    # Use the member's own key when it has one; a missing or falsy key
    # falls back to the member's position in the array.
    item_key = item.respond_to?(:key) ? item.key : nil
    # Members that can render themselves as a hash are expanded recursively,
    # forwarding the summary flag; anything else is stored as-is.
    item_hash[item_key || index] = item.respond_to?(:to_hash) ? item.to_hash(summary) : item
  end
  item_hash
end
|
92
|
+
|
93
|
+
# @api internal
|
94
|
+
# @return [OrderedHash] Recursively generate an OrderedHash containing the object's properties
|
95
|
+
# @param summary [Boolean] Controls the depth and detail of recursion
|
96
|
+
def to_hash(summary=false)
  oh = OrderedHash.new
  # In summary mode only the variables named in summary_fields are emitted.
  vars = summary ? variables.select { |v| summary_fields.include?(v.name) } : variables
  vars.each do |variable|
    # An explicit :tag option overrides the variable name as the hash key.
    key = variable.options[:tag] || variable.name.to_s
    value = send(variable.name)
    oh[key] = case value
              when Array
                array_to_hash(value, summary)
              when Serializable
                # The summary flag is not forwarded here, so a nested single
                # object is always rendered in full.
                value.to_hash
              else
                value
              end
  end
  oh
end
|
113
|
+
|
114
|
+
# @return [OrderedHash] Calls to_hash(summary=true)
|
115
|
+
def summary
  # The original wrote `to_hash(summary=true)`, which only assigned a
  # throwaway local before passing true; pass the flag directly.
  to_hash(true)
end
|
118
|
+
|
119
|
+
# @api internal
|
120
|
+
# @param other [Serializable] The other object being compared
|
121
|
+
# @return [OrderedHash] Generate a hash containing the differences between two objects of the same type
|
122
|
+
def diff(other)
  raise "Cannot compare different classes" if self.class != other.class
  # The other object is the "left" side and this object is the "right" side.
  left = other.to_hash
  right = self.to_hash
  if self.key.nil? || other.key.nil?
    # Without keys on both sides, label the two sides generically.
    ltag, rtag = :old, :new
  else
    ltag, rtag = other.key, self.key
  end
  Serializable.deep_diff(ltag, left, rtag, right)
end
|
135
|
+
|
136
|
+
# @api internal
|
137
|
+
# @param hashes [Array<Hash>] The hashes to be compared, with optional name tags
|
138
|
+
# @return [OrderedHash] Generate a hash containing the differences between two hashes
|
139
|
+
# (recursively descend parallel trees of hashes)
|
140
|
+
# @see https://gist.github.com/146844
|
141
|
+
def self.deep_diff(*hashes)
  # Accepts either (left, right) or (ltag, left, rtag, right); the tags label
  # the two sides in the leaf entries of the result.
  case hashes.length
  when 4
    ltag, left, rtag, right = hashes
  when 2
    ltag, left, rtag, right = :left, hashes[0], :right, hashes[1]
  else
    raise "wrong number of arguments (expected 2 or 4)"
  end
  diff = OrderedHash.new
  # Visit every key present on either side; equal values are omitted.
  (left.keys | right.keys).each do |k|
    next if left[k] == right[k]
    if left[k].is_a?(Hash) && right[k].is_a?(Hash)
      # Both sides are hashes: descend so only the differing leaves are reported.
      diff[k] = deep_diff(ltag, left[k], rtag, right[k])
    else
      diff[k] = OrderedHash[ltag, left[k], rtag, right[k]]
    end
  end
  diff
end
|
162
|
+
|
163
|
+
# @api internal
|
164
|
+
# @return [String] Generate JSON output from a hash of the object's variables
|
165
|
+
def to_json(summary=false)
  # NOTE(review): the single positional parameter is the summary flag, not a
  # JSON generator state -- confirm no caller passes generator arguments.
  JSON.pretty_generate(to_hash(summary))
end
|
169
|
+
|
170
|
+
# @api internal
|
171
|
+
# @return [String] Generate YAML output from a hash of the object's variables
|
172
|
+
def to_yaml(summary=false)
  # Build the hash representation first, then let the hash render itself as YAML.
  to_hash(summary).to_yaml
end
|
175
|
+
|
176
|
+
end
|
177
|
+
|
178
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'moab_stanford'
|
2
|
+
|
3
|
+
module Stanford
|
4
|
+
|
5
|
+
# Utility Class for extracting content or other information from a Fedora Instance
|
6
|
+
#
|
7
|
+
# ====Data Model
|
8
|
+
# * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
|
9
|
+
# * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
|
10
|
+
# * <b>{ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance</b>
|
11
|
+
#
|
12
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
13
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
14
|
+
class ActiveFedoraObject

  # @return [Object] The Active Fedora representation of the Fedora Object
  attr_accessor :fedora_object

  # @param fedora_object [Object] The Active Fedora representation of the Fedora Object
  # @return [Stanford::ActiveFedoraObject] Create a new wrapper around the supplied Fedora object
  def initialize(fedora_object)
    @fedora_object = fedora_object
  end

  # @api external
  # @param ds_id [String] The datastream identifier
  # @return [String] The content of the specified datastream
  def get_datastream_content(ds_id)
    # Looks the datastream up by id on the wrapped object and returns its content.
    @fedora_object.datastreams[ds_id].content
  end

end
|
33
|
+
|
34
|
+
end
|
@@ -0,0 +1,236 @@
|
|
1
|
+
require 'moab_stanford'
|
2
|
+
|
3
|
+
module Stanford
|
4
|
+
|
5
|
+
# Stanford-specific utility methods for transforming contentMetadata to versionInventory and doing comparisons
|
6
|
+
#
|
7
|
+
# ====Data Model
|
8
|
+
# * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
|
9
|
+
# * <b>{ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons</b>
|
10
|
+
# * {ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance
|
11
|
+
#
|
12
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
13
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
14
|
+
class ContentInventory
|
15
|
+
|
16
|
+
# @param content_metadata [String] The content metadata to be transformed into a versionInventory
|
17
|
+
# @param object_id [String] The identifier of the digital object
|
18
|
+
# @param subset [String] Specifies which subset of files to list (all|preserve|publish|shelve)
|
19
|
+
# @param version_id [Integer] The ID of the version whose content metadata is to be transformed
|
20
|
+
# @return [FileInventory] The versionInventory equivalent of the contentMetadata
|
21
|
+
# if the supplied content_metadata is blank or empty, then a skeletal FileInventory will be returned
|
22
|
+
def inventory_from_cm(content_metadata, object_id, subset, version_id=nil)
  # The contentMetadata datastream is not required for ingest, since some object types,
  # such as collection or APO, do not require one.
  # Many of these objects have contentMetadata with no child elements, such as this:
  #    <contentMetadata objectId="bd608mj3166" type="file"/>
  # but there are also objects that have no datastream of this name at all.
  cm_inventory = FileInventory.new(:type=>"version",:digital_object_id=>object_id, :version_id=>version_id)
  # The inventory always receives exactly one group, built from the contentMetadata.
  content_group = group_from_cm(content_metadata, subset)
  cm_inventory.groups << content_group
  cm_inventory
end
|
32
|
+
|
33
|
+
# @api external
|
34
|
+
# @param content_metadata [String] The contentMetadata as a string
|
35
|
+
# @param subset [String] Specifies which subset of files to list (all|preserve|publish|shelve)
|
36
|
+
# @return [FileGroup] The {FileGroup} object generated from a contentMetadata instance
|
37
|
+
# @example {include:file:spec/features/stanford/content_metadata_read_spec.rb}
|
38
|
+
def group_from_cm(content_metadata, subset)
  ng_doc = Nokogiri::XML(content_metadata)
  # Raises if any file node lacks the essential attributes or checksums.
  validate_content_metadata(ng_doc)
  # The subset name is matched case-insensitively and selects which file nodes to list.
  file_xpath = case subset.to_s.downcase
               when 'preserve'
                 "//file[@preserve='yes']"
               when 'publish'
                 "//file[@publish='yes']"
               when 'shelve'
                 "//file[@shelve='yes']"
               when 'all'
                 "//file"
               else
                 raise "Unknown disposition subset (#{subset})"
               end
  nodeset = ng_doc.xpath(file_xpath)
  content_group = FileGroup.new(:group_id=>'content', :data_source => "contentMetadata-#{subset}")
  nodeset.each do |file_node|
    signature = generate_signature(file_node)
    instance = generate_instance(file_node)
    content_group.add_file_instance(signature, instance)
  end
  content_group
end
|
61
|
+
|
62
|
+
# @api internal
|
63
|
+
# @param node [Nokogiri::XML::Node] The XML node containing file information
|
64
|
+
# @return [FileSignature] The {FileSignature} object generated from the XML data
|
65
|
+
def generate_signature(node)
  signature = FileSignature.new()
  signature.size = node.attributes['size'].content
  # Copy each recognized child <checksum> into the signature. The type is
  # matched case-insensitively; unrecognized types are ignored.
  node.xpath('checksum').each do |checksum_node|
    case checksum_node.attributes['type'].content.upcase
    when 'MD5'
      signature.md5 = checksum_node.text
    when 'SHA1', 'SHA-1'
      signature.sha1 = checksum_node.text
    when 'SHA256', 'SHA-256'
      signature.sha256 = checksum_node.text
    end
  end
  signature
end
|
81
|
+
|
82
|
+
# @api internal
|
83
|
+
# @param node (see #generate_signature)
|
84
|
+
# @return [FileInstance] The {FileInstance} object generated from the XML data
|
85
|
+
def generate_instance(node)
  instance = FileInstance.new()
  instance.path = node.attributes['id'].content
  # Best-effort: the rescue modifier swallows any StandardError raised on this
  # line (e.g. when the node has no datetime attribute to read).
  instance.datetime = node.attributes['datetime'].content rescue nil
  instance
end
|
91
|
+
|
92
|
+
# @api external
|
93
|
+
# @param file_group [FileGroup] The {FileGroup} object used as the data source
|
94
|
+
# @return [String] The contentMetadata instance generated from the FileGroup
|
95
|
+
# @example {include:file:spec/features/stanford/content_metadata_write_spec.rb}
|
96
|
+
def generate_content_metadata(file_group, object_id, version_id)
  cm = Nokogiri::XML::Builder.new do |xml|
    xml.contentMetadata(:type=>"sample", :objectId=>object_id) {
      # All files are placed in a single resource named after the version.
      xml.resource(:type=>"version", :sequence=>"1", :id=>"version-#{version_id}") {
        file_group.files.each do |file_manifestation|
          signature = file_manifestation.signature
          # One <file> element per instance; every instance of a manifestation
          # shares that manifestation's signature.
          file_manifestation.instances.each do |instance|
            xml.file(
                :id=>instance.path,
                :size=>signature.size,
                :datetime=>instance.datetime,
                :shelve=>'yes',
                :publish=>'yes',
                :preserve=>'yes') {
              # Emit a <checksum> only for the types present in the fixity data.
              fixity = signature.fixity
              xml.checksum(:type=>"MD5") { xml.text signature.md5 } if fixity[:md5]
              xml.checksum(:type=>"SHA-1") { xml.text signature.sha1 } if fixity[:sha1]
              xml.checksum(:type=>"SHA-256") { xml.text signature.sha256 } if fixity[:sha256]
            }
          end
        end
      }
    }
  end
  cm.to_xml
end
|
122
|
+
|
123
|
+
# @param content_metadata [String,Nokogiri::XML::Document] The contentMetadata as a string or XML doc
|
124
|
+
# @return [Boolean] True if contentMetadata has essential file attributes, else raise exception
|
125
|
+
def validate_content_metadata(content_metadata)
  problems = validate_content_metadata_details(content_metadata)
  # Only the first problem is put in the exception message; " ..." is always appended.
  raise Moab::InvalidMetadataException, problems[0]+" ..." unless problems.empty?
  true
end
|
130
|
+
|
131
|
+
# @param content_metadata [String, Nokogiri::XML::Document] The contentMetadata as a string or XML doc
|
132
|
+
# @return [Array<String>] List of problems found
|
133
|
+
def validate_content_metadata_details(content_metadata)
  # Match on the class itself rather than its name so that subclasses of the
  # accepted types are recognized too.
  content_metadata_doc =
      case content_metadata
      when String
        Nokogiri::XML(content_metadata)
      when Pathname
        Nokogiri::XML(content_metadata.read)
      when Nokogiri::XML::Document
        content_metadata
      else
        raise Moab::InvalidMetadataException, "Content Metadata is in unrecognized format"
      end
  result = []
  content_metadata_doc.xpath("//file").each_with_index do |file_node, position|
    # Start by assuming everything is missing, then strike off what is found.
    missing = ['id', 'size','md5','sha1']
    missing.delete('id') if file_node.has_attribute?('id')
    missing.delete('size') if file_node.has_attribute?('size')
    file_node.xpath('checksum').each do |checksum_node|
      # A <checksum> without a type attribute matches nothing, so its checksum
      # stays listed as missing instead of aborting validation.
      case checksum_node['type'].to_s.upcase
      when 'MD5'
        missing.delete('md5')
      when 'SHA1', 'SHA-1'
        missing.delete('sha1')
      end
    end
    next if missing.empty?
    if missing.include?('id')
      # Without an id the node can only be identified by its position.
      result << "File node #{position} is missing #{missing.join(',')}"
    else
      result << "File node having id='#{file_node['id']}' is missing #{missing.join(',')}"
    end
  end
  result
end
|
169
|
+
|
170
|
+
# @param content_metadata [String] The contentMetadata as a string
|
171
|
+
# @param content_group [FileGroup] The {FileGroup} object used as the fixity data source
|
172
|
+
# @return [String] Returns a remediated copy of the contentMetadata with fixity data filled in
|
173
|
+
# @see http://blog.slashpoundbang.com/post/1454850669/how-to-pretty-print-xml-with-nokogiri
|
174
|
+
def remediate_content_metadata(content_metadata, content_group)
  return nil if content_metadata.nil?
  # With no fixity data to draw on, the metadata is returned untouched.
  return content_metadata if content_group.nil? || content_group.files.size < 1
  signature_for_path = content_group.path_hash
  # Stored in instance variables because remediate_checksum_nodes reads them.
  @type_for_name = FileSignature.checksum_type_for_name
  @names_for_type = FileSignature.checksum_names_for_type
  # noblanks drops whitespace-only nodes so the output can be re-indented below.
  ng_doc = Nokogiri::XML(content_metadata) { |x| x.noblanks }
  ng_doc.xpath("//file").each do |file_node|
    # NOTE(review): a file id absent from the group yields a nil signature,
    # which the helpers below do not guard against -- confirm callers prevent this.
    signature = signature_for_path[file_node['id']]
    remediate_file_size(file_node, signature)
    remediate_checksum_nodes(file_node, signature)
  end
  ng_doc.to_xml(:indent => 2)
end
|
190
|
+
|
191
|
+
# @param [Nokogiri::XML::Element] file_node the File stanza being remediated
|
192
|
+
# @param [FileSignature] signature the fixity data for the file from the FileGroup
|
193
|
+
# @return [void] update the file size attribute if missing, raise exception if inconsistent
|
194
|
+
def remediate_file_size(file_node, signature)
  recorded_size = file_node['size']
  # Sizes are compared as strings because the attribute value is text.
  expected_size = signature.size.to_s
  if recorded_size.nil? || recorded_size.empty?
    # Missing or blank: fill in the size from the signature.
    file_node['size'] = expected_size
  elsif recorded_size != expected_size
    raise "Inconsistent size for #{file_node['id']}: #{recorded_size} != #{expected_size}"
  end
end
|
202
|
+
|
203
|
+
# @param [Nokogiri::XML::Element] file_node the File stanza being remediated
|
204
|
+
# @param [FileSignature] signature the fixity data for the file from the FileGroup
|
205
|
+
# @return [void] update the file's checksum elements if data missing, raise exception if inconsistent
|
206
|
+
def remediate_checksum_nodes(file_node, signature)
  # Relies on @type_for_name and @names_for_type, which
  # remediate_content_metadata sets before calling this method.
  # collect <checksum> elements for checksum types that are already present
  checksum_nodes = OrderedHash.new
  file_node.xpath('checksum').each do |checksum_node|
    checksum_nodes[@type_for_name[checksum_node['type']]] = checksum_node
  end
  # add new <checksum> elements for the other checksum types that were missing
  @names_for_type.each do |type, names|
    next if checksum_nodes.has_key?(type)
    checksum_node = Nokogiri::XML::Element.new('checksum',file_node.document)
    # The first listed name is used as the type attribute of the new element.
    checksum_node['type'] = names[0]
    file_node << checksum_node
    checksum_nodes[type] = checksum_node
  end
  # make sure the <checksum> element has a content value
  checksum_nodes.each do |type,checksum_node|
    cm_checksum = checksum_node.content
    sig_checksum = signature.checksums[type]
    if cm_checksum.nil? || cm_checksum.empty?
      checksum_node.content = sig_checksum
    elsif cm_checksum != sig_checksum
      raise "Inconsistent #{type} for #{file_node['id']}: #{cm_checksum} != #{sig_checksum}"
    end
  end
end
|
233
|
+
|
234
|
+
end
|
235
|
+
|
236
|
+
end
|