moab-versioning 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/moab.rb +59 -0
- data/lib/moab/bagger.rb +289 -0
- data/lib/moab/config.rb +21 -0
- data/lib/moab/exceptions.rb +18 -0
- data/lib/moab/file_group.rb +244 -0
- data/lib/moab/file_group_difference.rb +336 -0
- data/lib/moab/file_group_difference_subset.rb +45 -0
- data/lib/moab/file_instance.rb +82 -0
- data/lib/moab/file_instance_difference.rb +54 -0
- data/lib/moab/file_inventory.rb +279 -0
- data/lib/moab/file_inventory_difference.rb +132 -0
- data/lib/moab/file_manifestation.rb +85 -0
- data/lib/moab/file_signature.rb +200 -0
- data/lib/moab/signature_catalog.rb +195 -0
- data/lib/moab/signature_catalog_entry.rb +61 -0
- data/lib/moab/storage_object.rb +220 -0
- data/lib/moab/storage_object_version.rb +333 -0
- data/lib/moab/storage_repository.rb +57 -0
- data/lib/moab/storage_services.rb +104 -0
- data/lib/moab/verification_result.rb +83 -0
- data/lib/moab/version_metadata.rb +38 -0
- data/lib/moab/version_metadata_entry.rb +64 -0
- data/lib/moab/version_metadata_event.rb +47 -0
- data/lib/moab_stanford.rb +18 -0
- data/lib/monkey_patches.rb +65 -0
- data/lib/serializer.rb +36 -0
- data/lib/serializer/manifest.rb +76 -0
- data/lib/serializer/serializable.rb +178 -0
- data/lib/stanford/active_fedora_object.rb +34 -0
- data/lib/stanford/content_inventory.rb +236 -0
- data/lib/stanford/dor_metadata.rb +49 -0
- data/lib/stanford/storage_repository.rb +46 -0
- data/lib/stanford/storage_services.rb +66 -0
- data/lib/tasks/yard.rake +34 -0
- data/lib/tools/api_doc_generator.rb +396 -0
- data/lib/tools/spec_generator.rb +410 -0
- data/lib/tools/spec_generator_old.rb +49 -0
- metadata +252 -0
data/lib/serializer.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# Serializer is a module containing classes whose methods facilitate serialization
|
2
|
+
# of data fields to various formats. To obtain those benefits, a dependent class
|
3
|
+
# should inherit from {Serializable} or {Manifest}
|
4
|
+
# depending on whether XML serialization is required.
|
5
|
+
#
|
6
|
+
# ====Data Model
|
7
|
+
# * <b>{Serializable} = utility methods to facilitate serialization to Hash, JSON, or YAML</b>
|
8
|
+
# * {Manifest} = adds methods for marshalling/unmarshalling data to a persistent XML file format
|
9
|
+
#
|
10
|
+
# @see https://github.com/jnunemaker/happymapper
|
11
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
12
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
13
|
+
module Serializer
|
14
|
+
end
|
15
|
+
|
16
|
+
require 'nokogiri'
|
17
|
+
require 'happymapper'
|
18
|
+
if RUBY_VERSION < '1.9'
|
19
|
+
require 'hashery/ordered_hash'
|
20
|
+
include Hashery
|
21
|
+
else
|
22
|
+
require 'psych'
|
23
|
+
OrderedHash = Hash
|
24
|
+
end
|
25
|
+
require 'json'
|
26
|
+
require 'json/pure'
|
27
|
+
require 'pathname'
|
28
|
+
require 'fileutils'
|
29
|
+
require 'time'
|
30
|
+
require 'digest/md5'
|
31
|
+
require 'digest/sha1'
|
32
|
+
|
33
|
+
require 'monkey_patches'
|
34
|
+
require 'serializer/serializable'
|
35
|
+
require 'serializer/manifest'
|
36
|
+
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module Serializer
|
2
|
+
|
3
|
+
# Subclass of {Serializable} that adds methods for marshalling/unmarshalling data
|
4
|
+
# to a persistent XML file format.
|
5
|
+
#
|
6
|
+
# ====Data Model
|
7
|
+
# * {Serializable} = utility methods to facilitate serialization to Hash, JSON, or YAML
|
8
|
+
# * <b>{Manifest} = subclass adds methods for marshalling/unmarshalling data to XML file format</b>
|
9
|
+
#
|
10
|
+
# @see Serializable
|
11
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
12
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
13
|
+
class Manifest < Serializable
|
14
|
+
|
15
|
+
include HappyMapper
|
16
|
+
|
17
|
+
# @api internal
# @param filename [String] Optional filename; when supplied it is returned unchanged
# @return [String] The filename to be used for serializing an object: either the supplied override,
#   or the last segment of the class name with its first character lower-cased and '.xml' appended
def self.xml_filename(filename=nil)
  return filename if filename
  base_name = self.name.split(/::/).last
  initial = base_name[0, 1].downcase
  remainder = base_name[1..-1]
  initial + remainder + '.xml'
end
|
28
|
+
|
29
|
+
# @api internal
# @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
# @param filename [String] Optional filename if one wishes to override the default filename
# @return [Pathname] The parent directory joined with the name returned by {xml_filename}
def self.xml_pathname(parent_dir, filename=nil)
  directory = Pathname.new(parent_dir)
  leaf_name = self.xml_filename(filename)
  directory.join(leaf_name)
end
|
36
|
+
|
37
|
+
# @api external
# @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
# @param filename [String] Optional filename if one wishes to override the default filename
# @return [Boolean] Returns true if the pathname computed by {xml_pathname} exists
def self.xml_pathname_exist?(parent_dir, filename=nil)
  xml_file = self.xml_pathname(parent_dir, filename)
  xml_file.exist?
end
|
44
|
+
|
45
|
+
# @api external
# @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
# @param filename [String] Optional filename if one wishes to override the default filename
# @return [Serializable] The result of handing the full text of the xml file to the class's parse method
# @example {include:file:spec/features/serializer/read_xml_spec.rb}
def self.read_xml_file(parent_dir, filename=nil)
  xml_text = self.xml_pathname(parent_dir, filename).read
  self.parse(xml_text)
end
|
53
|
+
|
54
|
+
# @api external
# @param xml_object [Serializable] The object whose #to_xml output is written to the file
# @param parent_dir [Pathname,String] The location of the directory in which the xml file is located.
#   The directory (and any missing ancestors) is created if it does not yet exist
# @param filename [String] Optional filename if one wishes to override the default filename
# @return [void] Serialize the in-memory object to a xml file instance
def self.write_xml_file(xml_object, parent_dir, filename=nil)
  # Coerce before calling mkpath: parent_dir may be a plain String, which has no #mkpath
  Pathname.new(parent_dir).mkpath
  self.xml_pathname(parent_dir, filename).open('w') { |f| f << xml_object.to_xml }
  nil
end
|
64
|
+
|
65
|
+
# @api external
# @param parent_dir [Pathname,String] The location of the directory in which the xml file is located
# @param filename [String] Optional filename if one wishes to override the default filename
# @return [void] Serialize this object to a xml file by delegating to the class-level write_xml_file
# @example {include:file:spec/features/serializer/write_xml_spec.rb}
def write_xml_file(parent_dir, filename=nil)
  manifest_class = self.class
  manifest_class.write_xml_file(self, parent_dir, filename)
end
|
73
|
+
|
74
|
+
end
|
75
|
+
|
76
|
+
end
|
@@ -0,0 +1,178 @@
|
|
1
|
+
module Serializer
|
2
|
+
|
3
|
+
# Some utility methods to facilitate serialization of data fields to Hash, JSON, or YAML shared by all subclasses.
|
4
|
+
# This class assumes that HappyMapper is used for declaration of fields to be serialized.
|
5
|
+
#
|
6
|
+
# ====Data Model
|
7
|
+
# * <b>{Serializable} = utility methods to facilitate serialization to Hash, JSON, or YAML</b>
|
8
|
+
# * {Manifest} = adds methods for marshalling/unmarshalling data to a persistent XML file format
|
9
|
+
#
|
10
|
+
# @see https://github.com/jnunemaker/happymapper
|
11
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
12
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
13
|
+
class Serializable
|
14
|
+
|
15
|
+
include HappyMapper
|
16
|
+
|
17
|
+
# A flexible initializer based on the DataMapper "create factory" design pattern.
# Each accepted pair is stored in the instance variable named after its key.
# @see http://datamapper.org/docs/create_and_destroy.html
# @see Serializable#initialize
# @param opts [Hash<Symbol,Object>] a hash containing any number of symbol => value pairs.
#   The symbols should correspond to attributes declared using HappyMapper syntax
#   (the key :test is also accepted)
# @raise [RuntimeError] if a key is neither a declared variable name nor :test
def initialize(opts={})
  opts.each do |name, setting|
    recognized = variable_names.include?(name.to_s) || name == :test
    raise "#{name} is not a variable name in #{self.class.name}" unless recognized
    instance_variable_set("@#{name}", setting)
  end
end
|
31
|
+
|
32
|
+
# @api internal
# @return [Array] The HappyMapper xml attribute declarations of the class followed by its element declarations
# @note The text_node enhancement added by unhappymapper (which enables elements having both
#   attributes and a text value) is not being used, so text nodes are not part of the list
def variables
  declaring_class = self.class
  declaring_class.attributes + declaring_class.elements
end
|
46
|
+
|
47
|
+
# @api internal
# @return [Array] The name of each entry returned by {#variables}, in the same order
def variable_names
  variables.map(&:name)
end
|
52
|
+
|
53
|
+
# @api internal
# @return [String] Determine which attribute was marked as an object instance key
#   (nil when no attribute is marked). The answer is computed once and then cached in @key_name.
#   Keys are indicated by option :key=true when declaring the object's variables.
#   This follows the same convention as used by DataMapper
# @see http://datamapper.org/docs/properties.html
def key_name
  # defined? (rather than a nil check) lets a cached nil count as "already looked up"
  return @key_name if defined?(@key_name)
  keyed_attribute = self.class.attributes.find { |attribute| attribute.options[:key] }
  @key_name = keyed_attribute ? keyed_attribute.name : nil
end
|
70
|
+
|
71
|
+
# @api internal
# @return [String] For the current object instance, return the string to use as a hash key:
#   the value of the attribute named by {#key_name}, or nil when there is no key attribute
def key
  key_attribute = key_name
  key_attribute ? self.send(key_attribute) : nil
end
|
77
|
+
|
78
|
+
# @api internal
# @param array [Array] The array to be converted to a hash
# @param summary [Boolean] Passed along to the to_hash method of members that have one
# @return [OrderedHash] Generate a hash from an array of objects.
#   If the array member has a field tagged as a key, that field will be used as the hash.key.
#   Otherwise the index position of the array member will be used as the key.
#   Members responding to to_hash are stored as hashes, other members are stored as they are
def array_to_hash(array,summary=false)
  item_hash = OrderedHash.new
  array.each_with_index do |item, index|
    # a missing or nil/false key falls back to the member's position
    item_key = item.respond_to?(:key) && item.key
    item_hash[item_key || index] = item.respond_to?(:to_hash) ? item.to_hash(summary) : item
  end
  item_hash
end
|
92
|
+
|
93
|
+
# @api internal
# @return [OrderedHash] Recursively generate an OrderedHash containing the object's properties.
#   Each entry is keyed by the variable's :tag option, or by the variable's name when no tag is set
# @param summary [Boolean] Controls the depth and detail of recursion: when true only the
#   variables whose names are listed in summary_fields are included
def to_hash(summary=false)
  properties = OrderedHash.new
  selected = summary ? variables.select { |v| summary_fields.include?(v.name) } : variables
  selected.each do |variable|
    label = variable.options[:tag] || variable.name.to_s
    value = self.send(variable.name)
    properties[label] =
      if value.is_a?(Array)
        array_to_hash(value, summary)
      elsif value.is_a?(Serializable)
        # NOTE(review): the summary flag is not forwarded to a nested Serializable,
        # unlike the Array case above -- confirm this is intended
        value.to_hash
      else
        value
      end
  end
  properties
end
|
113
|
+
|
114
|
+
# @return [OrderedHash] The result of {#to_hash} called with its summary argument set to true
def summary
  to_hash(true)
end
|
118
|
+
|
119
|
+
# @api internal
# @param other [Serializable] The other object being compared
# @return [OrderedHash] Generate a hash containing the differences between two objects of the same type.
#   Values are labelled with the two objects' keys, or with :old (other) and :new (self)
#   when either object has no key
# @raise [RuntimeError] if the two objects are not of the same class
def diff(other)
  raise "Cannot compare different classes" unless self.class == other.class
  left = other.to_hash
  right = self.to_hash
  ltag, rtag = (self.key.nil? || other.key.nil?) ? [:old, :new] : [other.key, self.key]
  Serializable.deep_diff(ltag, left, rtag, right)
end
|
135
|
+
|
136
|
+
# @api internal
# @param hashes [Array<Hash>] The hashes to be compared, with optional name tags:
#   either (left, right), labelled :left and :right, or (ltag, left, rtag, right)
# @return [OrderedHash] Generate a hash containing the differences between two hashes
#   (recursively descend parallel trees of hashes). Keys whose values are equal are omitted
# @raise [RuntimeError] if the number of arguments is neither 2 nor 4
# @see https://gist.github.com/146844
def self.deep_diff(*hashes)
  ltag, left, rtag, right =
    case hashes.length
    when 4 then hashes
    when 2 then [:left, hashes[0], :right, hashes[1]]
    else raise "wrong number of arguments (expected 2 or 4)"
    end
  differences = OrderedHash.new
  (left.keys | right.keys).each do |k|
    lvalue = left[k]
    rvalue = right[k]
    next if lvalue == rvalue
    differences[k] =
      if lvalue.is_a?(Hash) && rvalue.is_a?(Hash)
        # both sides are hashes: report only the nested entries that differ
        deep_diff(ltag, lvalue, rtag, rvalue)
      else
        OrderedHash[ltag, lvalue, rtag, rvalue]
      end
  end
  differences
end
|
162
|
+
|
163
|
+
# @api internal
# @param summary [Boolean] Passed through to {#to_hash}
# @return [String] Generate pretty-printed JSON output from a hash of the object's variables
def to_json(summary=false)
  JSON.pretty_generate(self.to_hash(summary))
end
|
169
|
+
|
170
|
+
# @api internal
# @param summary [Boolean] Passed through to {#to_hash}
# @return [String] Generate YAML output from a hash of the object's variables
def to_yaml(summary=false)
  properties = self.to_hash(summary)
  properties.to_yaml
end
|
175
|
+
|
176
|
+
end
|
177
|
+
|
178
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'moab_stanford'
|
2
|
+
|
3
|
+
module Stanford
|
4
|
+
|
5
|
+
# Utility Class for extracting content or other information from a Fedora Instance
#
# ====Data Model
# * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
# * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
# * <b>{ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance</b>
#
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
#   All rights reserved.  See {file:LICENSE.rdoc} for details.
class ActiveFedoraObject

  # @return [Object] The Active Fedora representation of the Fedora Object
  attr_accessor :fedora_object

  # @param fedora_object [Object] The Active Fedora representation of the Fedora Object
  # @return [Stanford::ActiveFedoraObject] Create a wrapper that holds the supplied Fedora object
  def initialize(fedora_object)
    @fedora_object = fedora_object
  end

  # @api external
  # @param ds_id [String] The datastream identifier
  # @return [String] The content of the specified datastream, looked up in the
  #   wrapped object's datastreams collection
  def get_datastream_content(ds_id)
    datastream = @fedora_object.datastreams[ds_id]
    datastream.content
  end

end
|
33
|
+
|
34
|
+
end
|
@@ -0,0 +1,236 @@
|
|
1
|
+
require 'pathname'
require 'moab_stanford'
|
2
|
+
|
3
|
+
module Stanford
|
4
|
+
|
5
|
+
# Stanford-specific utility methods for transforming contentMetadata to versionInventory and doing comparisons
|
6
|
+
#
|
7
|
+
# ====Data Model
|
8
|
+
# * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
|
9
|
+
# * <b>{ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons</b>
|
10
|
+
# * {ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance
|
11
|
+
#
|
12
|
+
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
13
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
14
|
+
class ContentInventory
|
15
|
+
|
16
|
+
# @param content_metadata [String] The content metadata to be transformed into a versionInventory
# @param object_id [String] The identifier of the digital object
# @param subset [String] Specifies which subset of files to list (all|preserve|publish|shelve)
# @param version_id [Integer] The ID of the version whose content metadata is to be transformed
# @return [FileInventory] The versionInventory equivalent of the contentMetadata:
#   a "version" type inventory holding the single group produced by {#group_from_cm}
def inventory_from_cm(content_metadata, object_id, subset, version_id=nil)
  # The contentMetadata datastream is not required for ingest, since some object types,
  # such as collection or APO, do not require one.
  # Many of these objects have contentMetadata with no child elements, such as this:
  #    <contentMetadata objectId="bd608mj3166" type="file"/>
  # but there are also objects that have no datastream of this name at all
  inventory_options = {:type=>"version", :digital_object_id=>object_id, :version_id=>version_id}
  inventory = FileInventory.new(inventory_options)
  group = group_from_cm(content_metadata, subset)
  inventory.groups << group
  inventory
end
|
32
|
+
|
33
|
+
# @api external
# @param content_metadata [String] The contentMetadata as a string
# @param subset [String] Specifies which subset of files to list (all|preserve|publish|shelve);
#   matched case-insensitively
# @return [FileGroup] The {FileGroup} object generated from a contentMetadata instance, holding one
#   signature/instance pair per selected file node
# @raise [RuntimeError] if subset is not one of the recognized values
# @example {include:file:spec/features/stanford/content_metadata_read_spec.rb}
def group_from_cm(content_metadata, subset)
  ng_doc = Nokogiri::XML(content_metadata)
  validate_content_metadata(ng_doc)
  # xpath used to select the file nodes of each disposition subset
  xpath_for_subset = {
    'preserve' => "//file[@preserve='yes']",
    'publish'  => "//file[@publish='yes']",
    'shelve'   => "//file[@shelve='yes']",
    'all'      => "//file"
  }
  query = xpath_for_subset[subset.to_s.downcase]
  raise "Unknown disposition subset (#{subset})" unless query
  nodeset = ng_doc.xpath(query)
  group = FileGroup.new(:group_id=>'content', :data_source => "contentMetadata-#{subset}")
  nodeset.each do |file_node|
    file_signature = generate_signature(file_node)
    file_instance = generate_instance(file_node)
    group.add_file_instance(file_signature, file_instance)
  end
  group
end
|
61
|
+
|
62
|
+
# @api internal
# @param node [Nokogiri::XML::Node] The XML node containing file information
# @return [FileSignature] The {FileSignature} object generated from the XML data: the size comes from
#   the node's size attribute, the checksums from its checksum children (MD5, SHA-1 and SHA-256,
#   matched case-insensitively; other checksum types are ignored)
def generate_signature(node)
  # checksum type name (upper-cased) => signature setter
  setter_for_type = {
    'MD5'     => :md5=,
    'SHA1'    => :sha1=,
    'SHA-1'   => :sha1=,
    'SHA256'  => :sha256=,
    'SHA-256' => :sha256=
  }
  signature = FileSignature.new()
  signature.size = node.attributes['size'].content
  node.xpath('checksum').each do |checksum_node|
    setter = setter_for_type[checksum_node.attributes['type'].content.upcase]
    signature.send(setter, checksum_node.text) if setter
  end
  signature
end
|
81
|
+
|
82
|
+
# @api internal
# @param node [Nokogiri::XML::Node] The XML node containing file information
# @return [FileInstance] The {FileInstance} object generated from the XML data: the path comes from the
#   node's id attribute and the datetime from its datetime attribute (nil when that attribute is absent)
def generate_instance(node)
  instance = FileInstance.new()
  instance.path = node.attributes['id'].content
  # The datetime attribute is optional. Test for it explicitly instead of using a
  # blanket "rescue nil", which would also hide unrelated errors
  datetime_attribute = node.attributes['datetime']
  instance.datetime = datetime_attribute ? datetime_attribute.content : nil
  instance
end
|
91
|
+
|
92
|
+
# @api external
# @param file_group [FileGroup] The {FileGroup} object used as the data source
# @param object_id [String] The identifier written to the objectId attribute of the root element
# @param version_id [Integer] The version number used to build the resource id ("version-N")
# @return [String] The contentMetadata instance generated from the FileGroup. Every file instance is
#   written as a file element flagged shelve/publish/preserve = 'yes', with one checksum child per
#   checksum type present in the signature's fixity hash
# @example {include:file:spec/features/stanford/content_metadata_write_spec.rb}
def generate_content_metadata(file_group, object_id, version_id)
  builder = Nokogiri::XML::Builder.new do |xml|
    xml.contentMetadata(:type=>"sample", :objectId=>object_id) do
      xml.resource(:type=>"version", :sequence=>"1", :id=>"version-#{version_id}") do
        file_group.files.each do |manifestation|
          file_signature = manifestation.signature
          manifestation.instances.each do |file_instance|
            file_attributes = {
              :id=>file_instance.path,
              :size=>file_signature.size,
              :datetime=>file_instance.datetime,
              :shelve=>'yes',
              :publish=>'yes',
              :preserve=>'yes'
            }
            xml.file(file_attributes) do
              fixity = file_signature.fixity
              xml.checksum(:type=>"MD5") { xml.text file_signature.md5 } if fixity[:md5]
              xml.checksum(:type=>"SHA-1") { xml.text file_signature.sha1 } if fixity[:sha1]
              xml.checksum(:type=>"SHA-256") { xml.text file_signature.sha256 } if fixity[:sha256]
            end
          end
        end
      end
    end
  end
  builder.to_xml
end
|
122
|
+
|
123
|
+
# @param content_metadata [String,Nokogiri::XML::Document] The contentMetadata as a string or XML doc
# @return [Boolean] True if contentMetadata has essential file attributes, else raise exception
# @raise [Moab::InvalidMetadataException] if {#validate_content_metadata_details} reports any problem;
#   the message is the first problem followed by " ..."
def validate_content_metadata(content_metadata)
  problems = validate_content_metadata_details(content_metadata)
  return true if problems.empty?
  raise Moab::InvalidMetadataException, problems.first + " ..."
end
|
130
|
+
|
131
|
+
# @param content_metadata [String, Pathname, Nokogiri::XML::Document] The contentMetadata as a string,
#   as the pathname of a file containing it, or as an XML doc (subclasses of these types are accepted)
# @return [Array<String>] List of problems found: one entry per file node that lacks an id attribute,
#   a size attribute, an MD5 checksum or a SHA-1 checksum. Empty when nothing is missing
# @raise [Moab::InvalidMetadataException] if content_metadata is of an unrecognized type
def validate_content_metadata_details(content_metadata)
  # Match on the classes themselves (not on class-name strings) so that subclasses are recognized
  content_metadata_doc =
    case content_metadata
    when String
      Nokogiri::XML(content_metadata)
    when Pathname
      Nokogiri::XML(content_metadata.read)
    when Nokogiri::XML::Document
      content_metadata
    else
      raise Moab::InvalidMetadataException, "Content Metadata is in unrecognized format"
    end
  result = []
  content_metadata_doc.xpath("//file").each_with_index do |file_node, position|
    missing = ['id', 'size', 'md5', 'sha1']
    missing.delete('id') if file_node.has_attribute?('id')
    missing.delete('size') if file_node.has_attribute?('size')
    file_node.xpath('checksum').each do |checksum_node|
      # to_s tolerates a checksum element that has no type attribute: it then satisfies
      # no requirement and the checksum is reported as missing, rather than crashing the validator
      case checksum_node['type'].to_s.upcase
      when 'MD5'
        missing.delete('md5')
      when 'SHA1', 'SHA-1'
        missing.delete('sha1')
      end
    end
    next if missing.empty?
    if missing.include?('id')
      # without an id the node can only be identified by its position among the file nodes
      result << "File node #{position} is missing #{missing.join(',')}"
    else
      result << "File node having id='#{file_node['id']}' is missing #{missing.join(',')}"
    end
  end
  result
end
|
169
|
+
|
170
|
+
# @param content_metadata [String] The contentMetadata as a string
# @param content_group [FileGroup] The {FileGroup} object used as the fixity data source
# @return [String] Returns a remediated copy of the contentMetadata with fixity data filled in.
#   Returns nil when content_metadata is nil, and the unchanged input when content_group is nil
#   or has no files
# @see http://blog.slashpoundbang.com/post/1454850669/how-to-pretty-print-xml-with-nokogiri
def remediate_content_metadata(content_metadata, content_group)
  return nil if content_metadata.nil?
  return content_metadata if content_group.nil? || content_group.files.size < 1
  signature_for_path = content_group.path_hash
  # lookup tables kept in instance variables because remediate_checksum_nodes reads them
  @type_for_name = FileSignature.checksum_type_for_name
  @names_for_type = FileSignature.checksum_names_for_type
  ng_doc = Nokogiri::XML(content_metadata) { |config| config.noblanks }
  ng_doc.xpath("//file").each do |file_node|
    file_signature = signature_for_path[file_node['id']]
    remediate_file_size(file_node, file_signature)
    remediate_checksum_nodes(file_node, file_signature)
  end
  ng_doc.to_xml(:indent => 2)
end
|
190
|
+
|
191
|
+
# @param [Nokogiri::XML::Element] file_node the File stanza being remediated
# @param [FileSignature] signature the fixity data for the file from the FileGroup
# @return [void] update the file size attribute if missing or empty, raise exception if inconsistent
# @raise [RuntimeError] if the size attribute is present but differs from the signature's size
def remediate_file_size(file_node, signature)
  recorded_size = file_node['size']
  expected_size = signature.size.to_s
  if recorded_size.nil? || recorded_size.empty?
    file_node['size'] = expected_size
  elsif recorded_size != expected_size
    raise "Inconsistent size for #{file_node['id']}: #{recorded_size} != #{expected_size}"
  end
end
|
202
|
+
|
203
|
+
# @param [Nokogiri::XML::Element] file_node the File stanza being remediated
# @param [FileSignature] signature the fixity data for the file from the FileGroup
# @return [void] update the file's checksum elements if data missing, raise exception if inconsistent
# @raise [RuntimeError] if an existing checksum value differs from the signature's value for that type
# @note Reads @type_for_name and @names_for_type, which {#remediate_content_metadata} assigns
def remediate_checksum_nodes(file_node, signature)
  # index the <checksum> elements that are already present by checksum type
  # NOTE(review): a type name absent from @type_for_name is indexed under nil -- confirm that is intended
  nodes_by_type = OrderedHash.new
  file_node.xpath('checksum').each do |existing_node|
    nodes_by_type[@type_for_name[existing_node['type']]] = existing_node
  end
  # append a <checksum> element (labelled with the first name of the type) for every type not yet present
  @names_for_type.each do |type, names|
    next if nodes_by_type.has_key?(type)
    added_node = Nokogiri::XML::Element.new('checksum',file_node.document)
    added_node['type'] = names[0]
    file_node << added_node
    nodes_by_type[type] = added_node
  end
  # fill in empty <checksum> elements and cross-check the ones that already had a value
  nodes_by_type.each do |type, checksum_node|
    recorded = checksum_node.content
    expected = signature.checksums[type]
    if recorded.nil? || recorded.empty?
      checksum_node.content = expected
    elsif recorded != expected
      raise "Inconsistent #{type} for #{file_node['id']}: #{recorded} != #{expected}"
    end
  end
end
|
233
|
+
|
234
|
+
end
|
235
|
+
|
236
|
+
end
|