moab-versioning 2.2.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/moab.rb +0 -1
- data/lib/moab/bagger.rb +2 -2
- data/lib/moab/exceptions.rb +1 -1
- data/lib/moab/file_group.rb +4 -5
- data/lib/moab/file_group_difference.rb +3 -3
- data/lib/moab/file_group_difference_subset.rb +1 -1
- data/lib/moab/file_inventory.rb +4 -4
- data/lib/moab/file_inventory_difference.rb +1 -1
- data/lib/moab/file_signature.rb +2 -2
- data/lib/moab/signature_catalog.rb +1 -1
- data/lib/moab/stanford.rb +2 -1
- data/lib/moab/storage_object.rb +1 -1
- data/lib/moab/storage_object_version.rb +7 -7
- data/lib/moab/storage_repository.rb +2 -2
- data/lib/moab/storage_services.rb +1 -3
- data/lib/moab/version_metadata_entry.rb +1 -2
- data/lib/serializer.rb +0 -1
- data/lib/stanford/active_fedora_object.rb +1 -1
- data/lib/stanford/content_inventory.rb +6 -6
- data/lib/stanford/dor_metadata.rb +1 -1
- data/lib/stanford/moab_storage_directory.rb +36 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e4b6638d612a715c99575b74595385e412fc8fd4
|
4
|
+
data.tar.gz: dec35bf65bb48c61e4f84447f06a4a244d556f8e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f1ae80fe9600cea7429abaeec22a74d962e7eec43738364f65860b5d127449bf22aae5cb25d237d17eeb469b918a155e200bce6d42f597c61ab1cc6a862e37c6
|
7
|
+
data.tar.gz: 2dd2b7cc61a003701e4bc2a08f7b63a0bd9b0927d03a30458ae24cc47e18b0e917709ffce7e6dcebbbd243ab5391456f84100b13d878e4ae1fcdd726348e6d47
|
data/lib/moab.rb
CHANGED
data/lib/moab/bagger.rb
CHANGED
@@ -192,7 +192,7 @@ module Moab
|
|
192
192
|
manifest_file = Hash.new
|
193
193
|
manifest_types = [:md5, :sha1, :sha256]
|
194
194
|
manifest_types.each do |type|
|
195
|
-
manifest_pathname[type] = @bag_pathname.join("manifest-#{type
|
195
|
+
manifest_pathname[type] = @bag_pathname.join("manifest-#{type}.txt")
|
196
196
|
manifest_file[type] = manifest_pathname[type].open('w')
|
197
197
|
end
|
198
198
|
@bag_inventory.groups.each do |group|
|
@@ -233,7 +233,7 @@ module Moab
|
|
233
233
|
manifest_file = Hash.new
|
234
234
|
manifest_types = [:md5, :sha1, :sha256]
|
235
235
|
manifest_types.each do |type|
|
236
|
-
manifest_pathname[type] = @bag_pathname.join("tagmanifest-#{type
|
236
|
+
manifest_pathname[type] = @bag_pathname.join("tagmanifest-#{type}.txt")
|
237
237
|
manifest_file[type] = manifest_pathname[type].open('w')
|
238
238
|
end
|
239
239
|
@bag_pathname.children.each do |file|
|
data/lib/moab/exceptions.rb
CHANGED
data/lib/moab/file_group.rb
CHANGED
@@ -219,12 +219,11 @@ module Moab
|
|
219
219
|
|
220
220
|
# @api internal
|
221
221
|
# @param pathname [Pathname, String] The location of the file to be added
|
222
|
-
# @param
|
222
|
+
# @param _validated (unused; kept here for backwards compatibility)
|
223
223
|
# @return [void] Add a single physical file's data to the array of files in this group.
|
224
224
|
# If fixity data was supplied in bag manifests, then utilize that data.
|
225
|
-
def add_physical_file(pathname,
|
226
|
-
pathname=Pathname.new(pathname).expand_path
|
227
|
-
validated ||= is_descendent_of_base?(pathname)
|
225
|
+
def add_physical_file(pathname, _validated=nil)
|
226
|
+
pathname = Pathname.new(pathname).expand_path
|
228
227
|
instance = FileInstance.new.instance_from_file(pathname, @base_directory)
|
229
228
|
if @signatures_from_bag && @signatures_from_bag[pathname]
|
230
229
|
signature = @signatures_from_bag[pathname]
|
@@ -234,7 +233,7 @@ module Moab
|
|
234
233
|
else
|
235
234
|
signature = FileSignature.new.signature_from_file(pathname)
|
236
235
|
end
|
237
|
-
add_file_instance(signature,instance)
|
236
|
+
add_file_instance(signature, instance)
|
238
237
|
end
|
239
238
|
|
240
239
|
end
|
@@ -340,15 +340,15 @@ module Moab
|
|
340
340
|
deltas = Hash.new {|hash, key| hash[key] = []}
|
341
341
|
# case where other_path is empty or 'same'. (create array of strings)
|
342
342
|
[:identical, :modified, :deleted, :copydeleted].each do |change|
|
343
|
-
deltas[change].concat
|
343
|
+
deltas[change].concat(@subset_hash[change].files.collect{ |file| file.basis_path })
|
344
344
|
end
|
345
345
|
# case where basis_path and other_path are both present. (create array of arrays)
|
346
346
|
[:copyadded, :renamed].each do |change|
|
347
|
-
deltas[change].concat
|
347
|
+
deltas[change].concat(@subset_hash[change].files.collect { |file| [file.basis_path,file.other_path] })
|
348
348
|
end
|
349
349
|
# case where basis_path is empty. (create array of strings)
|
350
350
|
[:added].each do |change|
|
351
|
-
deltas[change].concat
|
351
|
+
deltas[change].concat(@subset_hash[change].files.collect { |file| file.other_path })
|
352
352
|
end
|
353
353
|
deltas
|
354
354
|
end
|
data/lib/moab/file_inventory.rb
CHANGED
@@ -164,13 +164,13 @@ module Moab
|
|
164
164
|
data_source = (groups.collect { |g| g.data_source.to_s }).join('|')
|
165
165
|
if data_source.start_with?('contentMetadata')
|
166
166
|
if version_id
|
167
|
-
"v#{version_id
|
167
|
+
"v#{version_id}-#{data_source}"
|
168
168
|
else
|
169
169
|
"new-#{data_source}"
|
170
170
|
end
|
171
171
|
else
|
172
172
|
if version_id
|
173
|
-
"v#{version_id
|
173
|
+
"v#{version_id}"
|
174
174
|
else
|
175
175
|
data_source
|
176
176
|
end
|
@@ -214,7 +214,7 @@ module Moab
|
|
214
214
|
manifest_pathname = Hash.new
|
215
215
|
checksum_types = [:md5, :sha1, :sha256]
|
216
216
|
checksum_types.each do |type|
|
217
|
-
manifest_pathname[type] = bag_pathname.join("manifest-#{type
|
217
|
+
manifest_pathname[type] = bag_pathname.join("manifest-#{type}.txt")
|
218
218
|
end
|
219
219
|
signatures = Hash.new { |hash,path| hash[path] = FileSignature.new }
|
220
220
|
checksum_types.each do |type|
|
@@ -264,7 +264,7 @@ module Moab
|
|
264
264
|
when "directory"
|
265
265
|
'directoryInventory.xml'
|
266
266
|
else
|
267
|
-
raise ArgumentError, "unknown inventory type: #{type
|
267
|
+
raise ArgumentError, "unknown inventory type: #{type}"
|
268
268
|
end
|
269
269
|
end
|
270
270
|
|
@@ -103,7 +103,7 @@ module Moab
|
|
103
103
|
# @return [String] Returns either the common digitial object ID, or a concatenation of both inventory's IDs
|
104
104
|
def common_object_id(basis_inventory, other_inventory)
|
105
105
|
if basis_inventory.digital_object_id != other_inventory.digital_object_id
|
106
|
-
"#{basis_inventory.digital_object_id
|
106
|
+
"#{basis_inventory.digital_object_id}|#{other_inventory.digital_object_id}"
|
107
107
|
else
|
108
108
|
basis_inventory.digital_object_id.to_s
|
109
109
|
end
|
data/lib/moab/file_signature.rb
CHANGED
@@ -79,7 +79,7 @@ module Moab
|
|
79
79
|
when :sha256
|
80
80
|
@sha256 = value
|
81
81
|
else
|
82
|
-
raise ArgumentError, "Unknown checksum type '#{type
|
82
|
+
raise ArgumentError, "Unknown checksum type '#{type}'"
|
83
83
|
end
|
84
84
|
end
|
85
85
|
|
@@ -89,7 +89,7 @@ module Moab
|
|
89
89
|
checksum_hash[:md5] = @md5
|
90
90
|
checksum_hash[:sha1] = @sha1
|
91
91
|
checksum_hash[:sha256] = @sha256
|
92
|
-
checksum_hash.delete_if { |
|
92
|
+
checksum_hash.delete_if { |_key, value| value.nil? or value.empty? }
|
93
93
|
checksum_hash
|
94
94
|
end
|
95
95
|
|
@@ -136,7 +136,7 @@ module Moab
|
|
136
136
|
group.files.each do |file|
|
137
137
|
unless file.signature.complete?
|
138
138
|
if @signature_hash.has_key?(file.signature)
|
139
|
-
file.signature = @signature_hash.find {|k,
|
139
|
+
file.signature = @signature_hash.find { |k, _v| k == file.signature }[0]
|
140
140
|
elsif group_pathname
|
141
141
|
file_pathname = group_pathname.join(file.instances[0].path)
|
142
142
|
file.signature = file.signature.normalized_signature(file_pathname)
|
data/lib/moab/stanford.rb
CHANGED
@@ -4,6 +4,7 @@ require 'stanford/dor_metadata'
|
|
4
4
|
require 'stanford/storage_repository'
|
5
5
|
require 'stanford/storage_services'
|
6
6
|
require 'stanford/active_fedora_object'
|
7
|
+
require 'stanford/moab_storage_directory'
|
7
8
|
|
8
9
|
# Stanford is a module that isolates classes specific to the Stanford Digital Repository
|
9
10
|
#
|
@@ -14,4 +15,4 @@ require 'stanford/active_fedora_object'
|
|
14
15
|
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
15
16
|
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
16
17
|
module Stanford
|
17
|
-
end
|
18
|
+
end
|
data/lib/moab/storage_object.rb
CHANGED
@@ -200,7 +200,7 @@ module Moab
|
|
200
200
|
end
|
201
201
|
end
|
202
202
|
|
203
|
-
# @return [VerificationResult] Return result of storage
|
203
|
+
# @return [VerificationResult] Return result of storage verification
|
204
204
|
def verify_object_storage
|
205
205
|
result = VerificationResult.new(digital_object_id)
|
206
206
|
self.version_list.each do |version|
|
@@ -35,7 +35,7 @@ module Moab
|
|
35
35
|
def initialize(storage_object, version_id)
|
36
36
|
if version_id.is_a?(Integer)
|
37
37
|
@version_id = version_id
|
38
|
-
elsif version_id.is_a?(String) and version_id
|
38
|
+
elsif version_id.is_a?(String) and version_id =~ /^v(\d+)$/
|
39
39
|
@version_id = version_id.sub(/^v/,'').to_i
|
40
40
|
else
|
41
41
|
raise "version_id (#{version_id}) is not in a recognized format"
|
@@ -82,10 +82,10 @@ module Moab
|
|
82
82
|
@storage_object.storage_filepath(catalog_filepath)
|
83
83
|
end
|
84
84
|
|
85
|
-
# @param
|
85
|
+
# @param _file_category (unused; kept here for backwards compatibility)
|
86
86
|
# @param [FileSignature] file_signature The signature of the file
|
87
87
|
# @return [Pathname] Pathname object containing the full path for the specified file
|
88
|
-
def find_filepath_using_signature(
|
88
|
+
def find_filepath_using_signature(_file_category, file_signature)
|
89
89
|
catalog_filepath = signature_catalog.catalog_filepath(file_signature)
|
90
90
|
@storage_object.storage_filepath(catalog_filepath)
|
91
91
|
end
|
@@ -140,7 +140,7 @@ module Moab
|
|
140
140
|
# @param bag_dir [Pathname,String] The location of the bag to be ingested
|
141
141
|
# @return [void] Create the version subdirectory and move files into it
|
142
142
|
def ingest_bag_data(bag_dir)
|
143
|
-
raise "Version already exists: #{@version_pathname
|
143
|
+
raise "Version already exists: #{@version_pathname}" if @version_pathname.exist?
|
144
144
|
@version_pathname.join('manifests').mkpath
|
145
145
|
bag_dir=Pathname(bag_dir)
|
146
146
|
ingest_dir(bag_dir.join('data'),@version_pathname.join('data'))
|
@@ -225,9 +225,9 @@ module Moab
|
|
225
225
|
result = VerificationResult.new("manifest_inventory")
|
226
226
|
manifest_inventory = self.file_inventory('manifests')
|
227
227
|
result.subentities << VerificationResult.verify_value('composite_key',self.composite_key,manifest_inventory.composite_key)
|
228
|
-
result.subentities << VerificationResult.verify_truth('manifests_group', !
|
228
|
+
result.subentities << VerificationResult.verify_truth('manifests_group', !manifest_inventory.group_empty?('manifests'))
|
229
229
|
# measure the manifest signatures of the files in the directory (excluding manifestInventory.xml)
|
230
|
-
directory_inventory = FileInventory.new.inventory_from_directory(@version_pathname.join('manifests'),'manifests')
|
230
|
+
directory_inventory = FileInventory.new.inventory_from_directory(@version_pathname.join('manifests'), 'manifests')
|
231
231
|
directory_inventory.digital_object_id = storage_object.digital_object_id
|
232
232
|
directory_group = directory_inventory.group('manifests')
|
233
233
|
directory_group.remove_file_having_path("manifestInventory.xml")
|
@@ -283,7 +283,7 @@ module Moab
|
|
283
283
|
file.instances.each do |instance|
|
284
284
|
relative_path = File.join(group.group_id, instance.path)
|
285
285
|
catalog_entry = signature_catalog.signature_hash[file.signature]
|
286
|
-
if !
|
286
|
+
if !catalog_entry.nil?
|
287
287
|
found += 1
|
288
288
|
else
|
289
289
|
missing << relative_path.to_s
|
@@ -123,8 +123,8 @@ module Moab
|
|
123
123
|
unless storage_object.object_pathname.exist?
|
124
124
|
if create
|
125
125
|
storage_object.object_pathname.mkpath
|
126
|
-
|
127
|
-
|
126
|
+
else
|
127
|
+
raise Moab::ObjectNotFoundException, "No storage object found for #{object_id}"
|
128
128
|
end
|
129
129
|
end
|
130
130
|
storage_object
|
data/lib/serializer.rb
CHANGED
@@ -6,7 +6,7 @@ module Stanford
|
|
6
6
|
#
|
7
7
|
# ====Data Model
|
8
8
|
# * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
|
9
|
-
# * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing
|
9
|
+
# * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
|
10
10
|
# * <b>{ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance</b>
|
11
11
|
#
|
12
12
|
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
@@ -2,11 +2,11 @@ require 'moab/stanford'
|
|
2
2
|
|
3
3
|
module Stanford
|
4
4
|
|
5
|
-
# Stanford-specific utility methods for transforming contentMetadata to versionInventory and doing
|
5
|
+
# Stanford-specific utility methods for transforming contentMetadata to versionInventory and doing comparisons
|
6
6
|
#
|
7
7
|
# ====Data Model
|
8
8
|
# * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
|
9
|
-
# * <b>{ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing
|
9
|
+
# * <b>{ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons</b>
|
10
10
|
# * {ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance
|
11
11
|
#
|
12
12
|
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
@@ -97,7 +97,7 @@ module Stanford
|
|
97
97
|
def generate_content_metadata(file_group, object_id, version_id)
|
98
98
|
cm = Nokogiri::XML::Builder.new do |xml|
|
99
99
|
xml.contentMetadata(:type=>"sample", :objectId=>object_id) {
|
100
|
-
xml.resource(:type=>"version", :sequence=>"1", :id=>"version-#{version_id
|
100
|
+
xml.resource(:type=>"version", :sequence=>"1", :id=>"version-#{version_id}") {
|
101
101
|
file_group.files.each do |file_manifestation|
|
102
102
|
signature = file_manifestation.signature
|
103
103
|
file_manifestation.instances.each do |instance|
|
@@ -122,7 +122,7 @@ module Stanford
|
|
122
122
|
end
|
123
123
|
|
124
124
|
# @param content_metadata [String,Nokogiri::XML::Document] The contentMetadata as a string or XML doc
|
125
|
-
# @return [Boolean] True if contentMetadata has
|
125
|
+
# @return [Boolean] True if contentMetadata has essential file attributes, else raise exception
|
126
126
|
def validate_content_metadata(content_metadata)
|
127
127
|
result = validate_content_metadata_details(content_metadata)
|
128
128
|
raise Moab::InvalidMetadataException, result[0]+" ..." if result.size > 0
|
@@ -197,7 +197,7 @@ module Stanford
|
|
197
197
|
if file_size.nil? or file_size.empty?
|
198
198
|
file_node['size'] = signature.size.to_s
|
199
199
|
elsif file_size != signature.size.to_s
|
200
|
-
raise "Inconsistent size for #{file_node['id']}: #{file_size} != #{signature.size
|
200
|
+
raise "Inconsistent size for #{file_node['id']}: #{file_size} != #{signature.size}"
|
201
201
|
end
|
202
202
|
end
|
203
203
|
|
@@ -227,7 +227,7 @@ module Stanford
|
|
227
227
|
if cm_checksum.nil? or cm_checksum.empty?
|
228
228
|
checksum_node.content = sig_checksum
|
229
229
|
elsif cm_checksum != sig_checksum
|
230
|
-
raise "Inconsistent #{type
|
230
|
+
raise "Inconsistent #{type} for #{file_node['id']}: #{cm_checksum} != #{sig_checksum}"
|
231
231
|
end
|
232
232
|
end
|
233
233
|
end
|
@@ -6,7 +6,7 @@ module Stanford
|
|
6
6
|
#
|
7
7
|
# ====Data Model
|
8
8
|
# * <b>{DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)</b>
|
9
|
-
# * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing
|
9
|
+
# * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
|
10
10
|
# * {ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance
|
11
11
|
#
|
12
12
|
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'find'
|
2
|
+
|
3
|
+
module Stanford
|
4
|
+
##
|
5
|
+
# methods for dealing with a directory which stores Moab objects
|
6
|
+
class MoabStorageDirectory
|
7
|
+
DRUID_TREE_REGEXP = '[[:lower:]]{2}/\\d{3}/[[:lower:]]{2}/\\d{4}'.freeze
|
8
|
+
DRUID_REGEXP = '[[:lower:]]{2}\\d{3}[[:lower:]]{2}\\d{4}'.freeze
|
9
|
+
|
10
|
+
def self.find_moab_paths(storage_dir)
|
11
|
+
Find.find(storage_dir) do |path|
|
12
|
+
Find.prune unless File.directory?(path) # don't bother with a matching on files, we only care about directories
|
13
|
+
path_match_data = storage_dir_regexp(storage_dir).match(path)
|
14
|
+
if path_match_data
|
15
|
+
yield path_match_data[1], path, path_match_data # yield the druid, the full path, and the MatchData object
|
16
|
+
Find.prune # we don't care about what's in the moab dir, we just want the paths that look like moabs
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def self.list_moab_druids(storage_dir)
|
22
|
+
druids = []
|
23
|
+
find_moab_paths(storage_dir) { |druid, _path, _path_match_data| druids << druid }
|
24
|
+
druids
|
25
|
+
end
|
26
|
+
|
27
|
+
private_class_method def self.storage_dir_regexps
|
28
|
+
@storage_dir_regexps ||= {}
|
29
|
+
end
|
30
|
+
|
31
|
+
# this regexp caching makes things many times faster (e.g. went from ~2200 s to crawl disk11, down to ~300 s)
|
32
|
+
private_class_method def self.storage_dir_regexp(storage_dir)
|
33
|
+
storage_dir_regexps[storage_dir] ||= Regexp.new("^#{storage_dir}/#{DRUID_TREE_REGEXP}/(#{DRUID_REGEXP})$")
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: moab-versioning
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Darren Weber
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2017-
|
14
|
+
date: 2017-11-01 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: confstruct
|
@@ -196,7 +196,7 @@ dependencies:
|
|
196
196
|
- !ruby/object:Gem::Version
|
197
197
|
version: '0'
|
198
198
|
- !ruby/object:Gem::Dependency
|
199
|
-
name: pry
|
199
|
+
name: pry-byebug
|
200
200
|
requirement: !ruby/object:Gem::Requirement
|
201
201
|
requirements:
|
202
202
|
- - ">="
|
@@ -275,6 +275,7 @@ files:
|
|
275
275
|
- lib/stanford/active_fedora_object.rb
|
276
276
|
- lib/stanford/content_inventory.rb
|
277
277
|
- lib/stanford/dor_metadata.rb
|
278
|
+
- lib/stanford/moab_storage_directory.rb
|
278
279
|
- lib/stanford/storage_repository.rb
|
279
280
|
- lib/stanford/storage_services.rb
|
280
281
|
- lib/tasks/yard.rake
|