moab-versioning 2.2.0 → 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/moab.rb +0 -1
- data/lib/moab/bagger.rb +2 -2
- data/lib/moab/exceptions.rb +1 -1
- data/lib/moab/file_group.rb +4 -5
- data/lib/moab/file_group_difference.rb +3 -3
- data/lib/moab/file_group_difference_subset.rb +1 -1
- data/lib/moab/file_inventory.rb +4 -4
- data/lib/moab/file_inventory_difference.rb +1 -1
- data/lib/moab/file_signature.rb +2 -2
- data/lib/moab/signature_catalog.rb +1 -1
- data/lib/moab/stanford.rb +2 -1
- data/lib/moab/storage_object.rb +1 -1
- data/lib/moab/storage_object_version.rb +7 -7
- data/lib/moab/storage_repository.rb +2 -2
- data/lib/moab/storage_services.rb +1 -3
- data/lib/moab/version_metadata_entry.rb +1 -2
- data/lib/serializer.rb +0 -1
- data/lib/stanford/active_fedora_object.rb +1 -1
- data/lib/stanford/content_inventory.rb +6 -6
- data/lib/stanford/dor_metadata.rb +1 -1
- data/lib/stanford/moab_storage_directory.rb +36 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e4b6638d612a715c99575b74595385e412fc8fd4
|
4
|
+
data.tar.gz: dec35bf65bb48c61e4f84447f06a4a244d556f8e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f1ae80fe9600cea7429abaeec22a74d962e7eec43738364f65860b5d127449bf22aae5cb25d237d17eeb469b918a155e200bce6d42f597c61ab1cc6a862e37c6
|
7
|
+
data.tar.gz: 2dd2b7cc61a003701e4bc2a08f7b63a0bd9b0927d03a30458ae24cc47e18b0e917709ffce7e6dcebbbd243ab5391456f84100b13d878e4ae1fcdd726348e6d47
|
data/lib/moab.rb
CHANGED
data/lib/moab/bagger.rb
CHANGED
@@ -192,7 +192,7 @@ module Moab
|
|
192
192
|
manifest_file = Hash.new
|
193
193
|
manifest_types = [:md5, :sha1, :sha256]
|
194
194
|
manifest_types.each do |type|
|
195
|
-
manifest_pathname[type] = @bag_pathname.join("manifest-#{type
|
195
|
+
manifest_pathname[type] = @bag_pathname.join("manifest-#{type}.txt")
|
196
196
|
manifest_file[type] = manifest_pathname[type].open('w')
|
197
197
|
end
|
198
198
|
@bag_inventory.groups.each do |group|
|
@@ -233,7 +233,7 @@ module Moab
|
|
233
233
|
manifest_file = Hash.new
|
234
234
|
manifest_types = [:md5, :sha1, :sha256]
|
235
235
|
manifest_types.each do |type|
|
236
|
-
manifest_pathname[type] = @bag_pathname.join("tagmanifest-#{type
|
236
|
+
manifest_pathname[type] = @bag_pathname.join("tagmanifest-#{type}.txt")
|
237
237
|
manifest_file[type] = manifest_pathname[type].open('w')
|
238
238
|
end
|
239
239
|
@bag_pathname.children.each do |file|
|
data/lib/moab/exceptions.rb
CHANGED
data/lib/moab/file_group.rb
CHANGED
@@ -219,12 +219,11 @@ module Moab
|
|
219
219
|
|
220
220
|
# @api internal
|
221
221
|
# @param pathname [Pathname, String] The location of the file to be added
|
222
|
-
# @param
|
222
|
+
# @param _validated (unused; kept here for backwards compatibility)
|
223
223
|
# @return [void] Add a single physical file's data to the array of files in this group.
|
224
224
|
# If fixity data was supplied in bag manifests, then utilize that data.
|
225
|
-
def add_physical_file(pathname,
|
226
|
-
pathname=Pathname.new(pathname).expand_path
|
227
|
-
validated ||= is_descendent_of_base?(pathname)
|
225
|
+
def add_physical_file(pathname, _validated=nil)
|
226
|
+
pathname = Pathname.new(pathname).expand_path
|
228
227
|
instance = FileInstance.new.instance_from_file(pathname, @base_directory)
|
229
228
|
if @signatures_from_bag && @signatures_from_bag[pathname]
|
230
229
|
signature = @signatures_from_bag[pathname]
|
@@ -234,7 +233,7 @@ module Moab
|
|
234
233
|
else
|
235
234
|
signature = FileSignature.new.signature_from_file(pathname)
|
236
235
|
end
|
237
|
-
add_file_instance(signature,instance)
|
236
|
+
add_file_instance(signature, instance)
|
238
237
|
end
|
239
238
|
|
240
239
|
end
|
data/lib/moab/file_group_difference.rb
CHANGED
@@ -340,15 +340,15 @@ module Moab
|
|
340
340
|
deltas = Hash.new {|hash, key| hash[key] = []}
|
341
341
|
# case where other_path is empty or 'same'. (create array of strings)
|
342
342
|
[:identical, :modified, :deleted, :copydeleted].each do |change|
|
343
|
-
deltas[change].concat
|
343
|
+
deltas[change].concat(@subset_hash[change].files.collect{ |file| file.basis_path })
|
344
344
|
end
|
345
345
|
# case where basis_path and other_path are both present. (create array of arrays)
|
346
346
|
[:copyadded, :renamed].each do |change|
|
347
|
-
deltas[change].concat
|
347
|
+
deltas[change].concat(@subset_hash[change].files.collect { |file| [file.basis_path,file.other_path] })
|
348
348
|
end
|
349
349
|
# case where basis_path is empty. (create array of strings)
|
350
350
|
[:added].each do |change|
|
351
|
-
deltas[change].concat
|
351
|
+
deltas[change].concat(@subset_hash[change].files.collect { |file| file.other_path })
|
352
352
|
end
|
353
353
|
deltas
|
354
354
|
end
|
data/lib/moab/file_inventory.rb
CHANGED
@@ -164,13 +164,13 @@ module Moab
|
|
164
164
|
data_source = (groups.collect { |g| g.data_source.to_s }).join('|')
|
165
165
|
if data_source.start_with?('contentMetadata')
|
166
166
|
if version_id
|
167
|
-
"v#{version_id
|
167
|
+
"v#{version_id}-#{data_source}"
|
168
168
|
else
|
169
169
|
"new-#{data_source}"
|
170
170
|
end
|
171
171
|
else
|
172
172
|
if version_id
|
173
|
-
"v#{version_id
|
173
|
+
"v#{version_id}"
|
174
174
|
else
|
175
175
|
data_source
|
176
176
|
end
|
@@ -214,7 +214,7 @@ module Moab
|
|
214
214
|
manifest_pathname = Hash.new
|
215
215
|
checksum_types = [:md5, :sha1, :sha256]
|
216
216
|
checksum_types.each do |type|
|
217
|
-
manifest_pathname[type] = bag_pathname.join("manifest-#{type
|
217
|
+
manifest_pathname[type] = bag_pathname.join("manifest-#{type}.txt")
|
218
218
|
end
|
219
219
|
signatures = Hash.new { |hash,path| hash[path] = FileSignature.new }
|
220
220
|
checksum_types.each do |type|
|
@@ -264,7 +264,7 @@ module Moab
|
|
264
264
|
when "directory"
|
265
265
|
'directoryInventory.xml'
|
266
266
|
else
|
267
|
-
raise ArgumentError, "unknown inventory type: #{type
|
267
|
+
raise ArgumentError, "unknown inventory type: #{type}"
|
268
268
|
end
|
269
269
|
end
|
270
270
|
|
data/lib/moab/file_inventory_difference.rb
CHANGED
@@ -103,7 +103,7 @@ module Moab
|
|
103
103
|
# @return [String] Returns either the common digital object ID, or a concatenation of both inventories' IDs
|
104
104
|
def common_object_id(basis_inventory, other_inventory)
|
105
105
|
if basis_inventory.digital_object_id != other_inventory.digital_object_id
|
106
|
-
"#{basis_inventory.digital_object_id
|
106
|
+
"#{basis_inventory.digital_object_id}|#{other_inventory.digital_object_id}"
|
107
107
|
else
|
108
108
|
basis_inventory.digital_object_id.to_s
|
109
109
|
end
|
data/lib/moab/file_signature.rb
CHANGED
@@ -79,7 +79,7 @@ module Moab
|
|
79
79
|
when :sha256
|
80
80
|
@sha256 = value
|
81
81
|
else
|
82
|
-
raise ArgumentError, "Unknown checksum type '#{type
|
82
|
+
raise ArgumentError, "Unknown checksum type '#{type}'"
|
83
83
|
end
|
84
84
|
end
|
85
85
|
|
@@ -89,7 +89,7 @@ module Moab
|
|
89
89
|
checksum_hash[:md5] = @md5
|
90
90
|
checksum_hash[:sha1] = @sha1
|
91
91
|
checksum_hash[:sha256] = @sha256
|
92
|
-
checksum_hash.delete_if { |
|
92
|
+
checksum_hash.delete_if { |_key, value| value.nil? or value.empty? }
|
93
93
|
checksum_hash
|
94
94
|
end
|
95
95
|
|
data/lib/moab/signature_catalog.rb
CHANGED
@@ -136,7 +136,7 @@ module Moab
|
|
136
136
|
group.files.each do |file|
|
137
137
|
unless file.signature.complete?
|
138
138
|
if @signature_hash.has_key?(file.signature)
|
139
|
-
file.signature = @signature_hash.find {|k,
|
139
|
+
file.signature = @signature_hash.find { |k, _v| k == file.signature }[0]
|
140
140
|
elsif group_pathname
|
141
141
|
file_pathname = group_pathname.join(file.instances[0].path)
|
142
142
|
file.signature = file.signature.normalized_signature(file_pathname)
|
data/lib/moab/stanford.rb
CHANGED
@@ -4,6 +4,7 @@ require 'stanford/dor_metadata'
|
|
4
4
|
require 'stanford/storage_repository'
|
5
5
|
require 'stanford/storage_services'
|
6
6
|
require 'stanford/active_fedora_object'
|
7
|
+
require 'stanford/moab_storage_directory'
|
7
8
|
|
8
9
|
# Stanford is a module that isolates classes specific to the Stanford Digital Repository
|
9
10
|
#
|
@@ -14,4 +15,4 @@ require 'stanford/active_fedora_object'
|
|
14
15
|
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
15
16
|
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
16
17
|
module Stanford
|
17
|
-
end
|
18
|
+
end
|
data/lib/moab/storage_object.rb
CHANGED
@@ -200,7 +200,7 @@ module Moab
|
|
200
200
|
end
|
201
201
|
end
|
202
202
|
|
203
|
-
# @return [VerificationResult] Return result of storage
|
203
|
+
# @return [VerificationResult] Return result of storage verification
|
204
204
|
def verify_object_storage
|
205
205
|
result = VerificationResult.new(digital_object_id)
|
206
206
|
self.version_list.each do |version|
|
data/lib/moab/storage_object_version.rb
CHANGED
@@ -35,7 +35,7 @@ module Moab
|
|
35
35
|
def initialize(storage_object, version_id)
|
36
36
|
if version_id.is_a?(Integer)
|
37
37
|
@version_id = version_id
|
38
|
-
elsif version_id.is_a?(String) and version_id
|
38
|
+
elsif version_id.is_a?(String) and version_id =~ /^v(\d+)$/
|
39
39
|
@version_id = version_id.sub(/^v/,'').to_i
|
40
40
|
else
|
41
41
|
raise "version_id (#{version_id}) is not in a recognized format"
|
@@ -82,10 +82,10 @@ module Moab
|
|
82
82
|
@storage_object.storage_filepath(catalog_filepath)
|
83
83
|
end
|
84
84
|
|
85
|
-
# @param
|
85
|
+
# @param _file_category (unused; kept here for backwards compatibility)
|
86
86
|
# @param [FileSignature] file_signature The signature of the file
|
87
87
|
# @return [Pathname] Pathname object containing the full path for the specified file
|
88
|
-
def find_filepath_using_signature(
|
88
|
+
def find_filepath_using_signature(_file_category, file_signature)
|
89
89
|
catalog_filepath = signature_catalog.catalog_filepath(file_signature)
|
90
90
|
@storage_object.storage_filepath(catalog_filepath)
|
91
91
|
end
|
@@ -140,7 +140,7 @@ module Moab
|
|
140
140
|
# @param bag_dir [Pathname,String] The location of the bag to be ingested
|
141
141
|
# @return [void] Create the version subdirectory and move files into it
|
142
142
|
def ingest_bag_data(bag_dir)
|
143
|
-
raise "Version already exists: #{@version_pathname
|
143
|
+
raise "Version already exists: #{@version_pathname}" if @version_pathname.exist?
|
144
144
|
@version_pathname.join('manifests').mkpath
|
145
145
|
bag_dir=Pathname(bag_dir)
|
146
146
|
ingest_dir(bag_dir.join('data'),@version_pathname.join('data'))
|
@@ -225,9 +225,9 @@ module Moab
|
|
225
225
|
result = VerificationResult.new("manifest_inventory")
|
226
226
|
manifest_inventory = self.file_inventory('manifests')
|
227
227
|
result.subentities << VerificationResult.verify_value('composite_key',self.composite_key,manifest_inventory.composite_key)
|
228
|
-
result.subentities << VerificationResult.verify_truth('manifests_group', !
|
228
|
+
result.subentities << VerificationResult.verify_truth('manifests_group', !manifest_inventory.group_empty?('manifests'))
|
229
229
|
# measure the manifest signatures of the files in the directory (excluding manifestInventory.xml)
|
230
|
-
directory_inventory = FileInventory.new.inventory_from_directory(@version_pathname.join('manifests'),'manifests')
|
230
|
+
directory_inventory = FileInventory.new.inventory_from_directory(@version_pathname.join('manifests'), 'manifests')
|
231
231
|
directory_inventory.digital_object_id = storage_object.digital_object_id
|
232
232
|
directory_group = directory_inventory.group('manifests')
|
233
233
|
directory_group.remove_file_having_path("manifestInventory.xml")
|
@@ -283,7 +283,7 @@ module Moab
|
|
283
283
|
file.instances.each do |instance|
|
284
284
|
relative_path = File.join(group.group_id, instance.path)
|
285
285
|
catalog_entry = signature_catalog.signature_hash[file.signature]
|
286
|
-
if !
|
286
|
+
if !catalog_entry.nil?
|
287
287
|
found += 1
|
288
288
|
else
|
289
289
|
missing << relative_path.to_s
|
data/lib/moab/storage_repository.rb
CHANGED
@@ -123,8 +123,8 @@ module Moab
|
|
123
123
|
unless storage_object.object_pathname.exist?
|
124
124
|
if create
|
125
125
|
storage_object.object_pathname.mkpath
|
126
|
-
|
127
|
-
|
126
|
+
else
|
127
|
+
raise Moab::ObjectNotFoundException, "No storage object found for #{object_id}"
|
128
128
|
end
|
129
129
|
end
|
130
130
|
storage_object
|
data/lib/serializer.rb
CHANGED
data/lib/stanford/active_fedora_object.rb
CHANGED
@@ -6,7 +6,7 @@ module Stanford
|
|
6
6
|
#
|
7
7
|
# ====Data Model
|
8
8
|
# * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
|
9
|
-
# * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing
|
9
|
+
# * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
|
10
10
|
# * <b>{ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance</b>
|
11
11
|
#
|
12
12
|
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
data/lib/stanford/content_inventory.rb
CHANGED
@@ -2,11 +2,11 @@ require 'moab/stanford'
|
|
2
2
|
|
3
3
|
module Stanford
|
4
4
|
|
5
|
-
# Stanford-specific utility methods for transforming contentMetadata to versionInventory and doing
|
5
|
+
# Stanford-specific utility methods for transforming contentMetadata to versionInventory and doing comparisons
|
6
6
|
#
|
7
7
|
# ====Data Model
|
8
8
|
# * {DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)
|
9
|
-
# * <b>{ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing
|
9
|
+
# * <b>{ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons</b>
|
10
10
|
# * {ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance
|
11
11
|
#
|
12
12
|
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
@@ -97,7 +97,7 @@ module Stanford
|
|
97
97
|
def generate_content_metadata(file_group, object_id, version_id)
|
98
98
|
cm = Nokogiri::XML::Builder.new do |xml|
|
99
99
|
xml.contentMetadata(:type=>"sample", :objectId=>object_id) {
|
100
|
-
xml.resource(:type=>"version", :sequence=>"1", :id=>"version-#{version_id
|
100
|
+
xml.resource(:type=>"version", :sequence=>"1", :id=>"version-#{version_id}") {
|
101
101
|
file_group.files.each do |file_manifestation|
|
102
102
|
signature = file_manifestation.signature
|
103
103
|
file_manifestation.instances.each do |instance|
|
@@ -122,7 +122,7 @@ module Stanford
|
|
122
122
|
end
|
123
123
|
|
124
124
|
# @param content_metadata [String,Nokogiri::XML::Document] The contentMetadata as a string or XML doc
|
125
|
-
# @return [Boolean] True if contentMetadata has
|
125
|
+
# @return [Boolean] True if contentMetadata has essential file attributes, else raise exception
|
126
126
|
def validate_content_metadata(content_metadata)
|
127
127
|
result = validate_content_metadata_details(content_metadata)
|
128
128
|
raise Moab::InvalidMetadataException, result[0]+" ..." if result.size > 0
|
@@ -197,7 +197,7 @@ module Stanford
|
|
197
197
|
if file_size.nil? or file_size.empty?
|
198
198
|
file_node['size'] = signature.size.to_s
|
199
199
|
elsif file_size != signature.size.to_s
|
200
|
-
raise "Inconsistent size for #{file_node['id']}: #{file_size} != #{signature.size
|
200
|
+
raise "Inconsistent size for #{file_node['id']}: #{file_size} != #{signature.size}"
|
201
201
|
end
|
202
202
|
end
|
203
203
|
|
@@ -227,7 +227,7 @@ module Stanford
|
|
227
227
|
if cm_checksum.nil? or cm_checksum.empty?
|
228
228
|
checksum_node.content = sig_checksum
|
229
229
|
elsif cm_checksum != sig_checksum
|
230
|
-
raise "Inconsistent #{type
|
230
|
+
raise "Inconsistent #{type} for #{file_node['id']}: #{cm_checksum} != #{sig_checksum}"
|
231
231
|
end
|
232
232
|
end
|
233
233
|
end
|
data/lib/stanford/dor_metadata.rb
CHANGED
@@ -6,7 +6,7 @@ module Stanford
|
|
6
6
|
#
|
7
7
|
# ====Data Model
|
8
8
|
# * <b>{DorMetadata} = utility methods for interfacing with Stanford metadata files (esp contentMetadata)</b>
|
9
|
-
# * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing
|
9
|
+
# * {ContentInventory} [1..1] = utilities for transforming contentMetadata to versionInventory and doing comparisons
|
10
10
|
# * {ActiveFedoraObject} [1..*] = utility for extracting content or other information from a Fedora Instance
|
11
11
|
#
|
12
12
|
# @note Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior University.
|
data/lib/stanford/moab_storage_directory.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'find'
|
2
|
+
|
3
|
+
module Stanford
|
4
|
+
##
|
5
|
+
# methods for dealing with a directory which stores Moab objects
|
6
|
+
class MoabStorageDirectory
|
7
|
+
DRUID_TREE_REGEXP = '[[:lower:]]{2}/\\d{3}/[[:lower:]]{2}/\\d{4}'.freeze
|
8
|
+
DRUID_REGEXP = '[[:lower:]]{2}\\d{3}[[:lower:]]{2}\\d{4}'.freeze
|
9
|
+
|
10
|
+
def self.find_moab_paths(storage_dir)
|
11
|
+
Find.find(storage_dir) do |path|
|
12
|
+
Find.prune unless File.directory?(path) # don't bother matching on files; we only care about directories
|
13
|
+
path_match_data = storage_dir_regexp(storage_dir).match(path)
|
14
|
+
if path_match_data
|
15
|
+
yield path_match_data[1], path, path_match_data # yield the druid, the full path, and the MatchData object
|
16
|
+
Find.prune # we don't care about what's in the moab dir, we just want the paths that look like moabs
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def self.list_moab_druids(storage_dir)
|
22
|
+
druids = []
|
23
|
+
find_moab_paths(storage_dir) { |druid, _path, _path_match_data| druids << druid }
|
24
|
+
druids
|
25
|
+
end
|
26
|
+
|
27
|
+
private_class_method def self.storage_dir_regexps
|
28
|
+
@storage_dir_regexps ||= {}
|
29
|
+
end
|
30
|
+
|
31
|
+
# this regexp caching makes things many times faster (e.g. crawling disk11 went from ~2200 s down to ~300 s)
|
32
|
+
private_class_method def self.storage_dir_regexp(storage_dir)
|
33
|
+
storage_dir_regexps[storage_dir] ||= Regexp.new("^#{storage_dir}/#{DRUID_TREE_REGEXP}/(#{DRUID_REGEXP})$")
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: moab-versioning
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Darren Weber
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2017-
|
14
|
+
date: 2017-11-01 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: confstruct
|
@@ -196,7 +196,7 @@ dependencies:
|
|
196
196
|
- !ruby/object:Gem::Version
|
197
197
|
version: '0'
|
198
198
|
- !ruby/object:Gem::Dependency
|
199
|
-
name: pry
|
199
|
+
name: pry-byebug
|
200
200
|
requirement: !ruby/object:Gem::Requirement
|
201
201
|
requirements:
|
202
202
|
- - ">="
|
@@ -275,6 +275,7 @@ files:
|
|
275
275
|
- lib/stanford/active_fedora_object.rb
|
276
276
|
- lib/stanford/content_inventory.rb
|
277
277
|
- lib/stanford/dor_metadata.rb
|
278
|
+
- lib/stanford/moab_storage_directory.rb
|
278
279
|
- lib/stanford/storage_repository.rb
|
279
280
|
- lib/stanford/storage_services.rb
|
280
281
|
- lib/tasks/yard.rake
|