longleaf 0.1.0.pre.3 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +94 -0
- data/.editorconfig +13 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +44 -0
- data/.rubocop_todo.yml +834 -0
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +98 -12
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +150 -0
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +249 -44
- data/lib/longleaf/commands/deregister_command.rb +51 -0
- data/lib/longleaf/commands/preserve_command.rb +50 -0
- data/lib/longleaf/commands/register_command.rb +34 -43
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +33 -8
- data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
- data/lib/longleaf/errors.rb +26 -7
- data/lib/longleaf/events/deregister_event.rb +53 -0
- data/lib/longleaf/events/event_names.rb +9 -0
- data/lib/longleaf/events/event_status_tracking.rb +59 -0
- data/lib/longleaf/events/preserve_event.rb +82 -0
- data/lib/longleaf/events/register_event.rb +59 -51
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +56 -0
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +215 -0
- data/lib/longleaf/helpers/service_date_helper.rb +78 -0
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
- data/lib/longleaf/logging.rb +5 -4
- data/lib/longleaf/logging/redirecting_logger.rb +30 -25
- data/lib/longleaf/models/app_fields.rb +7 -2
- data/lib/longleaf/models/file_record.rb +31 -8
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/md_fields.rb +3 -1
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +43 -16
- data/lib/longleaf/models/s3_storage_location.rb +138 -0
- data/lib/longleaf/models/service_definition.rb +7 -6
- data/lib/longleaf/models/service_fields.rb +7 -1
- data/lib/longleaf/models/service_record.rb +10 -6
- data/lib/longleaf/models/storage_location.rb +24 -21
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +59 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +124 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +198 -0
- data/lib/longleaf/preservation_services/s3_replication_service.rb +131 -0
- data/lib/longleaf/services/application_config_deserializer.rb +80 -21
- data/lib/longleaf/services/application_config_manager.rb +20 -6
- data/lib/longleaf/services/application_config_validator.rb +19 -9
- data/lib/longleaf/services/configuration_validator.rb +67 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +115 -42
- data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
- data/lib/longleaf/services/metadata_serializer.rb +139 -25
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_class_cache.rb +112 -0
- data/lib/longleaf/services/service_definition_manager.rb +10 -7
- data/lib/longleaf/services/service_definition_validator.rb +25 -18
- data/lib/longleaf/services/service_manager.rb +86 -11
- data/lib/longleaf/services/service_mapping_manager.rb +13 -12
- data/lib/longleaf/services/service_mapping_validator.rb +36 -26
- data/lib/longleaf/services/storage_location_manager.rb +76 -15
- data/lib/longleaf/services/storage_location_validator.rb +49 -35
- data/lib/longleaf/specs/config_builder.rb +47 -23
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/custom_matchers.rb +9 -0
- data/lib/longleaf/specs/file_helpers.rb +61 -0
- data/lib/longleaf/specs/metadata_builder.rb +98 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +20 -7
- data/mkdocs.yml +21 -0
- metadata +310 -26
- data/.travis.yml +0 -4
- data/lib/longleaf/commands/abstract_command.rb +0 -37
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'longleaf/services/metadata_serializer'
|
2
|
+
require 'longleaf/services/metadata_deserializer'
|
3
|
+
require 'longleaf/errors'
|
4
|
+
|
5
|
+
module Longleaf
  # Coordinates storage and retrieval of the metadata records belonging to file records.
  class MetadataPersistenceManager
    # Build a MetadataPersistenceManager
    # @param index_manager [IndexManager] manager asked whether an index is in use and,
    #    if so, used to index file records
    def initialize(index_manager)
      @index_manager = index_manager
    end

    # Write the metadata record of the given file record to its metadata path,
    # then pass the file record on to #index.
    # @param file_rec [FileRecord] file record
    # @raise [MetadataError] if the file record has no metadata record
    def persist(file_rec)
      md_rec = file_rec.metadata_record
      if md_rec.nil?
        raise MetadataError.new("No metadata record provided, cannot persist metadata for #{file_rec.path}")
      end

      md_path = file_rec.metadata_path
      digest_algs = file_rec.storage_location.metadata_location.digests
      MetadataSerializer::write(metadata: md_rec, file_path: md_path, digest_algs: digest_algs)

      index(file_rec)
    end

    # Send the file record to the index manager, but only when an index is in use.
    # @param file_rec [FileRecord] file record
    def index(file_rec)
      @index_manager.index(file_rec) if @index_manager.using_index?
    end

    # Deserialize the metadata file of the given file record and attach the
    # resulting record to it.
    # @param file_rec [FileRecord] file record
    # @return [MetadataRecord] the metadata record for the file record
    def load(file_rec)
      digest_algs = file_rec.storage_location.metadata_location.digests
      loaded = MetadataDeserializer.deserialize(file_path: file_rec.metadata_path,
                                                digest_algs: digest_algs)
      file_rec.metadata_record = loaded
      loaded
    end
  end
end
|
@@ -1,76 +1,190 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
require 'longleaf/models/metadata_record'
|
3
3
|
require 'longleaf/models/md_fields'
|
4
|
+
require 'longleaf/helpers/digest_helper'
|
5
|
+
require 'longleaf/errors'
|
6
|
+
require 'longleaf/logging'
|
4
7
|
require 'pathname'
|
8
|
+
require "tempfile"
|
5
9
|
|
6
|
-
# Service which serializes MetadataRecord objects
|
7
10
|
module Longleaf
  # Service which serializes MetadataRecord objects
  class MetadataSerializer
    extend Longleaf::Logging
    MDF ||= MDFields

    # Serialize the contents of the provided metadata record to the specified path
    #
    # @param metadata [MetadataRecord] metadata record to serialize. Required.
    # @param file_path [String] path to write the file to. Required.
    # @param format [String] format to serialize the metadata in. Default is 'yaml'.
    # @param digest_algs [Array] if provided, sidecar digest files for the metadata file
    #    will be generated for each algorithm.
    # @raise [ArgumentError] if metadata is not a MetadataRecord or the format is unsupported
    def self.write(metadata:, file_path:, format: 'yaml', digest_algs: [])
      raise ArgumentError.new('metadata parameter must be a MetadataRecord') \
          unless metadata.class == MetadataRecord

      case format
      when 'yaml'
        content = to_yaml(metadata)
      else
        raise ArgumentError.new("Invalid serialization format #{format} specified")
      end

      atomic_write(file_path, content, digest_algs)
    end

    # @param metadata [MetadataRecord] metadata record to transform
    # @return [String] a yaml representation of the provided MetadataRecord
    def self.to_yaml(metadata)
      props = to_hash(metadata)
      props.to_yaml
    end

    # Create a hash representation of the given MetadataRecord file
    # @param metadata [MetadataRecord] metadata record to transform into a hash
    # @return [Hash] hash with a data section and a services section
    def self.to_hash(metadata)
      props = Hash.new

      data = Hash.new.merge(metadata.properties)
      data[MDF::REGISTERED_TIMESTAMP] = metadata.registered if metadata.registered
      data[MDF::DEREGISTERED_TIMESTAMP] = metadata.deregistered if metadata.deregistered
      # Only record checksums when there are some; a nil value is skipped just like an empty one
      checksums = metadata.checksums
      data[MDF::CHECKSUMS] = checksums unless checksums.nil? || checksums.empty?
      data[MDF::FILE_SIZE] = metadata.file_size unless metadata.file_size.nil?
      data[MDF::LAST_MODIFIED] = metadata.last_modified if metadata.last_modified
      data[MDF::PHYSICAL_PATH] = metadata.physical_path if metadata.physical_path

      props[MDF::DATA] = data

      services = Hash.new
      metadata.list_services.each do |name|
        service = metadata.service(name)
        service[MDF::STALE_REPLICAS] = service.stale_replicas if service.stale_replicas
        service[MDF::SERVICE_TIMESTAMP] = service.timestamp unless service.timestamp.nil?
        service[MDF::RUN_NEEDED] = service.run_needed if service.run_needed
        # Services without any properties are left out of the output
        services[name] = service.properties unless service.properties.empty?
      end

      props[MDF::SERVICES] = services

      props
    end

    # @param format [String] encoding format used for metadata file
    # @return [String] the suffix used to indicate that a file is a metadata file in the provided encoding
    # @raise [ArgumentError] raised if the provided format is not a supported metadata encoding format
    def self.metadata_suffix(format: 'yaml')
      case format
      when 'yaml'
        '-llmd.yaml'
      else
        raise ArgumentError.new("Invalid serialization format #{format} specified")
      end
    end

    # Safely writes the new metadata file and its digests.
    # It does so by first writing the content and its digests to temp files,
    # then making the temp files the current version of the file.
    # Attempts to clean up new data in the case of failure.
    #
    # @param file_path [String, #path] destination of the metadata file
    # @param content [String] serialized metadata to write
    # @param digest_algs [Array] algorithms to produce sidecar digest files for
    def self.atomic_write(file_path, content, digest_algs)
      # Fill in parent directories if they do not exist
      parent_dir = Pathname(file_path).parent
      parent_dir.mkpath unless parent_dir.exist?

      file_path = file_path.path if file_path.respond_to?(:path)

      # If file does not already exist, then simply write it
      unless File.exist?(file_path)
        File.write(file_path, content)
        write_digests(file_path, content, digest_algs)
        return
      end

      # Updating file, use safe atomic write
      File.open(file_path) do |original_file|
        # Hold an exclusive lock on the existing file while it is being replaced
        original_file.flock(File::LOCK_EX)

        base_name = File.basename(file_path)
        Tempfile.open(base_name, parent_dir) do |temp_file|
          begin
            # Write content to temp file
            temp_file.write(content)
            temp_file.close

            temp_path = temp_file.path

            # Set permissions of new file to match old if it exists
            old_stat = File.stat(file_path)
            set_perms(temp_path, old_stat)

            begin
              digest_paths = write_digests(temp_path, content, digest_algs)

              File.rename(temp_path, file_path)
            rescue
              # Remove any digests already generated for the temp file
              cleanup_digests(temp_path)
              raise
            end
          rescue
            temp_file.delete
            raise
          end

          # Cleanup all existing digest files, in case the set of algorithms has changed
          cleanup_digests(file_path)
          # Move new digests into place. The block form of sub inserts file_path literally,
          # so backslashes in the path are not treated as replacement escapes.
          digest_paths.each do |digest_path|
            File.rename(digest_path, digest_path.sub(temp_path) { file_path })
          end
        end
      end
    end

    # Copy ownership and mode from the provided stat information onto a file.
    # Ownership and mode are attempted independently, so that a refused chown
    # does not prevent the mode from being applied.
    #
    # @param file_path [String] path of the file to update
    # @param stat_info [File::Stat, nil] stat to copy from; nothing is done when nil
    # @return [Boolean] false if changing ownership or mode was refused, true otherwise
    def self.set_perms(file_path, stat_info)
      return true unless stat_info

      applied = true

      begin
        File.chown(stat_info.uid, stat_info.gid, file_path)
      rescue Errno::EPERM, Errno::EACCES
        # Changing file ownership failed, moving on.
        applied = false
      end

      begin
        # This operation will affect filesystem ACL's.
        # Only the permission bits of the mode are passed on, not the file type bits.
        File.chmod(stat_info.mode & 0o7777, file_path)
      rescue Errno::EPERM, Errno::EACCES
        applied = false
      end

      applied
    end

    # Deletes all known digest files for the provided file path
    # @param file_path [String] path of the file whose sidecar digest files should be removed
    def self.cleanup_digests(file_path)
      DigestHelper::KNOWN_DIGESTS.each do |alg|
        digest_path = "#{file_path}.#{alg}"
        File.delete(digest_path) if File.exist?(digest_path)
      end
    end

    # Writes one sidecar digest file per algorithm, named "<file_path>.<alg>",
    # containing the hex digest of the content.
    #
    # @param file_path [String] path of the file the digests belong to
    # @param content [String] content to compute digests of
    # @param digests [Array, nil] algorithms to generate digests for
    # @return [Array<String>] paths of the digest files written, empty if no algorithms were given
    def self.write_digests(file_path, content, digests)
      return [] if digests.nil? || digests.empty?

      digest_paths = Array.new

      digests.each do |alg|
        digest_class = DigestHelper::start_digest(alg)
        result = digest_class.hexdigest(content)
        digest_path = "#{file_path}.#{alg}"

        File.write(digest_path, result)

        digest_paths.push(digest_path)

        self.logger.debug("Generated #{alg} digest for metadata file #{file_path}: #{digest_path} #{result}")
      end

      digest_paths
    end

    private_class_method :cleanup_digests
    private_class_method :write_digests
    private_class_method :atomic_write
  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'longleaf/models/md_fields'
|
3
|
+
require 'longleaf/errors'
|
4
|
+
require_relative 'configuration_validator'
|
5
|
+
|
6
|
+
module Longleaf
  # Validator for file metadata
  class MetadataValidator < ConfigurationValidator
    MDF ||= MDFields

    # @param config [Hash] hash containing the metadata to validate. It is handed to the
    #    superclass; #validate reads it back from @config (presumably set there - confirm
    #    against ConfigurationValidator).
    def initialize(config)
      super(config)
    end

    protected

    # Validates the provided metadata for a file to ensure that it is syntactically correct and field types
    # are valid.
    #
    # NOTE(review): this relies on the inherited assert/fail aborting the current check when
    # they report a problem, as the existing hash check before the key? calls already does.
    def validate
      assert("Metadata must be a hash, but a #{@config.class} was provided", @config.class == Hash)
      assert("Metadata must contain a '#{MDF::DATA}' key", @config.key?(MDF::DATA))
      assert("Metadata must contain a '#{MDF::SERVICES}' key", @config.key?(MDF::SERVICES))

      data = @config[MDF::DATA]
      # The key may be present with an empty or scalar value, which cannot be inspected further
      assert("Metadata '#{MDF::DATA}' section must be a hash, but a #{data.class} was provided", data.is_a?(Hash))

      register_on_failure { validate_date_field(data, MDF::REGISTERED_TIMESTAMP) }
      register_on_failure { validate_date_field(data, MDF::DEREGISTERED_TIMESTAMP, required: false) }
      register_on_failure { validate_date_field(data, MDF::LAST_MODIFIED) }

      register_on_failure { validate_positive_integer(data, MDF::FILE_SIZE) }

      checksums = data[MDF::CHECKSUMS]
      register_on_failure do
        if !checksums.nil? && !checksums.is_a?(Hash)
          fail("Field '#{MDF::CHECKSUMS}' must be a map of algorithms to digests, but was a #{checksums.class}")
        end
      end

      # Ensure that any service timestamps present are valid dates.
      # A services key with no value is treated as having no services.
      services = @config[MDF::SERVICES]
      assert("Metadata '#{MDF::SERVICES}' section must be a hash, but a #{services.class} was provided",
          services.nil? || services.is_a?(Hash))

      (services || {}).each do |service_name, service_rec|
        # A service listed without any properties has no timestamp to check
        next if service_rec.nil?

        register_on_failure do
          unless service_rec.is_a?(Hash)
            fail("Service '#{service_name}' must be a hash, but was a #{service_rec.class}")
          end
          validate_date_field(service_rec, MDF::SERVICE_TIMESTAMP, required: false)
        end
      end
    end

    # Checks that a field holds an ISO8601 timestamp.
    #
    # @param section [Hash] section of the metadata containing the field
    # @param field_key [String] key of the field to check
    # @param required [Boolean] when true, a missing field is reported as a failure
    def validate_date_field(section, field_key, required: true)
      field_val = section[field_key]

      if field_val
        begin
          Time.iso8601(field_val)
        rescue ArgumentError, TypeError
          # TypeError covers values which are not strings at all
          fail("Field '#{field_key}' must be a valid ISO8601 timestamp, but contained value '#{field_val}'")
        end
      elsif required
        fail("Metadata must contain a '#{field_key}' field")
      end
    end

    # Checks that a field holds an integer, or a base 10 string representation of one,
    # which is not negative. Note that zero is accepted.
    #
    # @param section [Hash] section of the metadata containing the field
    # @param field_key [String] key of the field to check
    # @param required [Boolean] when true, a missing field is reported as a failure
    def validate_positive_integer(section, field_key, required: true)
      field_val = section[field_key]

      if field_val
        begin
          val = field_val.is_a?(Integer) ? field_val : Integer(field_val, 10)
          if val < 0
            fail("Field '#{field_key}' must be a positive integer")
          end
        rescue ArgumentError, TypeError
          # Raised by Integer() for values which cannot be read as a base 10 integer
          fail("Field '#{field_key}' must be a positive integer")
        end
      elsif required
        fail("Metadata must contain a '#{field_key}' field")
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'longleaf/errors'
|
3
|
+
require 'longleaf/helpers/s3_uri_helper'
|
4
|
+
|
5
|
+
module Longleaf
  # Checks the path configured for an s3 based location
  class S3LocationValidator
    # Reports problems with an s3 path through the supplied validator: the path must
    # not be blank, and a bucket must be extractable from it.
    #
    # @param p_validator [Object] validator whose assert and fail methods receive the results
    # @param name [String] name of the storage location, used in messages
    # @param path_prop [String] name of the path property, used in messages
    # @param section_name [String] name of the configuration section, used in messages
    # @param path [String] the s3 path to check
    def self.validate(p_validator, name, path_prop, section_name, path)
      prefix = "Storage location '#{name}' specifies invalid #{section_name} '#{path_prop}' property: "

      path_present = !(path.nil? || path.to_s.strip.empty?)
      p_validator.assert(prefix + 'Path must not be empty', path_present)

      begin
        bucket = S3UriHelper.extract_bucket(path)
        has_bucket = !bucket.nil?
        p_validator.assert(prefix + 'Path must specify a bucket', has_bucket)
      rescue ArgumentError => uri_error
        # Problems raised while extracting the bucket are reported with their own message
        p_validator.fail(prefix + uri_error.message)
      end
    end
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
module Longleaf
  # Cache for loading and retrieving preservation service classes
  class ServiceClassCache
    STD_PRESERVATION_SERVICE_PATH = 'longleaf/preservation_services/'

    # @param app_manager [Object] passed as the second argument to each service class constructor
    def initialize(app_manager)
      @app_manager = app_manager
      # Cache storing per service definition instances of service classes
      @service_instance_cache = Hash.new
      # Cache storing the class of a service, per script path and class name
      @class_cache = Hash.new
    end

    # Returns an instance of the preservation service defined for the provided service definition,
    # based on the work_script and work_class properties provided.
    # Instances are cached by the name of the service definition.
    #
    # @param service_def [ServiceDefinition] definition of service to instantiate
    # @return [PreservationService] Instance of the preservation service class for the definition.
    def service_instance(service_def)
      service_name = service_def.name
      # Return the cached instance of the service
      return @service_instance_cache[service_name] if @service_instance_cache.key?(service_name)

      clazz = service_class(service_def)
      # Cache and return the class instance
      @service_instance_cache[service_name] = clazz.new(service_def, @app_manager)
    end

    # Load and return the PreservationService class assigned to the provided service definition,
    # based on the work_script and work_class properties provided.
    #
    # @param service_def [ServiceDefinition] definition of service to retrieve class for
    # @return [Class] class of work_script
    # @raise [ConfigurationError] if the script is outside of the load path, cannot be loaded,
    #    or does not define the expected class
    def service_class(service_def)
      service_name = service_def.name
      work_script = service_def.work_script

      if work_script.include?('/')
        expanded_path = Pathname.new(work_script).expand_path.to_s
        unless from_permitted_path?(expanded_path)
          raise ConfigurationError.new("Unable to load work_script for service #{service_name}, #{work_script} is not in a known library path.")
        end

        last_slash_index = work_script.rindex('/')
        script_path = work_script[0..last_slash_index]
        script_name = work_script[(last_slash_index + 1)..-1]
      else
        script_path = STD_PRESERVATION_SERVICE_PATH
        script_name = work_script
      end

      # Strip off the extension. A copy is made so that the work_script string held by
      # the service definition is left untouched.
      script_name = script_name.sub(/\.rb\z/, '')

      require_path = File.join(script_path, script_name)

      # Generate the class name, either configured or from file naming convention if possible
      if service_def.work_class
        class_name = service_def.work_class
      else
        class_name = script_name.split('_').map(&:capitalize).join
        # Assume the longleaf module for classes in the standard path
        class_name = 'Longleaf::' + class_name if script_path == STD_PRESERVATION_SERVICE_PATH
      end

      # Return the cached Class if this script and class name have been encountered before.
      # The class name is part of the key since definitions may name different classes
      # from the same script.
      cache_key = [require_path, class_name]
      return @class_cache[cache_key] if @class_cache.key?(cache_key)

      # Load the script
      begin
        require require_path
      rescue LoadError
        raise ConfigurationError.new("Failed to load work_script '#{script_name}' for service #{service_name}")
      end

      begin
        # cache the class for this work_script and return it
        @class_cache[cache_key] = constantize(class_name)
      rescue NameError
        raise ConfigurationError.new("Failed to load work_script '#{script_name}' for service #{service_name}, class name #{class_name} was not found.")
      end
    end

    private

    # Resolve a possibly namespaced class name to its constant.
    # Borrowed from sidekiq implementation
    def constantize(str)
      names = str.split('::')
      names.shift if names.empty? || names.first.empty?

      names.inject(Object) do |constant, name|
        # the false flag limits search for name to under the constant namespace
        # which mimics Rails' behaviour
        constant.const_defined?(name, false) ? constant.const_get(name, false) : constant.const_missing(name)
      end
    end

    # @param script_path [String] expanded path of a script
    # @return [Boolean] true if the script is located inside one of the $LOAD_PATH directories
    def from_permitted_path?(script_path)
      $LOAD_PATH.any? do |lib_path|
        # Compare against the directory with a trailing separator, so that a sibling
        # directory which merely shares a name prefix does not match
        lib_dir = File.join(File.expand_path(lib_path.to_s), '')
        script_path.start_with?(lib_dir)
      end
    end
  end
end
|