longleaf 0.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +94 -0
- data/.editorconfig +13 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +44 -0
- data/.rubocop_todo.yml +834 -0
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +98 -12
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +150 -0
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +252 -46
- data/lib/longleaf/commands/deregister_command.rb +51 -0
- data/lib/longleaf/commands/preserve_command.rb +50 -0
- data/lib/longleaf/commands/register_command.rb +34 -43
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +33 -8
- data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
- data/lib/longleaf/errors.rb +26 -7
- data/lib/longleaf/events/deregister_event.rb +53 -0
- data/lib/longleaf/events/event_names.rb +9 -0
- data/lib/longleaf/events/event_status_tracking.rb +59 -0
- data/lib/longleaf/events/preserve_event.rb +82 -0
- data/lib/longleaf/events/register_event.rb +59 -51
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +56 -0
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +215 -0
- data/lib/longleaf/helpers/service_date_helper.rb +78 -0
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
- data/lib/longleaf/logging.rb +5 -4
- data/lib/longleaf/logging/redirecting_logger.rb +30 -25
- data/lib/longleaf/models/app_fields.rb +7 -2
- data/lib/longleaf/models/file_record.rb +31 -8
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/md_fields.rb +3 -1
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +43 -16
- data/lib/longleaf/models/s3_storage_location.rb +138 -0
- data/lib/longleaf/models/service_definition.rb +7 -6
- data/lib/longleaf/models/service_fields.rb +7 -1
- data/lib/longleaf/models/service_record.rb +10 -6
- data/lib/longleaf/models/storage_location.rb +24 -19
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +59 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +124 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +198 -0
- data/lib/longleaf/preservation_services/s3_replication_service.rb +131 -0
- data/lib/longleaf/services/application_config_deserializer.rb +81 -24
- data/lib/longleaf/services/application_config_manager.rb +20 -6
- data/lib/longleaf/services/application_config_validator.rb +19 -9
- data/lib/longleaf/services/configuration_validator.rb +67 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +115 -42
- data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
- data/lib/longleaf/services/metadata_serializer.rb +156 -23
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_class_cache.rb +112 -0
- data/lib/longleaf/services/service_definition_manager.rb +10 -7
- data/lib/longleaf/services/service_definition_validator.rb +25 -18
- data/lib/longleaf/services/service_manager.rb +86 -11
- data/lib/longleaf/services/service_mapping_manager.rb +13 -12
- data/lib/longleaf/services/service_mapping_validator.rb +36 -26
- data/lib/longleaf/services/storage_location_manager.rb +76 -15
- data/lib/longleaf/services/storage_location_validator.rb +49 -35
- data/lib/longleaf/specs/config_builder.rb +47 -23
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/custom_matchers.rb +9 -0
- data/lib/longleaf/specs/file_helpers.rb +61 -0
- data/lib/longleaf/specs/metadata_builder.rb +98 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +20 -7
- data/mkdocs.yml +21 -0
- metadata +308 -24
- data/.travis.yml +0 -4
- data/lib/longleaf/commands/abstract_command.rb +0 -37
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
require 'longleaf/services/metadata_serializer'
|
|
2
|
+
require 'longleaf/services/metadata_deserializer'
|
|
3
|
+
require 'longleaf/errors'
|
|
4
|
+
|
|
5
|
+
module Longleaf
|
|
6
|
+
# Handles the persistence of metadata records
|
|
7
|
+
class MetadataPersistenceManager
|
|
8
|
+
# Initialize the MetadataPersistenceManager
|
|
9
|
+
# @param index_manager [IndexManager] manager used to index metadata records
|
|
10
|
+
def initialize(index_manager)
  # Retained so the indexing step can ask whether an index is in use
  @index_manager = index_manager
end
|
|
13
|
+
|
|
14
|
+
# Persist the metadata for the provided file record to all configured destinations.
|
|
15
|
+
# This may include to disk as well as to an index.
|
|
16
|
+
# @param file_rec [FileRecord] file record
|
|
17
|
+
def persist(file_rec)
  md_rec = file_rec.metadata_record
  # Nothing can be written without a metadata record
  raise MetadataError, "No metadata record provided, cannot persist metadata for #{file_rec.path}" if md_rec.nil?

  # Write the metadata file, with sidecar digests as configured for the metadata location
  md_path = file_rec.metadata_path
  md_digests = file_rec.storage_location.metadata_location.digests
  MetadataSerializer.write(metadata: md_rec, file_path: md_path, digest_algs: md_digests)

  # Then update the index; its result is the return value of this method
  index(file_rec)
end
|
|
28
|
+
|
|
29
|
+
# Index metadata for the provided file record
|
|
30
|
+
# @param file_rec [FileRecord] file record
|
|
31
|
+
def index(file_rec)
  # Skipped entirely (returning nil) when no index is in use
  @index_manager.index(file_rec) if @index_manager.using_index?
end
|
|
36
|
+
|
|
37
|
+
# Load the metadata record for the provided file record
|
|
38
|
+
# @param file_rec [FileRecord] file record
|
|
39
|
+
# @return [MetadataRecord] the metadata record for the file record
|
|
40
|
+
def load(file_rec)
  md_path = file_rec.metadata_path
  md_digests = file_rec.storage_location.metadata_location.digests

  # Attach the deserialized record to the file record; the assignment also
  # evaluates to the record, which is what gets returned
  file_rec.metadata_record = MetadataDeserializer.deserialize(file_path: md_path, digest_algs: md_digests)
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
@@ -1,71 +1,204 @@
|
|
|
1
1
|
require 'yaml'
|
|
2
|
-
|
|
3
|
-
|
|
2
|
+
require 'longleaf/models/metadata_record'
|
|
3
|
+
require 'longleaf/models/md_fields'
|
|
4
|
+
require 'longleaf/helpers/digest_helper'
|
|
5
|
+
require 'longleaf/errors'
|
|
6
|
+
require 'longleaf/logging'
|
|
7
|
+
require 'pathname'
|
|
8
|
+
require "tempfile"
|
|
4
9
|
|
|
5
|
-
# Service which serializes MetadataRecord objects
|
|
6
10
|
module Longleaf
|
|
11
|
+
# Service which serializes MetadataRecord objects
|
|
7
12
|
class MetadataSerializer
|
|
8
|
-
|
|
9
|
-
|
|
13
|
+
extend Longleaf::Logging
|
|
14
|
+
MDF ||= MDFields
|
|
15
|
+
|
|
10
16
|
# Serialize the contents of the provided metadata record to the specified path
|
|
11
17
|
#
|
|
12
18
|
# @param metadata [MetadataRecord] metadata record to serialize. Required.
|
|
13
19
|
# @param file_path [String] path to write the file to. Required.
|
|
14
20
|
# @param format [String] format to serialize the metadata in. Default is 'yaml'.
|
|
15
|
-
|
|
21
|
+
# @param digest_algs [Array] if provided, sidecar digest files for the metadata file
|
|
22
|
+
# will be generated for each algorithm.
|
|
23
|
+
def self.write(metadata:, file_path:, format: 'yaml', digest_algs: [])
  # Only objects whose class is exactly MetadataRecord are accepted
  unless metadata.instance_of?(MetadataRecord)
    raise ArgumentError, 'metadata parameter must be a MetadataRecord'
  end

  # Encode the record in the requested format; yaml is the only one supported
  content = case format
            when 'yaml' then to_yaml(metadata)
            else raise ArgumentError, "Invalid serialization format #{format} specified"
            end

  atomic_write(file_path, content, digest_algs)
end
|
|
28
|
-
|
|
36
|
+
|
|
29
37
|
# @param metadata [MetadataRecord] metadata record to transform
|
|
30
38
|
# @return [String] a yaml representation of the provided MetadataRecord
|
|
31
39
|
def self.to_yaml(metadata)
  # Build the hash form first, then let the YAML library encode it
  to_hash(metadata).to_yaml
end
|
|
35
|
-
|
|
43
|
+
|
|
44
|
+
# Create a hash representation of the given MetadataRecord file
|
|
45
|
+
# @param metadata [MetadataRecord] metadata record to transform into a hash
|
|
36
46
|
def self.to_hash(metadata)
  # File-level properties: start from a copy of the record's custom properties,
  # then add each standard field only when it carries a value
  data = {}.merge(metadata.properties)
  data[MDF::REGISTERED_TIMESTAMP] = metadata.registered if metadata.registered
  data[MDF::DEREGISTERED_TIMESTAMP] = metadata.deregistered if metadata.deregistered

  # Checksums are skipped when nil as well as when empty, so that a record
  # without checksums never produces a nil entry in the output
  checksums = metadata.checksums
  data[MDF::CHECKSUMS] = checksums unless checksums.nil? || checksums.empty?

  # A file size of 0 is a real value, so only nil is treated as absent
  data[MDF::FILE_SIZE] = metadata.file_size unless metadata.file_size.nil?
  data[MDF::LAST_MODIFIED] = metadata.last_modified if metadata.last_modified
  data[MDF::PHYSICAL_PATH] = metadata.physical_path if metadata.physical_path

  # Per-service state, keyed by service name
  services = {}
  metadata.list_services.each do |name|
    service = metadata.service(name)
    service[MDF::STALE_REPLICAS] = service.stale_replicas if service.stale_replicas
    service[MDF::SERVICE_TIMESTAMP] = service.timestamp unless service.timestamp.nil?
    service[MDF::RUN_NEEDED] = service.run_needed if service.run_needed
    # Services with nothing to record are left out
    services[name] = service.properties unless service.properties.empty?
  end

  { MDF::DATA => data, MDF::SERVICES => services }
end
|
|
61
|
-
|
|
72
|
+
|
|
73
|
+
# @param format [String] encoding format used for metadata file
|
|
74
|
+
# @return [String] the suffix used to indicate that a file is a metadata file in the provided encoding
|
|
75
|
+
# @raise [ArgumentError] raised if the provided format is not a supported metadata encoding format
|
|
62
76
|
def self.metadata_suffix(format: 'yaml')
  # yaml is the only supported encoding
  return '-llmd.yaml' if 'yaml' == format

  raise ArgumentError, "Invalid serialization format #{format} specified"
end
|
|
84
|
+
|
|
85
|
+
# Safely writes the new metadata file and its digests.
|
|
86
|
+
# It does so by first writing the content and its digests to temp files,
|
|
87
|
+
# then making the temp files the current version of the file.
|
|
88
|
+
# Attempts to clean up new data in the case of failure.
|
|
89
|
+
def self.atomic_write(file_path, content, digest_algs)
  # Normalize File-like objects and Pathnames to a plain String path, since the
  # path is used in String operations when the new digests are moved into place
  file_path = file_path.path if file_path.respond_to?(:path)
  file_path = file_path.to_path if file_path.respond_to?(:to_path)

  # Fill in parent directories if they do not exist
  parent_dir = Pathname(file_path).parent
  parent_dir.mkpath unless parent_dir.exist?

  # If file does not already exist, then simply write it
  unless File.exist?(file_path)
    File.write(file_path, content)
    write_digests(file_path, content, digest_algs)
    return
  end

  # Updating file, use safe atomic write
  File.open(file_path) do |original_file|
    # Held for the duration of the update, until the block closes the file
    original_file.flock(File::LOCK_EX)

    old_renamed = nil
    digest_paths = nil
    temp_path = nil
    # The temp file is created next to the target so the renames below stay
    # within one directory
    Tempfile.open(File.basename(file_path), parent_dir.to_s) do |temp_file|
      begin
        # Write content to temp file
        temp_file.write(content)
        temp_file.close
        temp_path = temp_file.path

        # Set permissions of the new file to match the existing file
        set_perms(temp_path, File.stat(file_path))

        # Produce digest files for the temp file
        digest_paths = write_digests(temp_path, content, digest_algs)

        # Move the old file to a temp path in case it needs to be restored
        old_renamed = "#{temp_path}.old"
        File.rename(file_path, old_renamed)

        # Move the new file into place as the new main file
        File.rename(temp_path, file_path)
      rescue
        # Attempt to restore the old file if it had already been moved. Every
        # cleanup step checks for existence first, so that a missing file does
        # not raise here and hide the error that caused the failure.
        if !old_renamed.nil? && File.exist?(old_renamed) && !File.exist?(file_path)
          File.rename(old_renamed, file_path)
        end
        # Cleanup the temp file and any digest files written for it
        temp_file.delete if temp_file.path && File.exist?(temp_file.path)
        (digest_paths || []).each do |digest_path|
          File.delete(digest_path) if File.exist?(digest_path)
        end
        raise
      end

      # Cleanup all existing digest files, in case the set of algorithms has changed
      cleanup_digests(file_path)
      # Move new digests into place
      digest_paths.each do |digest_path|
        File.rename(digest_path, digest_path.sub(temp_path, file_path))
      end
      # Cleanup the old file
      File.delete(old_renamed)
    end
  end
end
|
|
156
|
+
|
|
157
|
+
# Applies the owner, group and mode recorded in stat_info to the file at file_path.
# @param file_path [String] path of the file to update
# @param stat_info [File::Stat, nil] source of the uid, gid and mode; nothing is changed when nil
# @return [Boolean] false if changing ownership or mode was refused (EPERM or EACCES), true otherwise
def self.set_perms(file_path, stat_info)
  return true unless stat_info

  File.chown(stat_info.uid, stat_info.gid, file_path)
  # This operation will affect filesystem ACL's
  File.chmod(stat_info.mode, file_path)
  true
rescue Errno::EPERM, Errno::EACCES
  # Not permitted to change ownership or mode, moving on.
  false
end
|
|
171
|
+
|
|
172
|
+
# Deletes all known digest files for the provided file path
|
|
173
|
+
def self.cleanup_digests(file_path)
  DigestHelper::KNOWN_DIGESTS.each do |alg|
    # Sidecar digest files are named <file path>.<algorithm>
    sidecar = "#{file_path}.#{alg}"
    next unless File.exist?(sidecar)

    File.delete(sidecar)
  end
end
|
|
179
|
+
|
|
180
|
+
# Writes one sidecar digest file per algorithm for the given content.
# @param file_path [String] path the sidecar names are derived from (<file_path>.<alg>)
# @param content [String] content to compute the digests of
# @param digests [Array, nil] digest algorithm names; nothing is written when nil or empty
# @return [Array<String>] paths of the digest files written
def self.write_digests(file_path, content, digests)
  return [] if digests.nil? || digests.empty?

  digests.map do |alg|
    hex = DigestHelper::start_digest(alg).hexdigest(content)
    sidecar = "#{file_path}.#{alg}"
    File.write(sidecar, hex)

    logger.debug("Generated #{alg} digest for metadata file #{file_path}: #{sidecar} #{hex}")
    sidecar
  end
end
|
|
199
|
+
|
|
200
|
+
private_class_method :cleanup_digests
|
|
201
|
+
private_class_method :write_digests
|
|
202
|
+
private_class_method :atomic_write
|
|
70
203
|
end
|
|
71
|
-
end
|
|
204
|
+
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
require 'pathname'
|
|
2
|
+
require 'longleaf/models/md_fields'
|
|
3
|
+
require 'longleaf/errors'
|
|
4
|
+
require_relative 'configuration_validator'
|
|
5
|
+
|
|
6
|
+
module Longleaf
|
|
7
|
+
# Validator for file metadata
|
|
8
|
+
class MetadataValidator < ConfigurationValidator
|
|
9
|
+
MDF ||= MDFields
|
|
10
|
+
|
|
11
|
+
# @param config [Hash] hash containing the application configuration
|
|
12
|
+
def initialize(config)
  # Bare super forwards config unchanged to the parent validator
  super
end
|
|
15
|
+
|
|
16
|
+
protected
|
|
17
|
+
# Validates the provided metadata for a file to ensure that it is syntactically correct and field types
|
|
18
|
+
# are valid.
|
|
19
|
+
def validate
  assert("Metadata must be a hash, but a #{@config.class} was provided", @config.class == Hash)
  assert("Metadata must contain a '#{MDF::DATA}' key", @config.key?(MDF::DATA))
  assert("Metadata must contain a '#{MDF::SERVICES}' key", @config.key?(MDF::SERVICES))

  # Both sections are indexed and iterated below, so an empty or scalar section
  # is reported as a validation problem instead of raising NoMethodError.
  # NOTE(review): relies on assert stopping validation on failure, as the
  # checks above already do - confirm against ConfigurationValidator.
  data = @config[MDF::DATA]
  services = @config[MDF::SERVICES]
  assert("Metadata '#{MDF::DATA}' section must be a hash, but a #{data.class} was provided", data.is_a?(Hash))
  assert("Metadata '#{MDF::SERVICES}' section must be a hash, but a #{services.class} was provided",
         services.is_a?(Hash))

  # Each field check is wrapped separately via register_on_failure
  register_on_failure { validate_date_field(data, MDF::REGISTERED_TIMESTAMP) }
  register_on_failure { validate_date_field(data, MDF::DEREGISTERED_TIMESTAMP, required: false) }
  register_on_failure { validate_date_field(data, MDF::LAST_MODIFIED) }

  register_on_failure { validate_positive_integer(data, MDF::FILE_SIZE) }

  # Checksums are optional, but when present must map algorithms to digests
  checksums = data[MDF::CHECKSUMS]
  register_on_failure do
    if !checksums.nil? && !checksums.is_a?(Hash)
      fail("Field '#{MDF::CHECKSUMS}' must be a map of algorithms to digests, but was a #{checksums.class}")
    end
  end

  # Ensure that any service timestamps present are valid dates
  services.each do |service_name, service_rec|
    register_on_failure do
      unless service_rec.is_a?(Hash)
        fail("Service '#{service_name}' must be a hash, but was a #{service_rec.class}")
      end
      validate_date_field(service_rec, MDF::SERVICE_TIMESTAMP, required: false)
    end
  end
end
|
|
44
|
+
|
|
45
|
+
# Checks that a field holds an ISO8601 timestamp string.
# @param section [Hash] section of the metadata containing the field
# @param field_key [String] key of the field within the section
# @param required [Boolean] when true, a missing (nil or false) value is a failure
def validate_date_field(section, field_key, required: true)
  field_val = section[field_key]

  if field_val
    begin
      # Non-string values (e.g. an integer, or a Time produced from an unquoted
      # YAML timestamp) are routed through the same failure as malformed
      # strings, rather than raising an error that is not rescued here.
      raise ArgumentError, 'timestamp is not a string' unless field_val.is_a?(String)

      Time.iso8601(field_val)
    rescue ArgumentError
      fail("Field '#{field_key}' must be a valid ISO8601 timestamp, but contained value '#{field_val}'")
    end
  elsif required
    fail("Metadata must contain a '#{field_key}' field")
  end
end
|
|
58
|
+
|
|
59
|
+
# Checks that a field holds an integer, or a base-10 integer string, that is not negative.
# @param section [Hash] section of the metadata containing the field
# @param field_key [String] key of the field within the section
# @param required [Boolean] when true, a missing (nil or false) value is a failure
def validate_positive_integer(section, field_key, required: true)
  raw = section[field_key]

  if raw
    invalid_msg = "Field '#{field_key}' must be a positive integer"
    begin
      number = raw.is_a?(Integer) ? raw : Integer(raw, 10)
      # Zero is accepted; only negative values are rejected
      fail(invalid_msg) if number < 0
    rescue ArgumentError
      fail(invalid_msg)
    end
  elsif required
    fail("Metadata must contain a '#{field_key}' field")
  end
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
require 'pathname'
|
|
2
|
+
require 'longleaf/errors'
|
|
3
|
+
require 'longleaf/helpers/s3_uri_helper'
|
|
4
|
+
|
|
5
|
+
module Longleaf
|
|
6
|
+
# Validates the configuration of a s3 based location
|
|
7
|
+
class S3LocationValidator
|
|
8
|
+
# Validates a path property of an s3 based storage location.
# @param p_validator receives assert(message, condition) and fail(message) calls
# @param name name of the storage location, used in messages
# @param path_prop name of the path property being validated, used in messages
# @param section_name name of the configuration section, used in messages
# @param path the path value to validate
def self.validate(p_validator, name, path_prop, section_name, path)
  prefix = "Storage location '#{name}' specifies invalid #{section_name} '#{path_prop}' property: "

  path_present = !(path.nil? || path.to_s.strip.empty?)
  p_validator.assert("#{prefix}Path must not be empty", path_present)

  begin
    bucket = S3UriHelper.extract_bucket(path)
    p_validator.assert("#{prefix}Path must specify a bucket", !bucket.nil?)
  rescue ArgumentError => e
    # A path rejected by the bucket extraction is reported through the validator
    p_validator.fail("#{prefix}#{e.message}")
  end
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
require 'pathname'
|
|
2
|
+
|
|
3
|
+
module Longleaf
|
|
4
|
+
# Cache for loading and retrieving preservation service classes
|
|
5
|
+
class ServiceClassCache
|
|
6
|
+
STD_PRESERVATION_SERVICE_PATH = 'longleaf/preservation_services/'
|
|
7
|
+
|
|
8
|
+
# @param app_manager application manager handed to each service instance on creation
def initialize(app_manager)
  @app_manager = app_manager
  # Service instances, keyed by service definition name
  @service_instance_cache = {}
  # Service classes, keyed by the path used to require their script
  @class_cache = {}
end
|
|
15
|
+
|
|
16
|
+
# Returns an instance of the preservation service defined for the provided service definition,
|
|
17
|
+
# based on the work_script and work_class properties provided.
|
|
18
|
+
#
|
|
19
|
+
# @param service_def [ServiceDefinition] definition of service to instantiate
|
|
20
|
+
# @return [PreservationService] Instance of the preservation service class for the definition.
|
|
21
|
+
def service_instance(service_def)
  # The block only runs on a cache miss: it builds the instance, stores it
  # under the service name, and yields it as the result
  @service_instance_cache.fetch(service_def.name) do |service_name|
    @service_instance_cache[service_name] = service_class(service_def).new(service_def, @app_manager)
  end
end
|
|
32
|
+
|
|
33
|
+
# Load and return the PreservationService class assigned to the provided service definition,
|
|
34
|
+
# based on the work_script and work_class properties provided.
|
|
35
|
+
#
|
|
36
|
+
# @param service_def [ServiceDefinition] definition of service to retrieve class for
|
|
37
|
+
# @return [Class] class of work_script
|
|
38
|
+
def service_class(service_def)
  service_name = service_def.name
  work_script = service_def.work_script

  if work_script.include?('/')
    # Scripts given with a path must resolve to somewhere under a known library path
    expanded_path = Pathname.new(work_script).expand_path.to_s
    unless from_permitted_path?(expanded_path)
      raise ConfigurationError, "Unable to load work_script for service #{service_name}, #{work_script} is not in a known library path."
    end

    last_slash_index = work_script.rindex('/')
    script_path = work_script[0..last_slash_index]
    script_name = work_script[(last_slash_index + 1)..-1]
  else
    # Bare script names refer to the standard preservation services
    script_path = STD_PRESERVATION_SERVICE_PATH
    script_name = work_script
  end

  # Strip off the extension. A new string is built because script_name may be
  # the very string held by the service definition (and may be frozen), and
  # only a trailing '.rb' counts as the extension.
  script_name = script_name.sub(/\.rb\z/, '')

  require_path = File.join(script_path, script_name)
  # Return the cached Class if this path has been encountered before
  return @class_cache[require_path] if @class_cache.key?(require_path)

  # Load the script
  begin
    require require_path
  rescue LoadError
    raise ConfigurationError, "Failed to load work_script '#{script_name}' for service #{service_name}"
  end

  # Generate the class name, either configured or from file naming convention if possible
  if service_def.work_class
    class_name = service_def.work_class
  else
    class_name = script_name.split('_').map(&:capitalize).join
    # Assume the longleaf module for classes in the standard path
    class_name = 'Longleaf::' + class_name if script_path == STD_PRESERVATION_SERVICE_PATH
  end

  begin
    # Cache the class for this work_script and return it
    @class_cache[require_path] = constantize(class_name)
  rescue NameError
    raise ConfigurationError, "Failed to load work_script '#{script_name}' for service #{service_name}, class name #{class_name} was not found."
  end
end
|
|
89
|
+
|
|
90
|
+
private
|
|
91
|
+
# Borrowed from sidekiq implementation
|
|
92
|
+
def constantize(str)
  segments = str.split('::')
  # A leading '::' produces an empty first segment, which is dropped
  segments.shift if segments.empty? || segments.first.empty?

  segments.reduce(Object) do |scope, segment|
    # the false flag limits search for the segment to directly under the scope,
    # which mimics Rails' behaviour
    if scope.const_defined?(segment, false)
      scope.const_get(segment, false)
    else
      scope.const_missing(segment)
    end
  end
end
|
|
102
|
+
|
|
103
|
+
# Determines whether a script lies underneath one of the directories in $LOAD_PATH.
# @param script_path [String] path of the script to check
# @return [Boolean] true if the script is inside a load path directory
def from_permitted_path?(script_path)
  $LOAD_PATH.any? do |lib_path|
    lib_dir = lib_path.to_s
    # An empty entry would otherwise be a prefix of every path
    next false if lib_dir.empty?

    # Compare against the directory with a trailing separator, so that
    # /opt/lib does not also permit siblings such as /opt/lib_other
    lib_dir = "#{lib_dir}/" unless lib_dir.end_with?('/')
    script_path.start_with?(lib_dir)
  end
end
|
|
111
|
+
end
|
|
112
|
+
end
|