longleaf 0.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +94 -0
- data/.editorconfig +13 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +44 -0
- data/.rubocop_todo.yml +834 -0
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +98 -12
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +150 -0
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +252 -46
- data/lib/longleaf/commands/deregister_command.rb +51 -0
- data/lib/longleaf/commands/preserve_command.rb +50 -0
- data/lib/longleaf/commands/register_command.rb +34 -43
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +33 -8
- data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
- data/lib/longleaf/errors.rb +26 -7
- data/lib/longleaf/events/deregister_event.rb +53 -0
- data/lib/longleaf/events/event_names.rb +9 -0
- data/lib/longleaf/events/event_status_tracking.rb +59 -0
- data/lib/longleaf/events/preserve_event.rb +82 -0
- data/lib/longleaf/events/register_event.rb +59 -51
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +56 -0
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +215 -0
- data/lib/longleaf/helpers/service_date_helper.rb +78 -0
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
- data/lib/longleaf/logging.rb +5 -4
- data/lib/longleaf/logging/redirecting_logger.rb +30 -25
- data/lib/longleaf/models/app_fields.rb +7 -2
- data/lib/longleaf/models/file_record.rb +31 -8
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/md_fields.rb +3 -1
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +43 -16
- data/lib/longleaf/models/s3_storage_location.rb +138 -0
- data/lib/longleaf/models/service_definition.rb +7 -6
- data/lib/longleaf/models/service_fields.rb +7 -1
- data/lib/longleaf/models/service_record.rb +10 -6
- data/lib/longleaf/models/storage_location.rb +24 -19
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +59 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +124 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +198 -0
- data/lib/longleaf/preservation_services/s3_replication_service.rb +131 -0
- data/lib/longleaf/services/application_config_deserializer.rb +81 -24
- data/lib/longleaf/services/application_config_manager.rb +20 -6
- data/lib/longleaf/services/application_config_validator.rb +19 -9
- data/lib/longleaf/services/configuration_validator.rb +67 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +115 -42
- data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
- data/lib/longleaf/services/metadata_serializer.rb +156 -23
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_class_cache.rb +112 -0
- data/lib/longleaf/services/service_definition_manager.rb +10 -7
- data/lib/longleaf/services/service_definition_validator.rb +25 -18
- data/lib/longleaf/services/service_manager.rb +86 -11
- data/lib/longleaf/services/service_mapping_manager.rb +13 -12
- data/lib/longleaf/services/service_mapping_validator.rb +36 -26
- data/lib/longleaf/services/storage_location_manager.rb +76 -15
- data/lib/longleaf/services/storage_location_validator.rb +49 -35
- data/lib/longleaf/specs/config_builder.rb +47 -23
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/custom_matchers.rb +9 -0
- data/lib/longleaf/specs/file_helpers.rb +61 -0
- data/lib/longleaf/specs/metadata_builder.rb +98 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +20 -7
- data/mkdocs.yml +21 -0
- metadata +308 -24
- data/.travis.yml +0 -4
- data/lib/longleaf/commands/abstract_command.rb +0 -37
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
require 'longleaf/events/event_names'
|
|
2
|
+
require 'longleaf/logging'
|
|
3
|
+
require 'longleaf/errors'
|
|
4
|
+
require 'longleaf/models/file_record'
|
|
5
|
+
require 'longleaf/models/service_fields'
|
|
6
|
+
require 'longleaf/events/register_event'
|
|
7
|
+
require 'longleaf/models/storage_types'
|
|
8
|
+
require 'aws-sdk-s3'
|
|
9
|
+
|
|
10
|
+
module Longleaf
|
|
11
|
+
# Preservation service which performs replication of a file to one or more s3 destinations.
|
|
12
|
+
#
|
|
13
|
+
# The service definition must contain one or more destinations, specified with the "to" property.
|
|
14
|
+
# Each destination must be a known s3 storage location. The s3 client configuration
|
|
15
|
+
# is controlled by the storage location.
|
|
16
|
+
#
|
|
17
|
+
# Optional service configuration properties:
|
|
18
|
+
# * replica_collision_policy = specifies the desired outcome if the service attempts to replicate
|
|
19
|
+
# a file which already exists at a destination. Default: "replace".
|
|
20
|
+
class S3ReplicationService
|
|
21
|
+
include Longleaf::Logging
|
|
22
|
+
ST ||= Longleaf::StorageTypes
|
|
23
|
+
SF ||= Longleaf::ServiceFields
|
|
24
|
+
|
|
25
|
+
attr_reader :collision_policy
|
|
26
|
+
|
|
27
|
+
# Build an S3ReplicationService from the given service definition.
#
# Reads the replica collision policy and the replication destinations from the
# definition's properties and resolves every destination to a storage location.
#
# @param service_def [ServiceDefinition] the configuration for this service
# @param app_manager [ApplicationConfigManager] the application configuration
# @raise [ArgumentError] if the collision policy is not a valid policy, if no destination
#   is configured, or if a destination is unknown or is not an s3 storage location
def initialize(service_def, app_manager)
  @service_def = service_def
  @app_manager = app_manager

  # Collision policy: configured value, or the default when none is configured
  @collision_policy = @service_def.properties[SF::COLLISION_PROPERTY] || SF::DEFAULT_COLLISION_POLICY
  unless SF::VALID_COLLISION_POLICIES.include?(@collision_policy)
    raise ArgumentError.new("Service #{service_def.name} received invalid #{SF::COLLISION_PROPERTY}" \
        + " value #{@collision_policy}")
  end

  configured = @service_def.properties[SF::REPLICATE_TO]
  if configured.nil? || configured.empty?
    raise ArgumentError.new("Service #{service_def.name} must provide one or more replication destinations.")
  end
  # A single destination may be given as a bare string
  dest_names = configured.is_a?(String) ? [configured] : configured

  known = app_manager.location_manager
  # Translate each destination name into its storage location
  @destinations = []
  dest_names.each do |dest|
    unless known.locations.key?(dest)
      raise ArgumentError.new("Service #{service_def.name} specifies unknown storage location '#{dest}'" \
          + " as a replication destination")
    end

    location = known.locations[dest]
    unless location.type == ST::S3_STORAGE_TYPE
      raise ArgumentError.new(
          "Service #{service_def.name} specifies destination #{dest} which is not of type 's3'")
    end
    @destinations << location
  end
end
|
|
66
|
+
|
|
67
|
+
# Replicate the specified file to all configured destinations.
#
# Only files held in a filesystem storage location can be replicated by this service.
#
# @param file_rec [FileRecord] record representing the file to perform the service on.
# @param event [String] name of the event this service is being invoked by.
# @raise [PreservationServiceError] if the file's storage location is not of the filesystem type
def perform(file_rec, event)
  source_type = file_rec.storage_location.type
  unless source_type == ST::FILESYSTEM_STORAGE_TYPE
    raise PreservationServiceError.new("Replication from storage location of type " \
        + "#{source_type} to s3 is not supported")
  end

  replicate_from_fs(file_rec)
end
|
|
81
|
+
|
|
82
|
+
# Upload a file held in a filesystem storage location to every configured destination.
#
# @param file_rec [FileRecord] record representing the file to upload
# @raise [ChecksumMismatchError] if s3 reports a bad digest for the upload
# @raise [PreservationServiceError] if the upload fails with any other AWS service error
def replicate_from_fs(file_rec)
  # Path of the file relative to the storage location it lives in
  rel_path = file_rec.storage_location.relativize(file_rec.path)

  @destinations.each do |destination|
    # Fail early when the destination cannot be reached
    verify_destination_available(destination, file_rec)

    object_key = destination.relative_to_bucket_path(rel_path)
    s3_object = destination.s3_bucket.object(object_key)
    begin
      s3_object.upload_file(file_rec.physical_path)
    rescue Aws::S3::Errors::BadDigest
      raise ChecksumMismatchError.new("Transfer to bucket '#{destination.s3_bucket.name}' failed, " \
          + "MD5 provided did not match the received content for #{file_rec.path}")
    rescue Aws::Errors::ServiceError => e
      raise PreservationServiceError.new("Failed to transfer #{file_rec.path} to bucket " \
          + "'#{destination.s3_bucket.name}': #{e.message}")
    end

    logger.info("Replicated #{file_rec.path} to destination #{s3_object.public_url}")

    # TODO register file in destination
  end
end
|
|
107
|
+
|
|
108
|
+
# Report whether this service should run for the provided event.
#
# @param event [String] name of the event
# @return [Boolean] true when the event is the preserve event, false for any other event
def is_applicable?(event)
  EventNames::PRESERVE == event
end
|
|
120
|
+
|
|
121
|
+
private
|
|
122
|
+
# Ask the destination whether it is available, adding the file and destination names to
# the error if it is not.
#
# @param destination storage location being replicated to
# @param file_rec [FileRecord] record of the file being replicated
# @raise [StorageLocationUnavailableError] if the destination reports itself unavailable
def verify_destination_available(destination, file_rec)
  destination.available?
rescue StorageLocationUnavailableError => e
  raise StorageLocationUnavailableError.new(
      "Cannot replicate #{file_rec.path} to destination #{destination.name}: #{e.message}")
end
|
|
130
|
+
end
|
|
131
|
+
end
|
|
@@ -1,46 +1,103 @@
|
|
|
1
1
|
require 'longleaf/services/application_config_validator'
|
|
2
2
|
require 'longleaf/services/application_config_manager'
|
|
3
|
+
require 'digest/md5'
|
|
4
|
+
require 'pathname'
|
|
3
5
|
|
|
4
|
-
# Deserializer for application configuration files
|
|
5
6
|
module Longleaf
|
|
7
|
+
# Deserializer for application configuration files
|
|
6
8
|
class ApplicationConfigDeserializer
|
|
7
|
-
|
|
9
|
+
AF ||= Longleaf::AppFields
|
|
10
|
+
|
|
8
11
|
# Reads, parses and validates an application configuration file, producing the manager for it.
#
# @param config_path [String] file path to the service and storage mapping configuration file
# @param format [String] encoding format of the config file
# @return [ApplicationConfigManager] manager for the loaded configuration
# @raise [ConfigurationError] if the validation result is not valid
def self.deserialize(config_path, format: 'yaml')
  raw_content = load_config_file(config_path)
  parsed = load(raw_content, format)

  # Digest of the raw file content, handed to the manager alongside the parsed config
  content_md5 = Digest::MD5.hexdigest(raw_content)

  # Paths are made absolute before validation runs
  make_paths_absolute(config_path, parsed)
  ApplicationConfigValidator.new(parsed).validate_config.raise_if_invalid

  ApplicationConfigManager.new(parsed, content_md5)
end
|
|
18
|
-
|
|
26
|
+
|
|
27
|
+
# Read the full content of the configuration file.
#
# @param config_path [String] path of the configuration file
# @return [String] the content of the file
# @raise [Longleaf::ConfigurationError] if the file does not exist
def self.load_config_file(config_path)
  File.read(config_path)
rescue Errno::ENOENT
  raise Longleaf::ConfigurationError.new("Configuration file #{config_path} does not exist.")
end
|
|
35
|
+
|
|
19
36
|
# Parse configuration content using the parser for the given format.
#
# @param content [String] the contents of the application configuration file
# @param format [String] encoding format of the config file; only 'yaml' is accepted
# @return [Hash] hash containing the configuration
# @raise [ArgumentError] if the format is not 'yaml'
def self.load(content, format)
  unless 'yaml' == format
    raise ArgumentError.new("Invalid deserialization format #{format} specified")
  end

  from_yaml(content)
end
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def self.from_yaml(config_path)
|
|
48
|
+
|
|
49
|
+
# Parse YAML configuration content.
#
# No custom classes or symbols are permitted in the document; aliases are allowed.
#
# @param content [String] YAML text of the application configuration
# @return the parsed YAML document
# @raise [Longleaf::ConfigurationError] if the content cannot be parsed
def self.from_yaml(content)
  begin
    # Keyword arguments are used because Psych 4 (Ruby 3.1+) no longer accepts the
    # positional form safe_load(yaml, [], [], true). Keywords need Psych 3.1 (Ruby 2.6) or later.
    YAML.safe_load(content, permitted_classes: [], permitted_symbols: [], aliases: true)
  rescue => err
    raise Longleaf::ConfigurationError.new(err)
  end
end
|
|
56
|
+
|
|
57
|
+
# Rewrite the path of every storage location, and of its metadata section, as an absolute
# path. Relative paths are resolved against the directory containing the configuration file.
#
# A metadata section given as a bare string is expanded into a hash holding that string
# under AF::LOCATION_PATH, and the expanded form is stored back on the location.
#
# @param config_path [String] path of the configuration file
# @param config [Hash] parsed application configuration; modified in place
def self.make_paths_absolute(config_path, config)
  base_pathname = Pathname.new(config_path).expand_path.parent

  # Malformed structure is left untouched so that the validation step which follows in
  # deserialize can report it, rather than failing here with a NoMethodError.
  locations = config.is_a?(Hash) ? config[AF::LOCATIONS] : nil
  return unless locations.is_a?(Hash)

  locations.each_value do |properties|
    next unless properties.is_a?(Hash)

    properties[AF::LOCATION_PATH] = make_file_paths_absolute(base_pathname, properties)

    # Resolve single field metadata location into expanded form
    md_config = properties[AF::METADATA_CONFIG]
    if md_config.is_a?(String)
      # Keyed by AF::LOCATION_PATH because that is the field make_file_paths_absolute reads.
      # The expanded hash replaces the string on the location so the absolute path is kept.
      md_config = { AF::LOCATION_PATH => md_config }
      properties[AF::METADATA_CONFIG] = md_config
    end
    next unless md_config.is_a?(Hash)

    md_config[AF::LOCATION_PATH] = make_file_paths_absolute(base_pathname, md_config)
  end
end
|
|
74
|
+
|
|
75
|
+
# Produce the absolute form of the AF::LOCATION_PATH value in the given properties.
#
# Values with no URI scheme, or with the "file" scheme, are treated as filesystem paths and
# resolved through absolution. Values with any other scheme are returned unchanged.
#
# @param base_pathname [Pathname] directory that relative paths are resolved against
# @param properties [Hash] properties holding the path under AF::LOCATION_PATH
# @return [String, nil] the resolved path, or nil when no path is set
def self.make_file_paths_absolute(base_pathname, properties)
  path = properties[AF::LOCATION_PATH]
  return nil if path.nil?

  begin
    uri = URI(path)
  rescue URI::InvalidURIError
    # Not parseable as a URI (for example a directory name containing spaces), so it
    # cannot carry a scheme: treat the raw value as a filesystem path.
    return absolution(base_pathname, path)
  end

  if uri.scheme.nil? || uri.scheme.casecmp("file").zero?
    absolution(base_pathname, uri.path)
  else
    path
  end
end
|
|
87
|
+
|
|
88
|
+
# Resolve a filesystem path to an absolute path string.
#
# An absolute path is expanded; a relative path is appended to base_pathname.
#
# @param base_pathname [Pathname] directory that relative paths are resolved against
# @param file_path [String, nil] the path to resolve
# @return [String, nil] the absolute path, or nil when file_path is nil
def self.absolution(base_pathname, file_path)
  return nil if file_path.nil?

  candidate = Pathname.new(file_path)
  resolved = candidate.absolute? ? candidate.expand_path : base_pathname + candidate
  resolved.to_s
end
|
|
100
|
+
|
|
101
|
+
private_class_method :load_config_file
|
|
45
102
|
end
|
|
46
|
-
end
|
|
103
|
+
end
|
|
@@ -5,20 +5,34 @@ require_relative 'service_definition_manager'
|
|
|
5
5
|
require_relative 'service_mapping_validator'
|
|
6
6
|
require_relative 'service_mapping_manager'
|
|
7
7
|
require_relative 'service_manager'
|
|
8
|
+
require_relative 'metadata_persistence_manager'
|
|
9
|
+
require 'longleaf/indexing/index_manager'
|
|
10
|
+
require 'longleaf/models/app_fields'
|
|
8
11
|
|
|
9
|
-
# Manager which loads and provides access to the configuration of the application
|
|
10
12
|
module Longleaf
|
|
13
|
+
# Manager which loads and provides access to the configuration of the application
|
|
11
14
|
class ApplicationConfigManager
|
|
15
|
+
attr_reader :config_md5
|
|
12
16
|
attr_reader :service_manager
|
|
13
17
|
attr_reader :location_manager
|
|
14
|
-
|
|
15
|
-
|
|
18
|
+
attr_reader :index_manager
|
|
19
|
+
attr_reader :md_manager
|
|
20
|
+
|
|
21
|
+
# Build the storage location, service, index and metadata persistence managers from a
# parsed application configuration.
#
# @param config [Hash] the application configuration
# @param config_md5 [String, nil] digest of the configuration content, exposed via config_md5
def initialize(config, config_md5 = nil)
  @config_md5 = config_md5
  @location_manager = Longleaf::StorageLocationManager.new(config)

  # The service manager receives a reference to this manager while it is still being built
  @service_manager = Longleaf::ServiceManager.new(
    definition_manager: Longleaf::ServiceDefinitionManager.new(config),
    mapping_manager: Longleaf::ServiceMappingManager.new(config),
    app_manager: self
  )

  # The index manager is configured from the system section of the configuration
  @index_manager = IndexManager.new(config[AppFields::SYSTEM], self)
  @md_manager = MetadataPersistenceManager.new(@index_manager)
end
|
|
23
37
|
end
|
|
24
|
-
end
|
|
38
|
+
end
|
|
@@ -2,17 +2,27 @@ require_relative 'storage_location_validator'
|
|
|
2
2
|
require_relative 'service_definition_validator'
|
|
3
3
|
require_relative 'service_mapping_validator'
|
|
4
4
|
|
|
5
|
-
# Validator for Longleaf application configuration
|
|
6
5
|
module Longleaf
|
|
7
|
-
|
|
8
|
-
|
|
6
|
+
# Validator for Longleaf application configuration
class ApplicationConfigValidator < ConfigurationValidator
  # @param config [Hash] hash containing the application configuration
  def initialize(config)
    super
  end

  protected

  # Runs the storage location, service definition and service mapping validators over the
  # configuration, and gathers the errors of every validator that failed into this
  # validator's result.
  #
  # @return [ConfigurationValidationResult] the combined result
  def validate
    section_validators = [StorageLocationValidator, ServiceDefinitionValidator, ServiceMappingValidator]
    # All sections are validated before any errors are merged
    outcomes = section_validators.map { |validator| validator.new(@config).validate_config }

    outcomes.each do |outcome|
      next if outcome.valid?

      @result.errors.concat(outcome.errors)
    end

    @result
  end
end
|
|
18
|
-
end
|
|
28
|
+
end
|
|
@@ -1,8 +1,71 @@
|
|
|
1
1
|
module Longleaf
|
|
2
|
+
# Abstract configuration validator class. validate_config invokes a validate method,
# which this class does not define.
class ConfigurationValidator
  attr_reader :result

  # @param config the configuration to validate
  def initialize(config)
    @result = ConfigurationValidationResult.new
    @config = config
  end

  # Verify that the provided configuration is valid
  # @return [ConfigurationValidationResult] the result of the validation
  def validate_config
    register_on_failure { validate }

    @result
  end

  # Asserts that the given conditional is true, raising a ConfigurationError if it is not.
  # @param fail_message [String] message for the error raised when the assertion fails
  # @param assertion_passed truthy when the assertion holds
  # @raise [ConfigurationError] if assertion_passed is false or nil
  def assert(fail_message, assertion_passed)
    fail(fail_message) unless assertion_passed
  end

  # Indicate that validation has failed, raising a ConfigurationError with the given message.
  # Note that this replaces Kernel#fail for instances of this class.
  # @raise [ConfigurationError] always
  def fail(fail_message)
    raise ConfigurationError.new(fail_message)
  end

  # Registers an error to the result for this validator
  # @param error [StandardError, String] an exception, whose message is registered, or the
  #   error message itself
  def register_error(error)
    if error.is_a?(StandardError)
      # Exceptions expose their text through #message
      @result.register_error(error.message)
    else
      @result.register_error(error)
    end
  end

  # Performs the provided block. If the block produces a ConfigurationError, the error
  # is swallowed and registered to the result
  def register_on_failure
    yield
  rescue ConfigurationError => err
    register_error(err.message)
  end
end
|
|
48
|
+
|
|
49
|
+
# Collects the error messages produced while validating a configuration.
class ConfigurationValidationResult
  attr_reader :errors

  def initialize
    @errors = []
  end

  # Register an error with this validation result
  # @param error_message the error to record
  def register_error(error_message)
    @errors.push(error_message)
  end

  # @return [Boolean] true if validation produced no errors
  def valid?
    @errors.empty?
  end

  # @raise [ConfigurationError] if the result is not valid; the message lists all failures,
  #   one per line
  def raise_if_invalid
    return if valid?

    raise ConfigurationError.new(@errors.join("\n"))
  end
end
|
|
8
|
-
end
|
|
71
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
require 'pathname'
|
|
2
|
+
require 'longleaf/errors'
|
|
3
|
+
|
|
4
|
+
module Longleaf
|
|
5
|
+
# Validates the configuration of a filesystem based location
class FilesystemLocationValidator
  # Checks, in order, that the path is not empty, does not contain "/..", is absolute and
  # names an existing directory. Each check is reported through p_validator.assert.
  #
  # @param p_validator object responding to assert(fail_message, assertion_passed)
  # @param name [String] name of the storage location being validated
  # @param path_prop [String] name of the property holding the path
  # @param section_name [String] name of the configuration section the property belongs to
  # @param path [String] the path to validate
  def self.validate(p_validator, name, path_prop, section_name, path)
    prefix = "Storage location '#{name}' specifies invalid #{section_name} '#{path_prop}' property: "
    require_that = ->(reason, passed) { p_validator.assert(prefix + reason, passed) }

    has_content = !(path.nil? || path.to_s.strip.empty?)
    require_that.call('Path must not be empty', has_content)
    require_that.call('Path must not contain any relative modifiers (/..)', !path.include?('/..'))
    require_that.call('Path must be absolute', Pathname.new(path).absolute?)
    require_that.call('Path does not exist', Dir.exist?(path))
  end
end
|
|
16
|
+
end
|
|
@@ -1,29 +1,30 @@
|
|
|
1
1
|
require 'yaml'
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
2
|
+
require 'longleaf/models/metadata_record'
|
|
3
|
+
require 'longleaf/models/md_fields'
|
|
4
|
+
require 'longleaf/services/metadata_validator'
|
|
5
|
+
require 'longleaf/errors'
|
|
6
|
+
require 'longleaf/logging'
|
|
5
7
|
|
|
6
|
-
# Service which deserializes metadata files into MetadataRecord objects
|
|
7
8
|
module Longleaf
|
|
9
|
+
# Service which deserializes metadata files into MetadataRecord objects
|
|
8
10
|
class MetadataDeserializer
|
|
9
|
-
|
|
10
|
-
|
|
11
|
+
extend Longleaf::Logging
|
|
12
|
+
MDF ||= MDFields
|
|
13
|
+
|
|
11
14
|
# Deserialize a file into a MetadataRecord object
|
|
12
15
|
#
|
|
13
16
|
# @param file_path [String] path of the file to read. Required.
|
|
14
17
|
# @param format [String] format the file is stored in. Default is 'yaml'.
|
|
15
|
-
def self.deserialize(file_path:, format: 'yaml')
|
|
18
|
+
def self.deserialize(file_path:, format: 'yaml', digest_algs: [])
|
|
19
|
+
file_path = file_path.path if file_path.is_a?(File)
|
|
20
|
+
|
|
16
21
|
case format
|
|
17
22
|
when 'yaml'
|
|
18
|
-
md = from_yaml(file_path)
|
|
23
|
+
md = from_yaml(file_path, digest_algs)
|
|
19
24
|
else
|
|
20
|
-
raise ArgumentError.new(
|
|
25
|
+
raise ArgumentError.new("Invalid deserialization format #{format} specified")
|
|
21
26
|
end
|
|
22
|
-
|
|
23
|
-
if !md || !md.key?(MDF::DATA) || !md.key?(MDF::SERVICES)
|
|
24
|
-
raise Longleaf::MetadataError.new("Invalid metadata file, did not contain data or services fields: #{file_path}")
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
+
|
|
27
28
|
data = Hash.new.merge(md[MDF::DATA])
|
|
28
29
|
# Extract reserved properties for submission as separate parameters
|
|
29
30
|
registered = data.delete(MDFields::REGISTERED_TIMESTAMP)
|
|
@@ -31,38 +32,110 @@ module Longleaf
|
|
|
31
32
|
checksums = data.delete(MDFields::CHECKSUMS)
|
|
32
33
|
file_size = data.delete(MDFields::FILE_SIZE)
|
|
33
34
|
last_modified = data.delete(MDFields::LAST_MODIFIED)
|
|
34
|
-
|
|
35
|
+
physical_path = data.delete(MDFields::PHYSICAL_PATH)
|
|
36
|
+
|
|
35
37
|
services = md[MDF::SERVICES]
|
|
36
38
|
service_records = Hash.new
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
run_needed: run_needed)
|
|
52
|
-
end
|
|
39
|
+
services&.each do |name, props|
|
|
40
|
+
raise MetadataError.new("Value of service #{name} must be a hash") unless props.class == Hash
|
|
41
|
+
|
|
42
|
+
service_props = Hash.new.merge(props)
|
|
43
|
+
|
|
44
|
+
stale_replicas = service_props.delete(MDFields::STALE_REPLICAS)
|
|
45
|
+
timestamp = service_props.delete(MDFields::SERVICE_TIMESTAMP)
|
|
46
|
+
run_needed = service_props.delete(MDFields::RUN_NEEDED)
|
|
47
|
+
|
|
48
|
+
service_records[name] = ServiceRecord.new(
|
|
49
|
+
properties: service_props,
|
|
50
|
+
stale_replicas: stale_replicas,
|
|
51
|
+
timestamp: timestamp,
|
|
52
|
+
run_needed: run_needed)
|
|
53
53
|
end
|
|
54
|
-
|
|
54
|
+
|
|
55
55
|
MetadataRecord.new(properties: data,
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
56
|
+
services: service_records,
|
|
57
|
+
registered: registered,
|
|
58
|
+
deregistered: deregistered,
|
|
59
|
+
checksums: checksums,
|
|
60
|
+
file_size: file_size,
|
|
61
|
+
last_modified: last_modified,
|
|
62
|
+
physical_path: physical_path)
|
|
62
63
|
end
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
64
|
+
|
|
65
|
+
# Load, verify and validate a YAML encoded metadata file.
#
# The file's digests are verified first. A digest mismatch is held back until the content
# has also been parsed and validated, so that any parse or validation problem can be
# reported together with the mismatch.
#
# @param file_path [String] path of the metadata file
# @param digest_algs [Array<String>] digest algorithms to verify the file against
# @return the parsed metadata
# @raise [MetadataError] if the file cannot be parsed or fails validation, and its digests matched
# @raise [ChecksumMismatchError] if a digest did not match; when parsing or validation also
#   failed, that failure is appended to the message
def self.from_yaml(file_path, digest_algs)
  File.open(file_path, 'r:bom|utf-8') do |f|
    contents = f.read

    checksum_error = nil
    begin
      verify_digests(file_path, contents, digest_algs)
    rescue ChecksumMismatchError => err
      # Hold onto the checksum error, in case we can identify the underlying cause
      checksum_error = err
    end

    begin
      md = nil
      begin
        # Keyword arguments are used because Psych 4 (Ruby 3.1+) no longer accepts the
        # positional form safe_load(yaml, [], [], true). Keywords need Psych 3.1 (Ruby 2.6) or later.
        md = YAML.safe_load(contents, permitted_classes: [], permitted_symbols: [], aliases: true)
      rescue => err
        raise MetadataError.new("Failed to parse metadata file #{file_path}: #{err.message}")
      end

      validation_result = MetadataValidator.new(md).validate_config
      if !validation_result.valid?
        if checksum_error.nil?
          raise MetadataError.new("Invalid metadata file #{file_path.to_s}:\n#{validation_result.errors.join("\n")}")
        else
          # The file is already named by the checksum error this gets appended to
          raise MetadataError.new(validation_result.errors.join("\n"))
        end
      end

      # Either return the valid metadata, or raise the checksum error as is
      if checksum_error.nil?
        md
      else
        raise checksum_error
      end
    rescue MetadataError => err
      if checksum_error.nil?
        raise err
      else
        # Add underlying cause from the metadata error to the checksum mismatch error
        msg = checksum_error.message + "\nWith related issue(s):\n#{err.message}"
        raise ChecksumMismatchError.new(msg)
      end
    end
  end
end
|
|
112
|
+
|
|
113
|
+
# Compare the metadata content against each digest file stored next to the metadata file.
#
# The digest for an algorithm is read from "<path>.<alg>". A missing digest file is logged
# as a warning and skipped.
#
# @param file_path [String, #path] the metadata file, or an object exposing its path
# @param contents [String] content of the metadata file
# @param digest_algs [Array<String>, nil] algorithms to verify; nothing is done when nil or empty
# @raise [ChecksumMismatchError] if a stored digest differs from the calculated digest
def self.verify_digests(file_path, contents, digest_algs)
  return if digest_algs.nil? || digest_algs.empty?

  path = file_path.respond_to?(:path) ? file_path.path : file_path

  digest_algs.each do |alg|
    digest_path = "#{path}.#{alg}"
    unless File.exist?(digest_path)
      logger.warn("Missing expected #{alg} digest for #{path}")
      next
    end

    calculated = DigestHelper::start_digest(alg).hexdigest(contents)
    expected = IO.read(digest_path)

    if calculated != expected
      raise ChecksumMismatchError.new("Metadata digest of type #{alg} did not match the contents of #{path}:" \
          + " expected #{expected}, calculated #{calculated}")
    end

    logger.info("Metadata fixity check using algorithm '#{alg}' succeeded for file #{path}")
  end
end
|
|
67
140
|
end
|
|
68
|
-
end
|
|
141
|
+
end
|