longleaf 0.1.0.pre.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +94 -0
- data/.editorconfig +13 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +44 -0
- data/.rubocop_todo.yml +834 -0
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +98 -12
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +139 -0
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +237 -46
- data/lib/longleaf/commands/deregister_command.rb +51 -0
- data/lib/longleaf/commands/preserve_command.rb +50 -0
- data/lib/longleaf/commands/register_command.rb +32 -43
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +33 -8
- data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
- data/lib/longleaf/errors.rb +26 -7
- data/lib/longleaf/events/deregister_event.rb +53 -0
- data/lib/longleaf/events/event_names.rb +9 -0
- data/lib/longleaf/events/event_status_tracking.rb +59 -0
- data/lib/longleaf/events/preserve_event.rb +81 -0
- data/lib/longleaf/events/register_event.rb +52 -51
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +56 -0
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
- data/lib/longleaf/helpers/service_date_helper.rb +78 -0
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
- data/lib/longleaf/logging.rb +5 -4
- data/lib/longleaf/logging/redirecting_logger.rb +26 -25
- data/lib/longleaf/models/app_fields.rb +7 -2
- data/lib/longleaf/models/file_record.rb +17 -8
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/md_fields.rb +2 -1
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +39 -15
- data/lib/longleaf/models/s3_storage_location.rb +133 -0
- data/lib/longleaf/models/service_definition.rb +7 -6
- data/lib/longleaf/models/service_fields.rb +7 -1
- data/lib/longleaf/models/service_record.rb +10 -6
- data/lib/longleaf/models/storage_location.rb +24 -19
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +123 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +182 -0
- data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
- data/lib/longleaf/services/application_config_deserializer.rb +81 -24
- data/lib/longleaf/services/application_config_manager.rb +20 -6
- data/lib/longleaf/services/application_config_validator.rb +19 -9
- data/lib/longleaf/services/configuration_validator.rb +67 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +113 -42
- data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
- data/lib/longleaf/services/metadata_serializer.rb +138 -25
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_class_cache.rb +112 -0
- data/lib/longleaf/services/service_definition_manager.rb +10 -7
- data/lib/longleaf/services/service_definition_validator.rb +25 -18
- data/lib/longleaf/services/service_manager.rb +86 -11
- data/lib/longleaf/services/service_mapping_manager.rb +13 -12
- data/lib/longleaf/services/service_mapping_validator.rb +36 -26
- data/lib/longleaf/services/storage_location_manager.rb +76 -15
- data/lib/longleaf/services/storage_location_validator.rb +49 -35
- data/lib/longleaf/specs/config_builder.rb +47 -23
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/custom_matchers.rb +9 -0
- data/lib/longleaf/specs/file_helpers.rb +61 -0
- data/lib/longleaf/specs/metadata_builder.rb +92 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +20 -7
- data/mkdocs.yml +21 -0
- metadata +306 -23
- data/.travis.yml +0 -4
- data/lib/longleaf/commands/abstract_command.rb +0 -37
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
|
@@ -5,20 +5,34 @@ require_relative 'service_definition_manager'
|
|
|
5
5
|
require_relative 'service_mapping_validator'
|
|
6
6
|
require_relative 'service_mapping_manager'
|
|
7
7
|
require_relative 'service_manager'
|
|
8
|
+
require_relative 'metadata_persistence_manager'
|
|
9
|
+
require 'longleaf/indexing/index_manager'
|
|
10
|
+
require 'longleaf/models/app_fields'
|
|
8
11
|
|
|
9
|
-
# Manager which loads and provides access to the configuration of the application
|
|
10
12
|
module Longleaf
  # Holds the managers built from a loaded application configuration and exposes
  # them to the rest of the application.
  class ApplicationConfigManager
    attr_reader :config_md5, :service_manager, :location_manager, :index_manager, :md_manager

    # @param config [Hash] the application configuration
    # @param config_md5 [String] optional value stored as-is and exposed via #config_md5
    def initialize(config, config_md5 = nil)
      @config_md5 = config_md5
      @location_manager = Longleaf::StorageLocationManager.new(config)
      @service_manager = build_service_manager(config)
      # The index manager is built from the system section of the config;
      # the metadata persistence manager is then built on top of it.
      @index_manager = IndexManager.new(config[AppFields::SYSTEM], self)
      @md_manager = MetadataPersistenceManager.new(@index_manager)
    end

    private

    # Builds the service manager from the definition and mapping managers for this config.
    def build_service_manager(config)
      Longleaf::ServiceManager.new(
        definition_manager: Longleaf::ServiceDefinitionManager.new(config),
        mapping_manager: Longleaf::ServiceMappingManager.new(config),
        app_manager: self
      )
    end
  end
end
|
|
@@ -2,17 +2,27 @@ require_relative 'storage_location_validator'
|
|
|
2
2
|
require_relative 'service_definition_validator'
|
|
3
3
|
require_relative 'service_mapping_validator'
|
|
4
4
|
|
|
5
|
-
# Validator for Longleaf application configuration
|
|
6
5
|
module Longleaf
  # Validator for Longleaf application configuration
  class ApplicationConfigValidator < ConfigurationValidator
    # @param config [Hash] hash containing the application configuration
    def initialize(config)
      super(config)
    end

    protected

    # Runs the storage location, service definition and service mapping validators
    # against the configuration and merges the errors of every failing validator
    # into this validator's result.
    # @return the result object of this validator
    def validate
      section_validators = [StorageLocationValidator, ServiceDefinitionValidator, ServiceMappingValidator]

      # Run every section validator before merging, so all sections are always checked
      section_results = section_validators.map do |validator_class|
        validator_class.new(@config).validate_config
      end

      section_results.each do |section_result|
        next if section_result.valid?
        @result.errors.concat(section_result.errors)
      end

      @result
    end
  end
end
|
|
@@ -1,8 +1,71 @@
|
|
|
1
1
|
module Longleaf
  # Abstract configuration validator class. Subclasses are expected to provide a
  # `validate` method, which #validate_config invokes.
  class ConfigurationValidator
    attr_reader :result

    # @param config the configuration to validate
    def initialize(config)
      @result = ConfigurationValidationResult.new
      @config = config
    end

    # Verify that the provided configuration is valid
    # @return [ConfigurationValidationResult] the result of the validation
    def validate_config
      register_on_failure { validate }

      @result
    end

    # Asserts that the given conditional is true, raising a ConfigurationError if it is not.
    # @param fail_message [String] message of the error raised on failure
    # @param assertion_passed truthy when the assertion holds
    # @raise [ConfigurationError] if assertion_passed is falsy
    def assert(fail_message, assertion_passed)
      fail(fail_message) unless assertion_passed
    end

    # Indicate that validation has failed, throwing a ConfigurationError with the given message.
    # Note that this shadows Kernel#fail within validators.
    # @raise [ConfigurationError] always
    def fail(fail_message)
      raise ConfigurationError.new(fail_message)
    end

    # Registers an error to the result for this validator
    # @param error [StandardError, String] an exception, whose message is registered,
    #    or the error message itself
    def register_error(error)
      # StandardError exposes its text via #message; there is no #msg method
      message = error.is_a?(StandardError) ? error.message : error
      @result.register_error(message)
    end

    # Performs the provided block. If the block produces a ConfigurationError, the error
    # is swallowed and registered to the result
    def register_on_failure
      yield
    rescue ConfigurationError => err
      register_error(err.message)
    end
  end

  # Collects the error messages produced while validating a configuration
  class ConfigurationValidationResult
    attr_reader :errors

    def initialize
      @errors = []
    end

    # Register an error with this validation result
    # @param error_message [String] description of the validation failure
    def register_error(error_message)
      @errors << error_message
    end

    # @return [boolean] true if validation produced no errors
    def valid?
      @errors.empty?
    end

    # @raise [ConfigurationError] if the result is not valid, which lists all failures
    def raise_if_invalid
      raise ConfigurationError.new(@errors.join("\n")) unless valid?
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
require 'pathname'
|
|
2
|
+
require 'longleaf/errors'
|
|
3
|
+
|
|
4
|
+
module Longleaf
  # Validates the configuration of a filesystem based location
  class FilesystemLocationValidator
    # Checks that a configured path is non-empty, free of '/..' segments, absolute,
    # and refers to an existing directory. Each check is reported through
    # p_validator.assert, in that order.
    #
    # @param p_validator object responding to assert(fail_message, assertion_passed)
    # @param name name of the storage location, used in failure messages
    # @param path_prop name of the property holding the path, used in failure messages
    # @param section_name name of the configuration section, used in failure messages
    # @param path the configured path; nil and non-String values are tolerated
    def self.validate(p_validator, name, path_prop, section_name, path)
      base_msg = "Storage location '#{name}' specifies invalid #{section_name} '#{path_prop}' property: "
      # Coerce once so nil, Pathname or non-String YAML scalars produce validation
      # failures instead of NoMethodError from String-only calls below.
      path_str = path.to_s

      p_validator.assert(base_msg + 'Path must not be empty', !path_str.strip.empty?)
      p_validator.assert(base_msg + 'Path must not contain any relative modifiers (/..)', !path_str.include?('/..'))
      p_validator.assert(base_msg + 'Path must be absolute', Pathname.new(path_str).absolute?)
      p_validator.assert(base_msg + 'Path does not exist', Dir.exist?(path_str))
    end
  end
end
|
|
@@ -1,29 +1,30 @@
|
|
|
1
1
|
require 'yaml'
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
2
|
+
require 'longleaf/models/metadata_record'
|
|
3
|
+
require 'longleaf/models/md_fields'
|
|
4
|
+
require 'longleaf/services/metadata_validator'
|
|
5
|
+
require 'longleaf/errors'
|
|
6
|
+
require 'longleaf/logging'
|
|
5
7
|
|
|
6
|
-
# Service which deserializes metadata files into MetadataRecord objects
|
|
7
8
|
module Longleaf
|
|
9
|
+
# Service which deserializes metadata files into MetadataRecord objects
|
|
8
10
|
class MetadataDeserializer
|
|
9
|
-
|
|
10
|
-
|
|
11
|
+
extend Longleaf::Logging
|
|
12
|
+
MDF ||= MDFields
|
|
13
|
+
|
|
11
14
|
# Deserialize a file into a MetadataRecord object
|
|
12
15
|
#
|
|
13
16
|
# @param file_path [String] path of the file to read. Required.
|
|
14
17
|
# @param format [String] format the file is stored in. Default is 'yaml'.
|
|
15
|
-
def self.deserialize(file_path:, format: 'yaml')
|
|
18
|
+
def self.deserialize(file_path:, format: 'yaml', digest_algs: [])
|
|
19
|
+
file_path = file_path.path if file_path.is_a?(File)
|
|
20
|
+
|
|
16
21
|
case format
|
|
17
22
|
when 'yaml'
|
|
18
|
-
md = from_yaml(file_path)
|
|
23
|
+
md = from_yaml(file_path, digest_algs)
|
|
19
24
|
else
|
|
20
|
-
raise ArgumentError.new(
|
|
25
|
+
raise ArgumentError.new("Invalid deserialization format #{format} specified")
|
|
21
26
|
end
|
|
22
|
-
|
|
23
|
-
if !md || !md.key?(MDF::DATA) || !md.key?(MDF::SERVICES)
|
|
24
|
-
raise Longleaf::MetadataError.new("Invalid metadata file, did not contain data or services fields: #{file_path}")
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
+
|
|
27
28
|
data = Hash.new.merge(md[MDF::DATA])
|
|
28
29
|
# Extract reserved properties for submission as separate parameters
|
|
29
30
|
registered = data.delete(MDFields::REGISTERED_TIMESTAMP)
|
|
@@ -31,38 +32,108 @@ module Longleaf
|
|
|
31
32
|
checksums = data.delete(MDFields::CHECKSUMS)
|
|
32
33
|
file_size = data.delete(MDFields::FILE_SIZE)
|
|
33
34
|
last_modified = data.delete(MDFields::LAST_MODIFIED)
|
|
34
|
-
|
|
35
|
+
|
|
35
36
|
services = md[MDF::SERVICES]
|
|
36
37
|
service_records = Hash.new
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
run_needed: run_needed)
|
|
52
|
-
end
|
|
38
|
+
services&.each do |name, props|
|
|
39
|
+
raise MetadataError.new("Value of service #{name} must be a hash") unless props.class == Hash
|
|
40
|
+
|
|
41
|
+
service_props = Hash.new.merge(props)
|
|
42
|
+
|
|
43
|
+
stale_replicas = service_props.delete(MDFields::STALE_REPLICAS)
|
|
44
|
+
timestamp = service_props.delete(MDFields::SERVICE_TIMESTAMP)
|
|
45
|
+
run_needed = service_props.delete(MDFields::RUN_NEEDED)
|
|
46
|
+
|
|
47
|
+
service_records[name] = ServiceRecord.new(
|
|
48
|
+
properties: service_props,
|
|
49
|
+
stale_replicas: stale_replicas,
|
|
50
|
+
timestamp: timestamp,
|
|
51
|
+
run_needed: run_needed)
|
|
53
52
|
end
|
|
54
|
-
|
|
53
|
+
|
|
55
54
|
MetadataRecord.new(properties: data,
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
55
|
+
services: service_records,
|
|
56
|
+
registered: registered,
|
|
57
|
+
deregistered: deregistered,
|
|
58
|
+
checksums: checksums,
|
|
59
|
+
file_size: file_size,
|
|
60
|
+
last_modified: last_modified)
|
|
62
61
|
end
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
62
|
+
|
|
63
|
+
# Load metadata from a yaml encoded metadata file
|
|
64
|
+
# Reads the file at file_path, checks its contents against any sidecar digests,
# parses it as YAML and validates the result with MetadataValidator.
#
# A digest mismatch is held back until parsing and validation have run, so that
# the raised ChecksumMismatchError can also describe any parse or validation problem.
#
# @param file_path path of the metadata file to read
# @param digest_algs digest algorithms passed through to verify_digests
# @return the parsed metadata structure
# @raise [MetadataError] if the file cannot be parsed or fails validation
# @raise [ChecksumMismatchError] if a digest did not match the file contents
def self.from_yaml(file_path, digest_algs)
  File.open(file_path, 'r:bom|utf-8') do |f|
    contents = f.read

    checksum_error = nil
    begin
      verify_digests(file_path, contents, digest_algs)
    rescue ChecksumMismatchError => err
      # Hold onto the checksum error, in case we can identify the underlying cause
      checksum_error = err
    end

    begin
      md = nil
      begin
        # Psych 4 removed the positional options of safe_load, while Psych < 3.1 only
        # accepts them positionally, so pick the form the installed version supports.
        if YAML.method(:safe_load).parameters.include?([:key, :aliases])
          md = YAML.safe_load(contents, permitted_classes: [], permitted_symbols: [], aliases: true)
        else
          md = YAML.safe_load(contents, [], [], true)
        end
      rescue => err
        raise MetadataError.new("Failed to parse metadata file #{file_path}: #{err.message}")
      end

      validation_result = MetadataValidator.new(md).validate_config
      unless validation_result.valid?
        if checksum_error.nil?
          raise MetadataError.new("Invalid metadata file #{file_path.to_s}:\n#{validation_result.errors.join("\n")}")
        else
          # The file is named by the checksum error, so only report the validation failures
          raise MetadataError.new(validation_result.errors.join("\n"))
        end
      end

      # Either return the valid metadata, or raise the checksum error as is
      raise checksum_error unless checksum_error.nil?

      md
    rescue MetadataError => err
      raise err if checksum_error.nil?

      # Add underlying cause from the metadata error to the checksum mismatch error
      msg = checksum_error.message + "\nWith related issue(s):\n#{err.message}"
      raise ChecksumMismatchError.new(msg)
    end
  end
end
|
|
110
|
+
|
|
111
|
+
# Compares the provided contents against the sidecar digest files stored next to
# the metadata file, one "<path>.<alg>" file per algorithm.
#
# A missing digest file is logged as a warning and skipped.
#
# @param file_path path of the metadata file, or an object responding to #path
# @param contents [String] contents of the metadata file
# @param digest_algs list of digest algorithm names to verify; nil or empty skips verification
# @raise [ChecksumMismatchError] if a stored digest does not match the calculated digest
def self.verify_digests(file_path, contents, digest_algs)
  return if digest_algs.nil? || digest_algs.empty?

  path = file_path.respond_to?(:path) ? file_path.path : file_path

  digest_algs.each do |alg|
    digest_path = "#{path}.#{alg}"
    unless File.exist?(digest_path)
      logger.warn("Missing expected #{alg} digest for #{path}")
      next
    end

    digest = DigestHelper::start_digest(alg)
    result = digest.hexdigest(contents)
    # File.read never treats the path as a command, unlike IO.read with a leading "|".
    # Surrounding whitespace (e.g. a trailing newline) is not part of the digest.
    existing_digest = File.read(digest_path).strip

    if result == existing_digest
      logger.info("Metadata fixity check using algorithm '#{alg}' succeeded for file #{path}")
    else
      raise ChecksumMismatchError.new("Metadata digest of type #{alg} did not match the contents of #{path}:" \
          + " expected #{existing_digest}, calculated #{result}")
    end
  end
end
|
|
67
138
|
end
|
|
68
|
-
end
|
|
139
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
require 'longleaf/services/metadata_serializer'
|
|
2
|
+
require 'longleaf/services/metadata_deserializer'
|
|
3
|
+
require 'longleaf/errors'
|
|
4
|
+
|
|
5
|
+
module Longleaf
  # Handles the persistence of metadata records
  class MetadataPersistenceManager
    # @param index_manager the index manager used to index persisted metadata
    def initialize(index_manager)
      @index_manager = index_manager
    end

    # Writes the metadata record of the provided file record to its metadata path,
    # using the digest algorithms of the record's metadata location, then indexes it.
    # @param file_rec [FileRecord] file record
    # @raise [MetadataError] if the file record has no metadata record
    def persist(file_rec)
      raise MetadataError.new("No metadata record provided, cannot persist metadata for #{file_rec.path}") \
          if file_rec.metadata_record.nil?

      MetadataSerializer.write(
        metadata: file_rec.metadata_record,
        file_path: file_rec.metadata_path,
        digest_algs: file_rec.storage_location.metadata_location.digests
      )

      index(file_rec)
    end

    # Indexes the provided file record, but only when the index manager reports
    # that an index is in use.
    # @param file_rec [FileRecord] file record
    def index(file_rec)
      @index_manager.index(file_rec) if @index_manager.using_index?
    end

    # Deserializes the metadata file of the provided file record and attaches the
    # resulting record to it.
    # @param file_rec [FileRecord] file record
    # @return [MetadataRecord] the metadata record for the file record
    def load(file_rec)
      digest_algs = file_rec.storage_location.metadata_location.digests
      file_rec.metadata_record = MetadataDeserializer.deserialize(file_path: file_rec.metadata_path,
                                                                   digest_algs: digest_algs)
    end
  end
end
|
|
@@ -1,76 +1,189 @@
|
|
|
1
1
|
require 'yaml'
|
|
2
2
|
require 'longleaf/models/metadata_record'
|
|
3
3
|
require 'longleaf/models/md_fields'
|
|
4
|
+
require 'longleaf/helpers/digest_helper'
|
|
5
|
+
require 'longleaf/errors'
|
|
6
|
+
require 'longleaf/logging'
|
|
4
7
|
require 'pathname'
|
|
8
|
+
require "tempfile"
|
|
5
9
|
|
|
6
|
-
# Service which serializes MetadataRecord objects
|
|
7
10
|
module Longleaf
  # Service which serializes MetadataRecord objects
  class MetadataSerializer
    extend Longleaf::Logging
    MDF ||= MDFields

    # Serialize the contents of the provided metadata record to the specified path
    #
    # @param metadata [MetadataRecord] metadata record to serialize. Required.
    # @param file_path [String] path to write the file to. Required.
    # @param format [String] format to serialize the metadata in. Default is 'yaml'.
    # @param digest_algs [Array] if provided, sidecar digest files for the metadata file
    #    will be generated for each algorithm.
    # @raise [ArgumentError] if metadata is not a MetadataRecord or the format is not supported
    def self.write(metadata:, file_path:, format: 'yaml', digest_algs: [])
      raise ArgumentError.new('metadata parameter must be a MetadataRecord') \
          unless metadata.class == MetadataRecord

      case format
      when 'yaml'
        content = to_yaml(metadata)
      else
        raise ArgumentError.new("Invalid serialization format #{format} specified")
      end

      atomic_write(file_path, content, digest_algs)
    end

    # @param metadata [MetadataRecord] metadata record to transform
    # @return [String] a yaml representation of the provided MetadataRecord
    def self.to_yaml(metadata)
      props = to_hash(metadata)
      props.to_yaml
    end

    # Create a hash representation of the given MetadataRecord file
    # @param metadata [MetadataRecord] metadata record to transform into a hash
    # @return [Hash] hash with a data section and a services section
    def self.to_hash(metadata)
      props = Hash.new

      data = Hash.new.merge(metadata.properties)
      data[MDF::REGISTERED_TIMESTAMP] = metadata.registered if metadata.registered
      data[MDF::DEREGISTERED_TIMESTAMP] = metadata.deregistered if metadata.deregistered
      # Only record checksums which are present; a nil value must not be written out
      checksums = metadata.checksums
      data[MDF::CHECKSUMS] = checksums unless checksums.nil? || checksums.empty?
      data[MDF::FILE_SIZE] = metadata.file_size unless metadata.file_size.nil?
      data[MDF::LAST_MODIFIED] = metadata.last_modified if metadata.last_modified

      props[MDF::DATA] = data

      services = Hash.new
      metadata.list_services.each do |name|
        service = metadata.service(name)
        service[MDF::STALE_REPLICAS] = service.stale_replicas if service.stale_replicas
        service[MDF::SERVICE_TIMESTAMP] = service.timestamp unless service.timestamp.nil?
        service[MDF::RUN_NEEDED] = service.run_needed if service.run_needed
        # Services without any properties are left out of the serialized form
        services[name] = service.properties unless service.properties.empty?
      end

      props[MDF::SERVICES] = services

      props
    end

    # @param format [String] encoding format used for metadata file
    # @return [String] the suffix used to indicate that a file is a metadata file in the provided encoding
    # @raise [ArgumentError] raised if the provided format is not a supported metadata encoding format
    def self.metadata_suffix(format: 'yaml')
      case format
      when 'yaml'
        '-llmd.yaml'
      else
        raise ArgumentError.new("Invalid serialization format #{format} specified")
      end
    end

    # Safely writes the new metadata file and its digests.
    # It does so by first writing the content and its digests to temp files,
    # then making the temp files the current version of the file.
    # Attempts to clean up new data in the case of failure.
    #
    # @param file_path path of the metadata file, or an object responding to #path
    # @param content [String] serialized metadata to write
    # @param digest_algs list of digest algorithms to produce sidecar digest files for
    def self.atomic_write(file_path, content, digest_algs)
      # Fill in parent directories if they do not exist
      parent_dir = Pathname(file_path).parent
      parent_dir.mkpath unless parent_dir.exist?

      file_path = file_path.path if file_path.respond_to?(:path)

      # If file does not already exist, then simply write it
      if !File.exist?(file_path)
        File.write(file_path, content)
        write_digests(file_path, content, digest_algs)
        return
      end

      # Updating file, use safe atomic write
      File.open(file_path) do |original_file|
        # Hold an exclusive lock on the existing file while it is being replaced
        original_file.flock(File::LOCK_EX)

        base_name = File.basename(file_path)
        # The temp file lives in the same directory as the destination file
        Tempfile.open(base_name, parent_dir) do |temp_file|
          begin
            # Write content to temp file
            temp_file.write(content)
            temp_file.close

            temp_path = temp_file.path

            # Set permissions of new file to match old if it exists
            old_stat = File.stat(file_path)
            set_perms(temp_path, old_stat)

            begin
              digest_paths = write_digests(temp_path, content, digest_algs)

              File.rename(temp_path, file_path)
            rescue => e
              cleanup_digests(temp_path)
              raise e
            end
          rescue => e
            temp_file.delete
            raise e
          end

          # Cleanup all existing digest files, in case the set of algorithms has changed
          cleanup_digests(file_path)
          # Move new digests into place
          digest_paths.each do |digest_path|
            # Block form keeps file_path literal; a plain replacement string would have
            # backslash sequences such as \1 or \\ interpreted by String#sub.
            File.rename(digest_path, digest_path.sub(temp_path) { file_path })
          end
        end
      end
    end

    # Applies the ownership and mode from stat_info to the file at file_path.
    # @param file_path path of the file to update
    # @param stat_info [File::Stat] stat of the file to copy ownership and mode from; skipped if nil
    # @return [boolean] false if changing ownership or mode was not permitted, true otherwise
    def self.set_perms(file_path, stat_info)
      if stat_info
        # Set correct permissions on new file
        begin
          File.chown(stat_info.uid, stat_info.gid, file_path)
          # This operation will affect filesystem ACL's
          File.chmod(stat_info.mode, file_path)
        rescue Errno::EPERM, Errno::EACCES
          # Changing file ownership failed, moving on.
          return false
        end
      end
      true
    end

    # Deletes all known digest files for the provided file path
    def self.cleanup_digests(file_path)
      DigestHelper::KNOWN_DIGESTS.each do |alg|
        digest_path = "#{file_path}.#{alg}"
        File.delete(digest_path) if File.exist?(digest_path)
      end
    end

    # Writes one "<file_path>.<alg>" sidecar file per algorithm, containing the hex
    # digest of the content.
    # @return [Array<String>] paths of the digest files written, empty if no algorithms were given
    def self.write_digests(file_path, content, digests)
      return [] if digests.nil? || digests.empty?

      digests.map do |alg|
        digest_class = DigestHelper::start_digest(alg)
        result = digest_class.hexdigest(content)
        digest_path = "#{file_path}.#{alg}"

        File.write(digest_path, result)

        logger.debug("Generated #{alg} digest for metadata file #{file_path}: #{digest_path} #{result}")

        digest_path
      end
    end

    private_class_method :cleanup_digests
    private_class_method :write_digests
    private_class_method :atomic_write
  end
end
|