longleaf 0.1.0.pre.3 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +94 -0
- data/.editorconfig +13 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +44 -0
- data/.rubocop_todo.yml +834 -0
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +98 -12
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +150 -0
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +249 -44
- data/lib/longleaf/commands/deregister_command.rb +51 -0
- data/lib/longleaf/commands/preserve_command.rb +50 -0
- data/lib/longleaf/commands/register_command.rb +34 -43
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +33 -8
- data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
- data/lib/longleaf/errors.rb +26 -7
- data/lib/longleaf/events/deregister_event.rb +53 -0
- data/lib/longleaf/events/event_names.rb +9 -0
- data/lib/longleaf/events/event_status_tracking.rb +59 -0
- data/lib/longleaf/events/preserve_event.rb +82 -0
- data/lib/longleaf/events/register_event.rb +59 -51
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +56 -0
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +215 -0
- data/lib/longleaf/helpers/service_date_helper.rb +78 -0
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
- data/lib/longleaf/logging.rb +5 -4
- data/lib/longleaf/logging/redirecting_logger.rb +30 -25
- data/lib/longleaf/models/app_fields.rb +7 -2
- data/lib/longleaf/models/file_record.rb +31 -8
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/md_fields.rb +3 -1
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +43 -16
- data/lib/longleaf/models/s3_storage_location.rb +138 -0
- data/lib/longleaf/models/service_definition.rb +7 -6
- data/lib/longleaf/models/service_fields.rb +7 -1
- data/lib/longleaf/models/service_record.rb +10 -6
- data/lib/longleaf/models/storage_location.rb +24 -21
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +59 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +124 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +198 -0
- data/lib/longleaf/preservation_services/s3_replication_service.rb +131 -0
- data/lib/longleaf/services/application_config_deserializer.rb +80 -21
- data/lib/longleaf/services/application_config_manager.rb +20 -6
- data/lib/longleaf/services/application_config_validator.rb +19 -9
- data/lib/longleaf/services/configuration_validator.rb +67 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +115 -42
- data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
- data/lib/longleaf/services/metadata_serializer.rb +139 -25
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_class_cache.rb +112 -0
- data/lib/longleaf/services/service_definition_manager.rb +10 -7
- data/lib/longleaf/services/service_definition_validator.rb +25 -18
- data/lib/longleaf/services/service_manager.rb +86 -11
- data/lib/longleaf/services/service_mapping_manager.rb +13 -12
- data/lib/longleaf/services/service_mapping_validator.rb +36 -26
- data/lib/longleaf/services/storage_location_manager.rb +76 -15
- data/lib/longleaf/services/storage_location_validator.rb +49 -35
- data/lib/longleaf/specs/config_builder.rb +47 -23
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/custom_matchers.rb +9 -0
- data/lib/longleaf/specs/file_helpers.rb +61 -0
- data/lib/longleaf/specs/metadata_builder.rb +98 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +20 -7
- data/mkdocs.yml +21 -0
- metadata +310 -26
- data/.travis.yml +0 -4
- data/lib/longleaf/commands/abstract_command.rb +0 -37
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -1,21 +1,22 @@
|
|
1
1
|
require_relative 'service_fields'
|
2
2
|
|
3
|
-
# Definition of a preservation service
|
4
3
|
module Longleaf
  # Value object describing one preservation service as it appears in the
  # application configuration. All attributes are read-only once constructed.
  class ServiceDefinition
    attr_reader :name, :work_script, :work_class, :frequency, :delay, :properties

    # Build a service definition from its configured values.
    #
    # @param name identifier of the service definition (required)
    # @param work_script script backing the service (required)
    # @param work_class optional class name for the service, nil when not configured
    # @param frequency optional frequency setting, stored exactly as provided
    # @param delay optional delay setting, stored exactly as provided
    # @param properties [Hash] remaining configuration properties for the service
    # @raise [ArgumentError] if name or work_script is nil or false
    def initialize(name:, work_script:, work_class: nil, frequency: nil, delay: nil, properties: Hash.new)
      if !name || !work_script
        raise ArgumentError, "Parameters name and work_script are required"
      end

      @name = name
      @work_script = work_script
      @work_class = work_class
      @frequency = frequency
      @delay = delay
      @properties = properties
    end
  end
end
|
@@ -1,10 +1,16 @@
|
|
1
1
|
module Longleaf
  # Constants for common configuration fields for preservation service definitions
  class ServiceFields
    # Keys identifying how a service is implemented and scheduled
    WORK_SCRIPT = 'work_script'
    WORK_CLASS = 'work_class'
    FREQUENCY = 'frequency'
    DELAY = 'delay'

    # Keys for properties shared by several service implementations
    REPLICATE_TO = 'to'
    DIGEST_ALGORITHMS = 'algorithms'

    # Property naming the policy to apply when a replica already exists at a
    # destination, its default value, and the complete list of accepted values.
    COLLISION_PROPERTY = "replica_collision_policy"
    DEFAULT_COLLISION_POLICY = "replace"
    # Frozen so that no caller can alter the list other services validate against.
    VALID_COLLISION_POLICIES = ["replace"].freeze
  end
end
|
@@ -1,27 +1,31 @@
|
|
1
|
-
# Record for an individual service in a file's metadata record.
|
2
1
|
module Longleaf
  # Record for an individual service in a file's metadata record.
  class ServiceRecord
    attr_reader :properties
    attr_accessor :stale_replicas, :timestamp, :run_needed
    attr_accessor :failure_timestamp

    # @param properties [Hash] initial properties for this service record
    # @param stale_replicas [Boolean] whether there are any stale replicas from this service
    # @param timestamp [String] timestamp when this service last ran or was initialized
    # @param run_needed [Boolean] flag indicating that this service should be run at the next available opportunity
    # @raise [ArgumentError] if properties is not a Hash (or a subclass of Hash)
    def initialize(properties: Hash.new, stale_replicas: false, timestamp: nil, run_needed: false)
      # is_a? rather than an exact class comparison so that Hash subclasses are accepted
      raise ArgumentError.new("Service properties must be a hash") unless properties.is_a?(Hash)

      @properties = properties
      @timestamp = timestamp
      @stale_replicas = stale_replicas
      @run_needed = run_needed
      # Not a constructor parameter; starts unset until assigned through the accessor
      @failure_timestamp = nil
    end

    # @param key key identifying a service property
    # @return the value of a service property identified by key
    def [](key)
      @properties[key]
    end

    # set the value of a service property identified by key
    # @param key key identifying a service property
    # @param value value to store for the property
    def []=(key, value)
      @properties[key] = value
    end
  end
end
|
@@ -1,21 +1,25 @@
|
|
1
|
-
require 'longleaf/
|
1
|
+
require 'longleaf/models/app_fields'
|
2
2
|
|
3
3
|
module Longleaf
|
4
|
+
# Representation of a configured storage location
|
4
5
|
class StorageLocation
|
6
|
+
AF ||= Longleaf::AppFields
|
7
|
+
|
5
8
|
attr_reader :name
|
6
9
|
attr_reader :path
|
7
|
-
attr_reader :
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
attr_reader :metadata_location
|
11
|
+
|
12
|
+
# @param name [String] the name of this storage location
|
13
|
+
# @param config [Hash] hash containing the configuration options for this location
|
14
|
+
# @param md_loc [MetadataLocation] metadata location associated with this storage location
|
15
|
+
def initialize(name, config, md_loc)
|
16
|
+
raise ArgumentError.new("Config parameter is required") unless config
|
17
|
+
@path = config[AF::LOCATION_PATH]
|
14
18
|
@name = name
|
15
|
-
@
|
16
|
-
@
|
19
|
+
raise ArgumentError.new("Parameters name, path and metadata location are required") unless @name && @path && md_loc
|
20
|
+
@metadata_location = md_loc
|
17
21
|
end
|
18
|
-
|
22
|
+
|
19
23
|
# Get the path for the metadata file for the given file path located in this storage location.
|
20
24
|
# @param file_path [String] path of the file
|
21
25
|
# @raise [ArgumentError] if the file_path is not provided or is not in this storage location.
|
@@ -24,16 +28,15 @@ module Longleaf
|
|
24
28
|
raise ArgumentError.new("Provided file path is not contained by storage location #{@name}: #{file_path}") \
|
25
29
|
unless file_path.start_with?(@path)
|
26
30
|
|
27
|
-
file_path
|
31
|
+
rel_file_path = relativize(file_path)
|
32
|
+
|
33
|
+
@metadata_location.metadata_path_for(rel_file_path)
|
28
34
|
end
|
29
|
-
|
30
|
-
#
|
31
|
-
# @
|
32
|
-
def
|
33
|
-
|
34
|
-
unless Dir.exist?(@path)
|
35
|
-
raise StorageLocationUnavailableError.new("Metadata path does not exist or is not a directory: #{@metadata_path}")\
|
36
|
-
unless Dir.exist?(@metadata_path)
|
35
|
+
|
36
|
+
# @param [String] path to check
|
37
|
+
# @return true if the file path is contained by the path for this location
|
38
|
+
def contains?(file_path)
|
39
|
+
file_path.start_with?(@path)
|
37
40
|
end
|
38
41
|
end
|
39
|
-
end
|
42
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'longleaf/events/event_names'
|
2
|
+
require 'longleaf/logging'
|
3
|
+
|
4
|
+
module Longleaf
  # Preservation service that confirms a file on disk still matches what was
  # recorded for it at registration time. It verifies that the file exists and
  # that its size and last modified timestamp equal the registered values.
  class FileCheckService
    include Longleaf::Logging

    # Build a FileCheckService for the given service definition.
    #
    # @param service_def [ServiceDefinition] the configuration for this service
    # @param app_manager [ApplicationConfigManager] manager for configured storage locations
    def initialize(service_def, app_manager)
      @app_manager = app_manager
      @service_def = service_def
    end

    # Compare the file's current filesystem details against its metadata record.
    #
    # @param file_rec [FileRecord] record representing the file to perform the service on.
    # @param event [String] name of the event this service is being invoked by (not consulted here).
    # @raise [PreservationServiceError] if the file is missing, or its size or
    #   last modified timestamp differs from the registered value
    def perform(file_rec, event)
      logical_path = file_rec.path
      target = file_rec.physical_path
      registered = file_rec.metadata_record

      logger.debug("Performing file information check of #{logical_path}")

      raise PreservationServiceError.new("File does not exist: #{target}") unless File.exist?(target)

      actual_size = File.size(target)
      unless actual_size == registered.file_size
        raise PreservationServiceError.new("File size for #{target} does not match the expected value: registered = #{registered.file_size} bytes, actual = #{actual_size} bytes")
      end

      # Timestamps are compared as UTC ISO 8601 strings with millisecond precision
      actual_mtime = File.mtime(target).utc.iso8601(3)
      unless actual_mtime == registered.last_modified
        raise PreservationServiceError.new("Last modified timestamp for #{target} does not match the expected value: registered = #{registered.last_modified}, actual = #{actual_mtime}")
      end
    end

    # Report whether this service should run for the given event.
    #
    # @param event [String] name of the event
    # @return [Boolean] true only for the preserve event
    def is_applicable?(event)
      case event
      when EventNames::PRESERVE then true
      else false
      end
    end
  end
end
|
@@ -0,0 +1,124 @@
|
|
1
|
+
require 'longleaf/events/event_names'
|
2
|
+
require 'longleaf/models/service_fields'
|
3
|
+
require 'longleaf/logging'
|
4
|
+
require 'longleaf/helpers/digest_helper'
|
5
|
+
require 'set'
|
6
|
+
|
7
|
+
module Longleaf
  # Preservation service which performs one or more fixity checks on a file based on the configured list
  # of digest algorithms. It currently supports 'md5', 'sha1', 'sha2', 'sha256', 'sha384', 'sha512' and 'rmd160'.
  #
  # If the service encounters a file which is missing any of the digest algorithms the service is configured
  # to check, the outcome may be controlled with the 'absent_digest' property via the following values:
  #   * 'fail' - the service will raise a ChecksumMismatchError for the missing algorithm. This is the default.
  #   * 'ignore' - the service will skip calculating any algorithms not already present for the file.
  #   * 'generate' - the service will generate and store any missing digests from the set of configured algorithms.
  class FixityCheckService
    include Longleaf::Logging

    SUPPORTED_ALGORITHMS = ['md5', 'sha1', 'sha2', 'sha256', 'sha384', 'sha512', 'rmd160'].freeze

    # service configuration property indicating how to handle situations where a file does not
    # have a digest for one of the expected algorithms on record.
    ABSENT_DIGEST_PROPERTY = 'absent_digest'
    FAIL_IF_ABSENT = 'fail'
    GENERATE_IF_ABSENT = 'generate'
    IGNORE_IF_ABSENT = 'ignore'
    ABSENT_DIGEST_OPTIONS = [FAIL_IF_ABSENT, GENERATE_IF_ABSENT, IGNORE_IF_ABSENT].freeze

    # Initialize a FixityCheckService from the given service definition
    #
    # @param service_def [ServiceDefinition] the configuration for this service
    # @param app_manager [ApplicationConfigManager] manager for configured storage locations
    # @raise [ArgumentError] if the absent_digest option is not recognized, no digest
    #   algorithms are configured, or a configured algorithm is not supported
    def initialize(service_def, app_manager)
      @service_def = service_def
      @absent_digest_behavior = @service_def.properties[ABSENT_DIGEST_PROPERTY] || FAIL_IF_ABSENT
      unless ABSENT_DIGEST_OPTIONS.include?(@absent_digest_behavior)
        raise ArgumentError.new("Invalid option '#{@absent_digest_behavior}' for property #{ABSENT_DIGEST_PROPERTY} in service #{service_def.name}")
      end

      service_algs = service_def.properties[ServiceFields::DIGEST_ALGORITHMS]
      if service_algs.nil? || service_algs.empty?
        raise ArgumentError.new("FixityCheckService from definition #{service_def.name} requires a list of one or more digest algorithms")
      end

      # A single algorithm may be configured as a bare string rather than a list
      service_algs = [service_algs] if service_algs.is_a?(String)

      # Store the list of digest algorithms to verify, using normalized algorithm names.
      @digest_algs = Set.new
      service_algs.each do |alg|
        normalized_alg = normalize_algorithm(alg)
        unless SUPPORTED_ALGORITHMS.include?(normalized_alg)
          raise ArgumentError.new("Unsupported checksum algorithm '#{alg}' in definition #{service_def.name}. Supported algorithms are: #{SUPPORTED_ALGORITHMS}")
        end

        @digest_algs << normalized_alg
      end
    end

    # Perform all configured fixity checks on the provided file
    #
    # @param file_rec [FileRecord] record representing the file to perform the service on.
    # @param event [String] name of the event this service is being invoked by (not consulted here).
    # @raise [ChecksumMismatchError] if the checksum on record does not match the generated checksum,
    #   or if a digest is absent and the service is configured to fail in that case
    def perform(file_rec, event)
      path = file_rec.path
      phys_path = file_rec.physical_path
      md_rec = file_rec.metadata_record

      file_digests = recorded_digests(md_rec, path)

      @digest_algs.each do |alg|
        existing_digest = file_digests[alg]

        if existing_digest.nil?
          case @absent_digest_behavior
          when FAIL_IF_ABSENT
            raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{path}: no existing digest of type '#{alg}' on record.")
          when IGNORE_IF_ABSENT
            logger.debug("Skipping check of algorithm '#{alg}' for file #{path}: no digest on record.")
            next
          end
        end

        digest = DigestHelper::start_digest(alg)
        digest.file(phys_path)
        generated_digest = digest.hexdigest

        if existing_digest.nil?
          # Only reachable with the 'generate' behavior: the other behaviors have
          # already raised or skipped above. Store the newly calculated digest.
          # NOTE(review): assumes md_rec.checksums is not nil here — confirm against MetadataRecord.
          md_rec.checksums[alg] = generated_digest
          logger.info("Generated and stored digest using algorithm '#{alg}' for file #{phys_path}")
        elsif existing_digest == generated_digest
          logger.info("Fixity check using algorithm '#{alg}' succeeded for file #{phys_path}")
        else
          raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{phys_path}: expected '#{existing_digest}', calculated '#{generated_digest}'.")
        end
      end
    end

    # Determine if this service is applicable for the provided event, given the configured service definition
    #
    # @param event [String] name of the event
    # @return [Boolean] returns true if this service is applicable for the provided event
    def is_applicable?(event)
      case event
      when EventNames::PRESERVE
        true
      else
        false
      end
    end

    private

    # Normalize an algorithm name by lowercasing it and removing hyphens.
    def normalize_algorithm(alg)
      alg.downcase.delete('-')
    end

    # Collect the digests already on record for the file, keyed by normalized
    # algorithm name and limited to the algorithms this service is configured to check.
    def recorded_digests(md_rec, path)
      digests = Hash.new
      md_rec.checksums&.each do |alg, digest|
        normalized_alg = normalize_algorithm(alg)
        if @digest_algs.include?(normalized_alg)
          digests[normalized_alg] = digest
        else
          logger.debug("Metadata for file #{path} contains unexpected '#{alg}' digest, it will be ignored.")
        end
      end
      digests
    end
  end
end
|
@@ -0,0 +1,198 @@
|
|
1
|
+
require 'longleaf/events/event_names'
require 'longleaf/logging'
require 'longleaf/errors'
require 'longleaf/models/file_record'
require 'longleaf/models/service_fields'
require 'longleaf/events/register_event'
require 'longleaf/candidates/single_digest_provider'
require 'fileutils'
require 'open3'
require 'shellwords'
|
9
|
+
|
10
|
+
module Longleaf
  # Preservation service which performs replication of a file to one or more destinations using rsync.
  #
  # The service definition must contain one or more destinations, specified with the "to" property.
  # These destinations must be either a known storage location name, a remote path, or absolute path.
  #
  # Optional service configuration properties:
  # * replica_collision_policy = specifies the desired outcome if the service attempts to replicate
  #      a file which already exists at a destination. Default: "replace".
  # * rsync_command = the command to invoke in order to execute rsync. Default: "rsync"
  # * rsync_options = additional parameters that will be passed along to rsync. Cannot include options
  #      which change the target of the command or prevent its execution, such as "files-from", "dry-run",
  #      "help", etc. Command will always include "-R". Default "-a".
  class RsyncReplicationService
    include Longleaf::Logging
    SF ||= Longleaf::ServiceFields

    RSYNC_COMMAND_PROPERTY = "rsync_command"
    DEFAULT_COMMAND = "rsync"

    RSYNC_OPTIONS_PROPERTY = "rsync_options"
    DEFAULT_OPTIONS = "-a"
    # Option names (without leading dashes) that may not appear in rsync_options
    DISALLOWED_OPTIONS = ["files-from", "n", "dry-run", "exclude", "exclude-from", "cvs-exclude",
                          "h", "help", "f", "F", "filter"].freeze

    attr_reader :command, :options, :collision_policy

    # Initialize a RsyncReplicationService from the given service definition
    #
    # @param service_def [ServiceDefinition] the configuration for this service
    # @param app_manager [ApplicationConfigManager] the application configuration
    # @raise [ArgumentError] if rsync_options contains a disallowed option, the collision policy is
    #   not valid, no destinations are configured, or a destination names an unknown storage location
    def initialize(service_def, app_manager)
      @service_def = service_def
      @app_manager = app_manager

      @command = @service_def.properties[RSYNC_COMMAND_PROPERTY] || DEFAULT_COMMAND

      # Validate rsync parameters
      @options = @service_def.properties[RSYNC_OPTIONS_PROPERTY] || DEFAULT_OPTIONS
      if contains_disallowed_option?(@options)
        raise ArgumentError.new("Service #{service_def.name} specifies a disallowed rsync parameter," \
            + " rsync_options may not include the following: #{DISALLOWED_OPTIONS.join(' ')}")
      end

      # Set and validate the replica collision policy
      @collision_policy = @service_def.properties[SF::COLLISION_PROPERTY] || SF::DEFAULT_COLLISION_POLICY
      unless SF::VALID_COLLISION_POLICIES.include?(@collision_policy)
        raise ArgumentError.new("Service #{service_def.name} received invalid #{SF::COLLISION_PROPERTY}" \
            + " value #{@collision_policy}")
      end

      # Store and validate destinations
      replicate_to = @service_def.properties[SF::REPLICATE_TO]
      if replicate_to.nil? || replicate_to.empty?
        raise ArgumentError.new("Service #{service_def.name} must provide one or more replication destinations.")
      end
      # A single destination may be configured as a bare string rather than a list
      replicate_to = [replicate_to] if replicate_to.is_a?(String)

      loc_manager = app_manager.location_manager
      # Build list of destinations, translating to storage locations when relevant
      @destinations = replicate_to.map do |dest|
        # Assume that if destination contains a : or / it is a path rather than storage location
        next dest if dest =~ /[:\/]/

        unless loc_manager.locations.key?(dest)
          raise ArgumentError.new("Service #{service_def.name} specifies unknown storage location '#{dest}'" \
              + " as a replication destination")
        end
        loc_manager.locations[dest]
      end
    end

    # During a replication event, perform replication of the specified file to all configured destinations
    # as necessary.
    #
    # @param file_rec [FileRecord] record representing the file to perform the service on.
    # @param event [String] name of the event this service is being invoked by (not consulted here).
    # @raise [PreservationServiceError] if the rsync replication fails, or a non-filesystem storage
    #   location is the destination for a file whose physical and logical paths differ
    # @raise [StorageLocationUnavailableError] if a destination is not available
    def perform(file_rec, event)
      @destinations.each do |destination|
        dest_is_storage_loc = destination.is_a?(Longleaf::StorageLocation)
        dest_path = dest_is_storage_loc ? destination.path : destination

        # Determine the path to the file being replicated relative to its storage location
        rel_path = file_rec.storage_location.relativize(file_rec.path)

        parent_dir = nil
        if file_rec.path == file_rec.physical_path
          rsync_options = @options + " -R"
          # source path with . so that rsync will only create destination directories starting from that point
          source_path = File.join(file_rec.storage_location.path, "./#{rel_path}")
        else
          if dest_is_storage_loc && !destination.is_a?(Longleaf::FilesystemStorageLocation)
            raise PreservationServiceError.new(
                "Destination #{destination.name} does not currently support separate physical and logical paths")
          end

          rsync_options = @options + " --no-relative"
          source_path = file_rec.physical_path
          dest_path = File.join(dest_path, rel_path)
          # rsync cannot fill in missing parent directories when the source and
          # destination filenames differ, so they are created below once the
          # destination has been verified.
          # NOTE(review): for remote (host:path) destinations this creates a local directory — confirm intent.
          parent_dir = File.dirname(dest_path)
        end

        # Check that the destination is available before anything is written to it.
        # Creating parent directories first would silently create a missing
        # destination root and let this check pass regardless.
        verify_destination_available(destination, file_rec)

        if parent_dir
          logger.debug("Creating parent dirs #{parent_dir} for #{file_rec.path}")
          FileUtils.mkdir_p(parent_dir)
        end

        # Paths are shell-escaped so that quotes, spaces or other shell metacharacters
        # in a file name cannot break or alter the command. The command and options
        # come from configuration and are passed through as written.
        rsync_call = "#{@command} #{Shellwords.escape(source_path)} #{Shellwords.escape(dest_path)} #{rsync_options}"
        logger.debug("Invoking rsync with command: #{rsync_call}")
        _stdout, stderr, status = Open3.capture3(rsync_call)
        raise PreservationServiceError.new("Failed to replicate #{file_rec.path} to #{dest_path}: #{stderr}") \
            unless status.success?

        logger.info("Replicated #{file_rec.path} to destination #{dest_path}")

        # For destinations which are storage locations, register the replica with longleaf
        register_replica(destination, rel_path, file_rec) if dest_is_storage_loc
      end
    end

    # Determine if this service is applicable for the provided event, given the configured service definition
    #
    # @param event [String] name of the event
    # @return [Boolean] returns true if this service is applicable for the provided event
    def is_applicable?(event)
      case event
      when EventNames::PRESERVE
        true
      else
        false
      end
    end

    private

    # @param options [String] the configured rsync options
    # @return [Boolean] true if options contains any of the DISALLOWED_OPTIONS, either as a
    #   long option (--name) or as a letter within a group of short options (-abc)
    def contains_disallowed_option?(options)
      DISALLOWED_OPTIONS.any? do |disallowed|
        if disallowed.length == 1
          options =~ /(\A| )-[a-zA-Z0-9]*#{disallowed}[a-zA-Z0-9]*( |=|\z)/
        else
          options =~ /(\A| )--#{disallowed}( |=|\z)/
        end
      end
    end

    # Raise if the destination cannot currently be written to. Storage locations are
    # asked via available?; absolute local paths must exist as directories. Other
    # destination strings are not checked.
    #
    # @raise [StorageLocationUnavailableError] if the destination is not available
    def verify_destination_available(destination, file_rec)
      if destination.is_a?(Longleaf::StorageLocation)
        begin
          destination.available?
        rescue StorageLocationUnavailableError => e
          # Re-raise with the file and destination added for context
          raise StorageLocationUnavailableError.new("Cannot replicate #{file_rec.path} to destination #{destination.name}: " \
              + e.message)
        end
      elsif destination.start_with?("/")
        raise StorageLocationUnavailableError.new("Cannot replicate #{file_rec.path} to destination" \
            + " #{destination}, path does not exist.") unless Dir.exist?(destination)
      end
    end

    # Register the newly written replica in its destination storage location by
    # running a forced register event, passing along the source file's checksums.
    def register_replica(destination, rel_path, file_rec)
      dest_file_path = File.join(destination.path, rel_path)
      dest_file_rec = FileRecord.new(dest_file_path, destination)

      register_event = RegisterEvent.new(file_rec: dest_file_rec,
          app_manager: @app_manager,
          force: true,
          digest_provider: SingleDigestProvider.new(file_rec.metadata_record.checksums))
      register_event.perform
    end
  end
end
|