longleaf 0.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +94 -0
- data/.editorconfig +13 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +44 -0
- data/.rubocop_todo.yml +834 -0
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +98 -12
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +150 -0
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +252 -46
- data/lib/longleaf/commands/deregister_command.rb +51 -0
- data/lib/longleaf/commands/preserve_command.rb +50 -0
- data/lib/longleaf/commands/register_command.rb +34 -43
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +33 -8
- data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
- data/lib/longleaf/errors.rb +26 -7
- data/lib/longleaf/events/deregister_event.rb +53 -0
- data/lib/longleaf/events/event_names.rb +9 -0
- data/lib/longleaf/events/event_status_tracking.rb +59 -0
- data/lib/longleaf/events/preserve_event.rb +82 -0
- data/lib/longleaf/events/register_event.rb +59 -51
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +56 -0
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +215 -0
- data/lib/longleaf/helpers/service_date_helper.rb +78 -0
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
- data/lib/longleaf/logging.rb +5 -4
- data/lib/longleaf/logging/redirecting_logger.rb +30 -25
- data/lib/longleaf/models/app_fields.rb +7 -2
- data/lib/longleaf/models/file_record.rb +31 -8
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/md_fields.rb +3 -1
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +43 -16
- data/lib/longleaf/models/s3_storage_location.rb +138 -0
- data/lib/longleaf/models/service_definition.rb +7 -6
- data/lib/longleaf/models/service_fields.rb +7 -1
- data/lib/longleaf/models/service_record.rb +10 -6
- data/lib/longleaf/models/storage_location.rb +24 -19
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +59 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +124 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +198 -0
- data/lib/longleaf/preservation_services/s3_replication_service.rb +131 -0
- data/lib/longleaf/services/application_config_deserializer.rb +81 -24
- data/lib/longleaf/services/application_config_manager.rb +20 -6
- data/lib/longleaf/services/application_config_validator.rb +19 -9
- data/lib/longleaf/services/configuration_validator.rb +67 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +115 -42
- data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
- data/lib/longleaf/services/metadata_serializer.rb +156 -23
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_class_cache.rb +112 -0
- data/lib/longleaf/services/service_definition_manager.rb +10 -7
- data/lib/longleaf/services/service_definition_validator.rb +25 -18
- data/lib/longleaf/services/service_manager.rb +86 -11
- data/lib/longleaf/services/service_mapping_manager.rb +13 -12
- data/lib/longleaf/services/service_mapping_validator.rb +36 -26
- data/lib/longleaf/services/storage_location_manager.rb +76 -15
- data/lib/longleaf/services/storage_location_validator.rb +49 -35
- data/lib/longleaf/specs/config_builder.rb +47 -23
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/custom_matchers.rb +9 -0
- data/lib/longleaf/specs/file_helpers.rb +61 -0
- data/lib/longleaf/specs/metadata_builder.rb +98 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +20 -7
- data/mkdocs.yml +21 -0
- metadata +308 -24
- data/.travis.yml +0 -4
- data/lib/longleaf/commands/abstract_command.rb +0 -37
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
|
@@ -1,21 +1,22 @@
|
|
|
1
1
|
require_relative 'service_fields'
|
|
2
2
|
|
|
3
|
-
# Definition of a preservation service
|
|
4
3
|
module Longleaf
|
|
4
|
+
# Definition of a configured preservation service
|
|
5
5
|
class ServiceDefinition
|
|
6
6
|
attr_reader :name
|
|
7
|
-
attr_reader :work_script
|
|
7
|
+
attr_reader :work_script, :work_class
|
|
8
8
|
attr_reader :frequency, :delay
|
|
9
9
|
attr_reader :properties
|
|
10
|
-
|
|
11
|
-
def initialize(name:, work_script:, frequency: nil, delay: nil, properties: Hash.new)
|
|
10
|
+
|
|
11
|
+
def initialize(name:, work_script:, work_class: nil, frequency: nil, delay: nil, properties: Hash.new)
|
|
12
12
|
raise ArgumentError.new("Parameters name and work_script are required") unless name && work_script
|
|
13
|
-
|
|
13
|
+
|
|
14
14
|
@properties = properties
|
|
15
15
|
@name = name
|
|
16
16
|
@work_script = work_script
|
|
17
|
+
@work_class = work_class
|
|
17
18
|
@frequency = frequency
|
|
18
19
|
@delay = delay
|
|
19
20
|
end
|
|
20
21
|
end
|
|
21
|
-
end
|
|
22
|
+
end
|
|
@@ -1,10 +1,16 @@
|
|
|
1
1
|
module Longleaf
|
|
2
|
+
# Constants for common configuration fields for preservation service definitions
|
|
2
3
|
class ServiceFields
|
|
3
4
|
WORK_SCRIPT = 'work_script'
|
|
5
|
+
WORK_CLASS = 'work_class'
|
|
4
6
|
FREQUENCY = 'frequency'
|
|
5
7
|
DELAY = 'delay'
|
|
6
|
-
|
|
8
|
+
|
|
7
9
|
REPLICATE_TO = 'to'
|
|
8
10
|
DIGEST_ALGORITHMS = 'algorithms'
|
|
11
|
+
|
|
12
|
+
COLLISION_PROPERTY = "replica_collision_policy"
|
|
13
|
+
DEFAULT_COLLISION_POLICY = "replace"
|
|
14
|
+
VALID_COLLISION_POLICIES = ["replace"]
|
|
9
15
|
end
|
|
10
16
|
end
|
|
@@ -1,27 +1,31 @@
|
|
|
1
|
-
# Record for an individual service in a file's metadata record.
|
|
2
1
|
module Longleaf
|
|
2
|
+
# Record for an individual service in a file's metadata record.
|
|
3
3
|
class ServiceRecord
|
|
4
4
|
attr_reader :properties
|
|
5
5
|
attr_accessor :stale_replicas, :timestamp, :run_needed
|
|
6
|
-
|
|
6
|
+
attr_accessor :failure_timestamp
|
|
7
|
+
|
|
7
8
|
# @param properties [Hash] initial properties for this service record
|
|
9
|
+
# @param stale_replicas [Boolean] whether there are any stale replicas from this service
|
|
10
|
+
# @param timestamp [String] timestamp when this service last ran or was initialized
|
|
11
|
+
# @param run_needed [Boolean] flag indicating that this service should be run at the next available opportunity
|
|
8
12
|
def initialize(properties: Hash.new, stale_replicas: false, timestamp: nil, run_needed: false)
|
|
9
13
|
raise ArgumentError.new("Service properties must be a hash") if properties.class != Hash
|
|
10
|
-
|
|
14
|
+
|
|
11
15
|
@properties = properties
|
|
12
16
|
@timestamp = timestamp
|
|
13
17
|
@stale_replicas = stale_replicas
|
|
14
18
|
@run_needed = run_needed
|
|
15
19
|
end
|
|
16
|
-
|
|
20
|
+
|
|
17
21
|
# @return the value of a service property identified by key
|
|
18
22
|
def [](key)
|
|
19
23
|
@properties[key]
|
|
20
24
|
end
|
|
21
|
-
|
|
25
|
+
|
|
22
26
|
# set the value of a service property identified by key
|
|
23
27
|
def []=(key, value)
|
|
24
28
|
@properties[key] = value
|
|
25
29
|
end
|
|
26
30
|
end
|
|
27
|
-
end
|
|
31
|
+
end
|
|
@@ -1,19 +1,25 @@
|
|
|
1
|
-
require 'longleaf/
|
|
1
|
+
require 'longleaf/models/app_fields'
|
|
2
2
|
|
|
3
3
|
module Longleaf
|
|
4
|
+
# Representation of a configured storage location
|
|
4
5
|
class StorageLocation
|
|
6
|
+
AF ||= Longleaf::AppFields
|
|
7
|
+
|
|
5
8
|
attr_reader :name
|
|
6
9
|
attr_reader :path
|
|
7
|
-
attr_reader :
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
attr_reader :metadata_location
|
|
11
|
+
|
|
12
|
+
# @param name [String] the name of this storage location
|
|
13
|
+
# @param config [Hash] hash containing the configuration options for this location
|
|
14
|
+
# @param md_loc [MetadataLocation] metadata location associated with this storage location
|
|
15
|
+
def initialize(name, config, md_loc)
|
|
16
|
+
raise ArgumentError.new("Config parameter is required") unless config
|
|
17
|
+
@path = config[AF::LOCATION_PATH]
|
|
13
18
|
@name = name
|
|
14
|
-
@
|
|
19
|
+
raise ArgumentError.new("Parameters name, path and metadata location are required") unless @name && @path && md_loc
|
|
20
|
+
@metadata_location = md_loc
|
|
15
21
|
end
|
|
16
|
-
|
|
22
|
+
|
|
17
23
|
# Get the path for the metadata file for the given file path located in this storage location.
|
|
18
24
|
# @param file_path [String] path of the file
|
|
19
25
|
# @raise [ArgumentError] if the file_path is not provided or is not in this storage location.
|
|
@@ -22,16 +28,15 @@ module Longleaf
|
|
|
22
28
|
raise ArgumentError.new("Provided file path is not contained by storage location #{@name}: #{file_path}") \
|
|
23
29
|
unless file_path.start_with?(@path)
|
|
24
30
|
|
|
25
|
-
file_path
|
|
31
|
+
rel_file_path = relativize(file_path)
|
|
32
|
+
|
|
33
|
+
@metadata_location.metadata_path_for(rel_file_path)
|
|
26
34
|
end
|
|
27
|
-
|
|
28
|
-
#
|
|
29
|
-
# @
|
|
30
|
-
def
|
|
31
|
-
|
|
32
|
-
unless Dir.exist?(@path)
|
|
33
|
-
raise StorageLocationUnavailableError.new("Metadata path does not exist or is not a directory: #{@metadata_path}")\
|
|
34
|
-
unless Dir.exist?(@metadata_path)
|
|
35
|
+
|
|
36
|
+
# @param file_path [String] path to check
|
|
37
|
+
# @return true if the file path is contained by the path for this location
|
|
38
|
+
def contains?(file_path)
|
|
39
|
+
file_path.start_with?(@path)
|
|
35
40
|
end
|
|
36
41
|
end
|
|
37
|
-
end
|
|
42
|
+
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
require 'longleaf/events/event_names'
|
|
2
|
+
require 'longleaf/logging'
|
|
3
|
+
|
|
4
|
+
module Longleaf
|
|
5
|
+
# Preservation service which validates a file using current filesystem information compared against the
|
|
6
|
+
# last registered details for that file. Checks using file name, size and last modified timestamp.
|
|
7
|
+
class FileCheckService
|
|
8
|
+
include Longleaf::Logging
|
|
9
|
+
|
|
10
|
+
# Initialize a FileCheckService from the given service definition
|
|
11
|
+
#
|
|
12
|
+
# @param service_def [ServiceDefinition] the configuration for this service
|
|
13
|
+
# @param app_manager [ApplicationConfigManager] manager for configured storage locations
|
|
14
|
+
def initialize(service_def, app_manager)
|
|
15
|
+
@service_def = service_def
|
|
16
|
+
@app_manager = app_manager
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
# Perform file information check.
|
|
20
|
+
#
|
|
21
|
+
# @param file_rec [FileRecord] record representing the file to perform the service on.
|
|
22
|
+
# @param event [String] name of the event this service is being invoked by.
|
|
23
|
+
# @raise [PreservationServiceError] if the file system information does not match the stored details
|
|
24
|
+
def perform(file_rec, event)
|
|
25
|
+
file_path = file_rec.path
|
|
26
|
+
phys_path = file_rec.physical_path
|
|
27
|
+
md_rec = file_rec.metadata_record
|
|
28
|
+
|
|
29
|
+
logger.debug("Performing file information check of #{file_path}")
|
|
30
|
+
|
|
31
|
+
if !File.exist?(phys_path)
|
|
32
|
+
raise PreservationServiceError.new("File does not exist: #{phys_path}")
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
file_size = File.size(phys_path)
|
|
36
|
+
if file_size != md_rec.file_size
|
|
37
|
+
raise PreservationServiceError.new("File size for #{phys_path} does not match the expected value: registered = #{md_rec.file_size} bytes, actual = #{file_size} bytes")
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
last_modified = File.mtime(phys_path).utc.iso8601(3)
|
|
41
|
+
if last_modified != md_rec.last_modified
|
|
42
|
+
raise PreservationServiceError.new("Last modified timestamp for #{phys_path} does not match the expected value: registered = #{md_rec.last_modified}, actual = #{last_modified}")
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Determine if this service is applicable for the provided event, given the configured service definition
|
|
47
|
+
#
|
|
48
|
+
# @param event [String] name of the event
|
|
49
|
+
# @return [Boolean] returns true if this service is applicable for the provided event
|
|
50
|
+
def is_applicable?(event)
|
|
51
|
+
case event
|
|
52
|
+
when EventNames::PRESERVE
|
|
53
|
+
true
|
|
54
|
+
else
|
|
55
|
+
false
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
require 'longleaf/events/event_names'
|
|
2
|
+
require 'longleaf/models/service_fields'
|
|
3
|
+
require 'longleaf/logging'
|
|
4
|
+
require 'longleaf/helpers/digest_helper'
|
|
5
|
+
require 'set'
|
|
6
|
+
|
|
7
|
+
module Longleaf
|
|
8
|
+
# Preservation service which performs one or more fixity checks on a file based on the configured list
|
|
9
|
+
# of digest algorithms. It currently supports 'md5', 'sha1', 'sha2', 'sha256', 'sha384', 'sha512' and 'rmd160'.
|
|
10
|
+
#
|
|
11
|
+
# If the service encounters a file which is missing any of the digest algorithms the service is configured
|
|
12
|
+
# to check, the outcome may be controlled with the 'absent_digest' property via the following values:
|
|
13
|
+
# * 'fail' - the service will raise a ChecksumMismatchError for the missing algorithm. This is the default.
|
|
14
|
+
# * 'ignore' - the service will skip calculating any algorithms not already present for the file.
|
|
15
|
+
# * 'generate' - the service will generate and store any missing digests from the set of configured algorithms.
|
|
16
|
+
class FixityCheckService
|
|
17
|
+
include Longleaf::Logging
|
|
18
|
+
|
|
19
|
+
SUPPORTED_ALGORITHMS = ['md5', 'sha1', 'sha2', 'sha256', 'sha384', 'sha512', 'rmd160']
|
|
20
|
+
|
|
21
|
+
# service configuration property indicating how to handle situations where a file does not
|
|
22
|
+
# have a digest for one of the expected algorithms on record.
|
|
23
|
+
ABSENT_DIGEST_PROPERTY = 'absent_digest'
|
|
24
|
+
FAIL_IF_ABSENT = 'fail'
|
|
25
|
+
GENERATE_IF_ABSENT = 'generate'
|
|
26
|
+
IGNORE_IF_ABSENT = 'ignore'
|
|
27
|
+
ABSENT_DIGEST_OPTIONS = [FAIL_IF_ABSENT, GENERATE_IF_ABSENT, IGNORE_IF_ABSENT]
|
|
28
|
+
|
|
29
|
+
# Initialize a FixityCheckService from the given service definition
|
|
30
|
+
#
|
|
31
|
+
# @param service_def [ServiceDefinition] the configuration for this service
|
|
32
|
+
# @param app_manager [ApplicationConfigManager] manager for configured storage locations
|
|
33
|
+
def initialize(service_def, app_manager)
|
|
34
|
+
@service_def = service_def
|
|
35
|
+
@absent_digest_behavior = @service_def.properties[ABSENT_DIGEST_PROPERTY] || FAIL_IF_ABSENT
|
|
36
|
+
unless ABSENT_DIGEST_OPTIONS.include?(@absent_digest_behavior)
|
|
37
|
+
raise ArgumentError.new("Invalid option '#{@absent_digest_behavior}' for property #{ABSENT_DIGEST_PROPERTY} in service #{service_def.name}")
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
service_algs = service_def.properties[ServiceFields::DIGEST_ALGORITHMS]
|
|
41
|
+
if service_algs.nil? || service_algs.empty?
|
|
42
|
+
raise ArgumentError.new("FixityCheckService from definition #{service_def.name} requires a list of one or more digest algorithms")
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
service_algs = [service_algs] if service_algs.is_a?(String)
|
|
46
|
+
|
|
47
|
+
# Store the list of digest algorithms to verify, using normalized algorithm names.
|
|
48
|
+
@digest_algs = Set.new
|
|
49
|
+
service_algs.each do |alg|
|
|
50
|
+
normalized_alg = alg.downcase.delete('-')
|
|
51
|
+
if SUPPORTED_ALGORITHMS.include?(normalized_alg)
|
|
52
|
+
@digest_algs << normalized_alg
|
|
53
|
+
else
|
|
54
|
+
raise ArgumentError.new("Unsupported checksum algorithm '#{alg}' in definition #{service_def.name}. Supported algorithms are: #{SUPPORTED_ALGORITHMS}")
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Perform all configured fixity checks on the provided file
|
|
60
|
+
#
|
|
61
|
+
# @param file_rec [FileRecord] record representing the file to perform the service on.
|
|
62
|
+
# @param event [String] name of the event this service is being invoked by.
|
|
63
|
+
# @raise [ChecksumMismatchError] if the checksum on record does not match the generated checksum
|
|
64
|
+
def perform(file_rec, event)
|
|
65
|
+
path = file_rec.path
|
|
66
|
+
phys_path = file_rec.physical_path
|
|
67
|
+
md_rec = file_rec.metadata_record
|
|
68
|
+
|
|
69
|
+
# Get the list of existing checksums for the file and normalize algorithm names
|
|
70
|
+
file_digests = Hash.new
|
|
71
|
+
md_rec.checksums&.each do |alg, digest|
|
|
72
|
+
normalized_alg = alg.downcase.delete('-')
|
|
73
|
+
if @digest_algs.include?(normalized_alg)
|
|
74
|
+
file_digests[normalized_alg] = digest
|
|
75
|
+
else
|
|
76
|
+
logger.debug("Metadata for file #{path} contains unexpected '#{alg}' digest, it will be ignored.")
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
@digest_algs.each do |alg|
|
|
81
|
+
existing_digest = file_digests[alg]
|
|
82
|
+
|
|
83
|
+
if existing_digest.nil?
|
|
84
|
+
if @absent_digest_behavior == FAIL_IF_ABSENT
|
|
85
|
+
raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{path}: no existing digest of type '#{alg}' on record.")
|
|
86
|
+
elsif @absent_digest_behavior == IGNORE_IF_ABSENT
|
|
87
|
+
logger.debug("Skipping check of algorithm '#{alg}' for file #{path}: no digest on record.")
|
|
88
|
+
next
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
digest = DigestHelper::start_digest(alg)
|
|
93
|
+
digest.file(phys_path)
|
|
94
|
+
generated_digest = digest.hexdigest
|
|
95
|
+
|
|
96
|
+
# Store the missing checksum if using the 'generate' behavior
|
|
97
|
+
if existing_digest.nil? && @absent_digest_behavior == GENERATE_IF_ABSENT
|
|
98
|
+
md_rec.checksums[alg] = generated_digest
|
|
99
|
+
logger.info("Generated and stored digest using algorithm '#{alg}' for file #{phys_path}")
|
|
100
|
+
else
|
|
101
|
+
# Compare the new digest to the one on record
|
|
102
|
+
if existing_digest == generated_digest
|
|
103
|
+
logger.info("Fixity check using algorithm '#{alg}' succeeded for file #{phys_path}")
|
|
104
|
+
else
|
|
105
|
+
raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{phys_path}: expected '#{existing_digest}', calculated '#{generated_digest}.'")
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
# Determine if this service is applicable for the provided event, given the configured service definition
|
|
112
|
+
#
|
|
113
|
+
# @param event [String] name of the event
|
|
114
|
+
# @return [Boolean] returns true if this service is applicable for the provided event
|
|
115
|
+
def is_applicable?(event)
|
|
116
|
+
case event
|
|
117
|
+
when EventNames::PRESERVE
|
|
118
|
+
true
|
|
119
|
+
else
|
|
120
|
+
false
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
end
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
require 'longleaf/events/event_names'
|
|
2
|
+
require 'longleaf/logging'
|
|
3
|
+
require 'longleaf/errors'
|
|
4
|
+
require 'longleaf/models/file_record'
|
|
5
|
+
require 'longleaf/models/service_fields'
|
|
6
|
+
require 'longleaf/events/register_event'
|
|
7
|
+
require 'longleaf/candidates/single_digest_provider'
|
|
8
|
+
require 'open3'
|
|
9
|
+
|
|
10
|
+
module Longleaf
|
|
11
|
+
# Preservation service which performs replication of a file to one or more destinations using rsync.
|
|
12
|
+
#
|
|
13
|
+
# The service definition must contain one or more destinations, specified with the "to" property.
|
|
14
|
+
# These destinations must be either a known storage location name, a remote path, or absolute path.
|
|
15
|
+
#
|
|
16
|
+
# Optional service configuration properties:
|
|
17
|
+
# * replica_collision_policy = specifies the desired outcome if the service attempts to replicate
|
|
18
|
+
# a file which already exists at a destination. Default: "replace".
|
|
19
|
+
# * rsync_command = the command to invoke in order to execute rsync. Default: "rsync"
|
|
20
|
+
# * rsync_options = additional parameters that will be passed along to rsync. Cannot include options
|
|
21
|
+
# which change the target of the command or prevent its execution, such as "files-from", "dry-run",
|
|
22
|
+
# "help", etc. Command will always include "-R". Default "-a".
|
|
23
|
+
class RsyncReplicationService
|
|
24
|
+
include Longleaf::Logging
|
|
25
|
+
SF ||= Longleaf::ServiceFields
|
|
26
|
+
|
|
27
|
+
RSYNC_COMMAND_PROPERTY = "rsync_command"
|
|
28
|
+
DEFAULT_COMMAND = "rsync"
|
|
29
|
+
|
|
30
|
+
RSYNC_OPTIONS_PROPERTY = "rsync_options"
|
|
31
|
+
DEFAULT_OPTIONS = "-a"
|
|
32
|
+
DISALLOWED_OPTIONS = ["files-from", "n", "dry-run", "exclude", "exclude-from", "cvs-exclude",
|
|
33
|
+
"h", "help", "f", "F", "filter"]
|
|
34
|
+
|
|
35
|
+
attr_reader :command, :options, :collision_policy
|
|
36
|
+
|
|
37
|
+
# Initialize a RsyncReplicationService from the given service definition
|
|
38
|
+
#
|
|
39
|
+
# @param service_def [ServiceDefinition] the configuration for this service
|
|
40
|
+
# @param app_manager [ApplicationConfigManager] the application configuration
|
|
41
|
+
def initialize(service_def, app_manager)
|
|
42
|
+
@service_def = service_def
|
|
43
|
+
@app_manager = app_manager
|
|
44
|
+
|
|
45
|
+
@command = @service_def.properties[RSYNC_COMMAND_PROPERTY] || DEFAULT_COMMAND
|
|
46
|
+
|
|
47
|
+
# Validate rsync parameters
|
|
48
|
+
@options = @service_def.properties[RSYNC_OPTIONS_PROPERTY] || DEFAULT_OPTIONS
|
|
49
|
+
if contains_disallowed_option?(@options)
|
|
50
|
+
raise ArgumentError.new("Service #{service_def.name} specifies a disallowed rsync paramter," \
|
|
51
|
+
+ " rsync_options may not include the following: #{DISALLOWED_OPTIONS.join(' ')}")
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Set and validate the replica collision policy
|
|
55
|
+
@collision_policy = @service_def.properties[SF::COLLISION_PROPERTY] || SF::DEFAULT_COLLISION_POLICY
|
|
56
|
+
if !SF::VALID_COLLISION_POLICIES.include?(@collision_policy)
|
|
57
|
+
raise ArgumentError.new("Service #{service_def.name} received invalid #{SF::COLLISION_PROPERTY}" \
|
|
58
|
+
+ " value #{@collision_policy}")
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Store and validate destinations
|
|
62
|
+
replicate_to = @service_def.properties[SF::REPLICATE_TO]
|
|
63
|
+
if replicate_to.nil? || replicate_to.empty?
|
|
64
|
+
raise ArgumentError.new("Service #{service_def.name} must provide one or more replication destinations.")
|
|
65
|
+
end
|
|
66
|
+
replicate_to = [replicate_to] if replicate_to.is_a?(String)
|
|
67
|
+
|
|
68
|
+
loc_manager = app_manager.location_manager
|
|
69
|
+
# Build list of destinations, translating to storage locations when relevant
|
|
70
|
+
@destinations = Array.new
|
|
71
|
+
replicate_to.each do |dest|
|
|
72
|
+
# Assume that if destination contains a : or / it is a path rather than storage location
|
|
73
|
+
if dest =~ /[:\/]/
|
|
74
|
+
@destinations << dest
|
|
75
|
+
else
|
|
76
|
+
if loc_manager.locations.key?(dest)
|
|
77
|
+
@destinations << loc_manager.locations[dest]
|
|
78
|
+
else
|
|
79
|
+
raise ArgumentError.new("Service #{service_def.name} specifies unknown storage location '#{dest}'" \
|
|
80
|
+
+ " as a replication destination")
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
# During a replication event, perform replication of the specified file to all configured destinations
|
|
87
|
+
# as necessary.
|
|
88
|
+
#
|
|
89
|
+
# @param file_rec [FileRecord] record representing the file to perform the service on.
|
|
90
|
+
# @param event [String] name of the event this service is being invoked by.
|
|
91
|
+
# @raise [PreservationServiceError] if the rsync replication fails
|
|
92
|
+
def perform(file_rec, event)
|
|
93
|
+
@destinations.each do |destination|
|
|
94
|
+
dest_is_storage_loc = destination.is_a?(Longleaf::StorageLocation)
|
|
95
|
+
|
|
96
|
+
if dest_is_storage_loc
|
|
97
|
+
dest_path = destination.path
|
|
98
|
+
else
|
|
99
|
+
dest_path = destination
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
logical_physical_same = file_rec.path == file_rec.physical_path
|
|
103
|
+
# Determine the path to the file being replicated relative to its storage location
|
|
104
|
+
rel_path = file_rec.storage_location.relativize(file_rec.path)
|
|
105
|
+
|
|
106
|
+
options = @options
|
|
107
|
+
if logical_physical_same
|
|
108
|
+
options = options + " -R"
|
|
109
|
+
# Insert "./" into the source path so that rsync -R only creates destination directories starting from that point
|
|
110
|
+
source_path = File.join(file_rec.storage_location.path, "./#{rel_path}")
|
|
111
|
+
else
|
|
112
|
+
options = options + " --no-relative"
|
|
113
|
+
source_path = file_rec.physical_path
|
|
114
|
+
dest_path = File.join(dest_path, rel_path)
|
|
115
|
+
if (dest_is_storage_loc && destination.is_a?(Longleaf::FilesystemStorageLocation)) || !dest_is_storage_loc
|
|
116
|
+
# Fill in missing parent directories, as rsync cannot do so when specifying a different source and dest filename
|
|
117
|
+
dirname = File.dirname(dest_path)
|
|
118
|
+
logger.debug("Creating parent dirs #{dirname} for #{file_rec.path}")
|
|
119
|
+
FileUtils.mkdir_p(dirname)
|
|
120
|
+
else
|
|
121
|
+
raise PreservationServiceError.new(
|
|
122
|
+
"Destination #{destination.name} does not currently support separate physical and logical paths")
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Check that the destination is available before attempting to write
|
|
127
|
+
verify_destination_available(destination, file_rec)
|
|
128
|
+
|
|
129
|
+
logger.debug("Invoking rsync with command: #{@command} \"#{source_path}\" \"#{dest_path}\" #{options}")
|
|
130
|
+
stdout, stderr, status = Open3.capture3("#{@command} \"#{source_path}\" \"#{dest_path}\" #{options}")
|
|
131
|
+
raise PreservationServiceError.new("Failed to replicate #{file_rec.path} to #{dest_path}: #{stderr}") \
|
|
132
|
+
unless status.success?
|
|
133
|
+
|
|
134
|
+
logger.info("Replicated #{file_rec.path} to destination #{dest_path}")
|
|
135
|
+
|
|
136
|
+
# For destinations which are storage locations, register the replica with longleaf
|
|
137
|
+
if dest_is_storage_loc
|
|
138
|
+
register_replica(destination, rel_path, file_rec)
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
# Determine if this service is applicable for the provided event, given the configured service definition
|
|
144
|
+
#
|
|
145
|
+
# @param event [String] name of the event
|
|
146
|
+
# @return [Boolean] returns true if this service is applicable for the provided event
|
|
147
|
+
def is_applicable?(event)
|
|
148
|
+
case event
|
|
149
|
+
when EventNames::PRESERVE
|
|
150
|
+
true
|
|
151
|
+
else
|
|
152
|
+
false
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
private
|
|
157
|
+
def contains_disallowed_option?(options)
|
|
158
|
+
DISALLOWED_OPTIONS.each do |disallowed|
|
|
159
|
+
if disallowed.length == 1
|
|
160
|
+
if options =~ /(\A| )-[a-zA-Z0-9]*#{disallowed}[a-zA-Z0-9]*( |=|\z)/
|
|
161
|
+
return true
|
|
162
|
+
end
|
|
163
|
+
else
|
|
164
|
+
if options =~ /(\A| )--#{disallowed}( |=|\z)/
|
|
165
|
+
return true
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
false
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
def verify_destination_available(destination, file_rec)
|
|
174
|
+
if destination.is_a?(Longleaf::StorageLocation)
|
|
175
|
+
begin
|
|
176
|
+
destination.available?
|
|
177
|
+
rescue StorageLocationUnavailableError => e
|
|
178
|
+
raise StorageLocationUnavailableError.new("Cannot replicate #{file_rec.path} to destination #{destination.name}: " \
|
|
179
|
+
+ e.message)
|
|
180
|
+
end
|
|
181
|
+
elsif destination.start_with?("/")
|
|
182
|
+
raise StorageLocationUnavailableError.new("Cannot replicate #{file_rec.path} to destination" \
|
|
183
|
+
+ " #{destination}, path does not exist.") unless Dir.exist?(destination)
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
def register_replica(destination, rel_path, file_rec)
|
|
188
|
+
dest_file_path = File.join(destination.path, rel_path)
|
|
189
|
+
dest_file_rec = FileRecord.new(dest_file_path, destination)
|
|
190
|
+
|
|
191
|
+
register_event = RegisterEvent.new(file_rec: dest_file_rec,
|
|
192
|
+
app_manager: @app_manager,
|
|
193
|
+
force: true,
|
|
194
|
+
digest_provider: SingleDigestProvider.new(file_rec.metadata_record.checksums))
|
|
195
|
+
register_event.perform
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
end
|