longleaf 0.1.0.pre.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +94 -0
- data/.editorconfig +13 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +44 -0
- data/.rubocop_todo.yml +834 -0
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +98 -12
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +139 -0
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +237 -46
- data/lib/longleaf/commands/deregister_command.rb +51 -0
- data/lib/longleaf/commands/preserve_command.rb +50 -0
- data/lib/longleaf/commands/register_command.rb +32 -43
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +33 -8
- data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
- data/lib/longleaf/errors.rb +26 -7
- data/lib/longleaf/events/deregister_event.rb +53 -0
- data/lib/longleaf/events/event_names.rb +9 -0
- data/lib/longleaf/events/event_status_tracking.rb +59 -0
- data/lib/longleaf/events/preserve_event.rb +81 -0
- data/lib/longleaf/events/register_event.rb +52 -51
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +56 -0
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
- data/lib/longleaf/helpers/service_date_helper.rb +78 -0
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
- data/lib/longleaf/logging.rb +5 -4
- data/lib/longleaf/logging/redirecting_logger.rb +26 -25
- data/lib/longleaf/models/app_fields.rb +7 -2
- data/lib/longleaf/models/file_record.rb +17 -8
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/md_fields.rb +2 -1
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +39 -15
- data/lib/longleaf/models/s3_storage_location.rb +133 -0
- data/lib/longleaf/models/service_definition.rb +7 -6
- data/lib/longleaf/models/service_fields.rb +7 -1
- data/lib/longleaf/models/service_record.rb +10 -6
- data/lib/longleaf/models/storage_location.rb +24 -19
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +123 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +182 -0
- data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
- data/lib/longleaf/services/application_config_deserializer.rb +81 -24
- data/lib/longleaf/services/application_config_manager.rb +20 -6
- data/lib/longleaf/services/application_config_validator.rb +19 -9
- data/lib/longleaf/services/configuration_validator.rb +67 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +113 -42
- data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
- data/lib/longleaf/services/metadata_serializer.rb +138 -25
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_class_cache.rb +112 -0
- data/lib/longleaf/services/service_definition_manager.rb +10 -7
- data/lib/longleaf/services/service_definition_validator.rb +25 -18
- data/lib/longleaf/services/service_manager.rb +86 -11
- data/lib/longleaf/services/service_mapping_manager.rb +13 -12
- data/lib/longleaf/services/service_mapping_validator.rb +36 -26
- data/lib/longleaf/services/storage_location_manager.rb +76 -15
- data/lib/longleaf/services/storage_location_validator.rb +49 -35
- data/lib/longleaf/specs/config_builder.rb +47 -23
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/custom_matchers.rb +9 -0
- data/lib/longleaf/specs/file_helpers.rb +61 -0
- data/lib/longleaf/specs/metadata_builder.rb +92 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +20 -7
- data/mkdocs.yml +21 -0
- metadata +306 -23
- data/.travis.yml +0 -4
- data/lib/longleaf/commands/abstract_command.rb +0 -37
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
require 'longleaf/models/storage_location'
|
|
2
|
+
require 'longleaf/models/storage_types'
|
|
3
|
+
require 'longleaf/helpers/s3_uri_helper'
|
|
4
|
+
require 'uri'
|
|
5
|
+
require 'aws-sdk-s3'
|
|
6
|
+
|
|
7
|
+
module Longleaf
|
|
8
|
+
# A storage location in an S3 bucket
|
|
9
|
+
#
|
|
10
|
+
# Optionally, the location configuration may include an "options" sub-hash in order to provide
|
|
11
|
+
# any of the s3 client options specified in Client initializer:
|
|
12
|
+
# https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#constructor_details
|
|
13
|
+
|
|
14
|
+
class S3StorageLocation < StorageLocation
|
|
15
|
+
|
|
16
|
+
IS_URI_REGEX = /\A#{URI::regexp}\z/
|
|
17
|
+
|
|
18
|
+
CLIENT_OPTIONS_FIELD = 'options'
|
|
19
|
+
|
|
20
|
+
# @param name [String] the name of this storage location
|
|
21
|
+
# @param config [Hash] hash containing the configuration options for this location
|
|
22
|
+
# @param md_loc [MetadataLocation] metadata location associated with this storage location
|
|
23
|
+
def initialize(name, config, md_loc)
|
|
24
|
+
super(name, config, md_loc)
|
|
25
|
+
|
|
26
|
+
@bucket_name = S3UriHelper.extract_bucket(@path)
|
|
27
|
+
if @bucket_name.nil?
|
|
28
|
+
raise ArgumentError.new("Unable to identify bucket for location #{@name} from path #{@path}")
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
# Force path to always end with a slash
|
|
32
|
+
@path += '/' unless @path.end_with?('/')
|
|
33
|
+
|
|
34
|
+
custom_options = config[CLIENT_OPTIONS_FIELD]
|
|
35
|
+
if custom_options.nil?
|
|
36
|
+
@client_options = Hash.new
|
|
37
|
+
else
|
|
38
|
+
# Clone options and convert keys to symbols
|
|
39
|
+
@client_options = Hash[custom_options.map { |(k,v)| [k.to_sym,v] } ]
|
|
40
|
+
end
|
|
41
|
+
# If no region directly configured, use region from path
|
|
42
|
+
if !@client_options.key?(:region)
|
|
43
|
+
region = S3UriHelper.extract_region(@path)
|
|
44
|
+
@client_options[:region] = region unless region.nil?
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
@subpath_prefix = S3UriHelper.extract_path(@path)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# @return the storage type for this location
|
|
51
|
+
def type
|
|
52
|
+
StorageTypes::S3_STORAGE_TYPE
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# Get the absolute path to the file associated with the provided metadata path
|
|
56
|
+
# @param md_path [String] metadata file path
|
|
57
|
+
# @raise [ArgumentError] if the md_path is not in this storage location
|
|
58
|
+
# @return [String] the path for the file associated with this metadata
|
|
59
|
+
def get_path_from_metadata_path(md_path)
|
|
60
|
+
raise ArgumentError.new("A file_path parameter is required") if md_path.nil? || md_path.empty?
|
|
61
|
+
|
|
62
|
+
rel_path = @metadata_location.relative_file_path_for(md_path)
|
|
63
|
+
|
|
64
|
+
URI.join(@path, rel_path).to_s
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
# Checks that the path and metadata path defined in this location are available
|
|
68
|
+
# @raise [StorageLocationUnavailableError] if the storage location is not available
|
|
69
|
+
def available?
|
|
70
|
+
begin
|
|
71
|
+
s3_client().head_bucket({ bucket: @bucket_name, use_accelerate_endpoint: false })
|
|
72
|
+
rescue StandardError => e
|
|
73
|
+
raise StorageLocationUnavailableError.new("Destination bucket #{@bucket_name} does not exist " \
|
|
74
|
+
+ "or is not accessible: #{e.message}")
|
|
75
|
+
end
|
|
76
|
+
@metadata_location.available?
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# Get the file path relative to this location
|
|
80
|
+
# @param file_path [String] file path
|
|
81
|
+
# @return the file path relative to this location
|
|
82
|
+
# @raise [ArgumentError] if the file path is not contained by this location
|
|
83
|
+
def relativize(file_path)
|
|
84
|
+
raise ArgumentError.new("Must provide a non-nil path to relativize") if file_path.nil?
|
|
85
|
+
|
|
86
|
+
if file_path.start_with?(@path)
|
|
87
|
+
file_path[@path.length..-1]
|
|
88
|
+
else
|
|
89
|
+
if file_path =~ IS_URI_REGEX
|
|
90
|
+
raise ArgumentError.new("Path #{file_path} is not contained by #{@name}")
|
|
91
|
+
else
|
|
92
|
+
# path already relative
|
|
93
|
+
file_path
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
# Prefixes the provided path with the query path portion of the location's path
|
|
99
|
+
# after the bucket uri, used to place relative paths into the same sub-URL of a bucket.
|
|
100
|
+
# For example:
|
|
101
|
+
# Given a location with 'path' http://example.s3-amazonaws.com/env/test/
|
|
102
|
+
# Where rel_path = 'path/to/text.txt'
|
|
103
|
+
# The result would be 'env/test/path/to/text.txt'
|
|
104
|
+
# @param rel_path relative path to work with
|
|
105
|
+
# @return the given relative path prefixed with the path portion of the storage location path
|
|
106
|
+
def relative_to_bucket_path(rel_path)
|
|
107
|
+
raise ArgumentError.new("Must provide a non-nil path") if rel_path.nil?
|
|
108
|
+
|
|
109
|
+
if @subpath_prefix.nil?
|
|
110
|
+
return rel_path
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
@subpath_prefix + rel_path
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
# @return the bucket used by this storage location
|
|
117
|
+
def s3_bucket
|
|
118
|
+
if @bucket.nil?
|
|
119
|
+
@s3 = Aws::S3::Resource.new(client: s3_client())
|
|
120
|
+
@bucket = @s3.bucket(@bucket_name)
|
|
121
|
+
end
|
|
122
|
+
@bucket
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
# @return the s3 client used by this storage location
|
|
126
|
+
def s3_client
|
|
127
|
+
if @client.nil?
|
|
128
|
+
@client = Aws::S3::Client.new(**@client_options)
|
|
129
|
+
end
|
|
130
|
+
@client
|
|
131
|
+
end
|
|
132
|
+
end
|
|
133
|
+
end
|
|
@@ -1,21 +1,22 @@
|
|
|
1
1
|
require_relative 'service_fields'
|
|
2
2
|
|
|
3
|
-
# Definition of a preservation service
|
|
4
3
|
module Longleaf
|
|
4
|
+
# Definition of a configured preservation service
|
|
5
5
|
class ServiceDefinition
|
|
6
6
|
attr_reader :name
|
|
7
|
-
attr_reader :work_script
|
|
7
|
+
attr_reader :work_script, :work_class
|
|
8
8
|
attr_reader :frequency, :delay
|
|
9
9
|
attr_reader :properties
|
|
10
|
-
|
|
11
|
-
def initialize(name:, work_script:, frequency: nil, delay: nil, properties: Hash.new)
|
|
10
|
+
|
|
11
|
+
def initialize(name:, work_script:, work_class: nil, frequency: nil, delay: nil, properties: Hash.new)
|
|
12
12
|
raise ArgumentError.new("Parameters name and work_script are required") unless name && work_script
|
|
13
|
-
|
|
13
|
+
|
|
14
14
|
@properties = properties
|
|
15
15
|
@name = name
|
|
16
16
|
@work_script = work_script
|
|
17
|
+
@work_class = work_class
|
|
17
18
|
@frequency = frequency
|
|
18
19
|
@delay = delay
|
|
19
20
|
end
|
|
20
21
|
end
|
|
21
|
-
end
|
|
22
|
+
end
|
|
@@ -1,10 +1,16 @@
|
|
|
1
1
|
module Longleaf
|
|
2
|
+
# Constants for common configuration fields for preservation service definitions
|
|
2
3
|
class ServiceFields
|
|
3
4
|
WORK_SCRIPT = 'work_script'
|
|
5
|
+
WORK_CLASS = 'work_class'
|
|
4
6
|
FREQUENCY = 'frequency'
|
|
5
7
|
DELAY = 'delay'
|
|
6
|
-
|
|
8
|
+
|
|
7
9
|
REPLICATE_TO = 'to'
|
|
8
10
|
DIGEST_ALGORITHMS = 'algorithms'
|
|
11
|
+
|
|
12
|
+
COLLISION_PROPERTY = "replica_collision_policy"
|
|
13
|
+
DEFAULT_COLLISION_POLICY = "replace"
|
|
14
|
+
VALID_COLLISION_POLICIES = ["replace"]
|
|
9
15
|
end
|
|
10
16
|
end
|
|
@@ -1,27 +1,31 @@
|
|
|
1
|
-
# Record for an individual service in a file's metadata record.
|
|
2
1
|
module Longleaf
|
|
2
|
+
# Record for an individual service in a file's metadata record.
|
|
3
3
|
class ServiceRecord
|
|
4
4
|
attr_reader :properties
|
|
5
5
|
attr_accessor :stale_replicas, :timestamp, :run_needed
|
|
6
|
-
|
|
6
|
+
attr_accessor :failure_timestamp
|
|
7
|
+
|
|
7
8
|
# @param properties [Hash] initial properties for this service record
|
|
9
|
+
# @param stale_replicas [Boolean] whether there are any stale replicas from this service
|
|
10
|
+
# @param timestamp [String] timestamp when this service last ran or was initialized
|
|
11
|
+
# @param run_needed [Boolean] flag indicating that this service should be run at the next available opportunity
|
|
8
12
|
def initialize(properties: Hash.new, stale_replicas: false, timestamp: nil, run_needed: false)
|
|
9
13
|
raise ArgumentError.new("Service properties must be a hash") if properties.class != Hash
|
|
10
|
-
|
|
14
|
+
|
|
11
15
|
@properties = properties
|
|
12
16
|
@timestamp = timestamp
|
|
13
17
|
@stale_replicas = stale_replicas
|
|
14
18
|
@run_needed = run_needed
|
|
15
19
|
end
|
|
16
|
-
|
|
20
|
+
|
|
17
21
|
# @return the value of a service property identified by key
|
|
18
22
|
def [](key)
|
|
19
23
|
@properties[key]
|
|
20
24
|
end
|
|
21
|
-
|
|
25
|
+
|
|
22
26
|
# set the value of a service property identified by key
|
|
23
27
|
def []=(key, value)
|
|
24
28
|
@properties[key] = value
|
|
25
29
|
end
|
|
26
30
|
end
|
|
27
|
-
end
|
|
31
|
+
end
|
|
@@ -1,19 +1,25 @@
|
|
|
1
|
-
require 'longleaf/
|
|
1
|
+
require 'longleaf/models/app_fields'
|
|
2
2
|
|
|
3
3
|
module Longleaf
|
|
4
|
+
# Representation of a configured storage location
|
|
4
5
|
class StorageLocation
|
|
6
|
+
AF ||= Longleaf::AppFields
|
|
7
|
+
|
|
5
8
|
attr_reader :name
|
|
6
9
|
attr_reader :path
|
|
7
|
-
attr_reader :
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
attr_reader :metadata_location
|
|
11
|
+
|
|
12
|
+
# @param name [String] the name of this storage location
|
|
13
|
+
# @param config [Hash] hash containing the configuration options for this location
|
|
14
|
+
# @param md_loc [MetadataLocation] metadata location associated with this storage location
|
|
15
|
+
def initialize(name, config, md_loc)
|
|
16
|
+
raise ArgumentError.new("Config parameter is required") unless config
|
|
17
|
+
@path = config[AF::LOCATION_PATH]
|
|
13
18
|
@name = name
|
|
14
|
-
@
|
|
19
|
+
raise ArgumentError.new("Parameters name, path and metadata location are required") unless @name && @path && md_loc
|
|
20
|
+
@metadata_location = md_loc
|
|
15
21
|
end
|
|
16
|
-
|
|
22
|
+
|
|
17
23
|
# Get the path for the metadata file for the given file path located in this storage location.
|
|
18
24
|
# @param file_path [String] path of the file
|
|
19
25
|
# @raise [ArgumentError] if the file_path is not provided or is not in this storage location.
|
|
@@ -22,16 +28,15 @@ module Longleaf
|
|
|
22
28
|
raise ArgumentError.new("Provided file path is not contained by storage location #{@name}: #{file_path}") \
|
|
23
29
|
unless file_path.start_with?(@path)
|
|
24
30
|
|
|
25
|
-
file_path
|
|
31
|
+
rel_file_path = relativize(file_path)
|
|
32
|
+
|
|
33
|
+
@metadata_location.metadata_path_for(rel_file_path)
|
|
26
34
|
end
|
|
27
|
-
|
|
28
|
-
#
|
|
29
|
-
# @
|
|
30
|
-
def
|
|
31
|
-
|
|
32
|
-
unless Dir.exist?(@path)
|
|
33
|
-
raise StorageLocationUnavailableError.new("Metadata path does not exist or is not a directory: #{@metadata_path}")\
|
|
34
|
-
unless Dir.exist?(@metadata_path)
|
|
35
|
+
|
|
36
|
+
# @param file_path [String] path to check
|
|
37
|
+
# @return true if the file path is contained by the path for this location
|
|
38
|
+
def contains?(file_path)
|
|
39
|
+
file_path.start_with?(@path)
|
|
35
40
|
end
|
|
36
41
|
end
|
|
37
|
-
end
|
|
42
|
+
end
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
require 'longleaf/events/event_names'
|
|
2
|
+
require 'longleaf/logging'
|
|
3
|
+
|
|
4
|
+
module Longleaf
|
|
5
|
+
# Preservation service which validates a file using current filesystem information compared against the
|
|
6
|
+
# last registered details for that file. Checks using file name, size and last modified timestamp.
|
|
7
|
+
class FileCheckService
|
|
8
|
+
include Longleaf::Logging
|
|
9
|
+
|
|
10
|
+
# Initialize a FileCheckService from the given service definition
|
|
11
|
+
#
|
|
12
|
+
# @param service_def [ServiceDefinition] the configuration for this service
|
|
13
|
+
# @param app_manager [ApplicationConfigManager] manager for configured storage locations
|
|
14
|
+
def initialize(service_def, app_manager)
|
|
15
|
+
@service_def = service_def
|
|
16
|
+
@app_manager = app_manager
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
# Perform file information check.
|
|
20
|
+
#
|
|
21
|
+
# @param file_rec [FileRecord] record representing the file to perform the service on.
|
|
22
|
+
# @param event [String] name of the event this service is being invoked by.
|
|
23
|
+
# @raise [PreservationServiceError] if the file system information does not match the stored details
|
|
24
|
+
def perform(file_rec, event)
|
|
25
|
+
file_path = file_rec.path
|
|
26
|
+
md_rec = file_rec.metadata_record
|
|
27
|
+
|
|
28
|
+
logger.debug("Performing file information check of #{file_path}")
|
|
29
|
+
|
|
30
|
+
if !File.exist?(file_path)
|
|
31
|
+
raise PreservationServiceError.new("File does not exist: #{file_path}")
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
file_size = File.size(file_rec.path)
|
|
35
|
+
if file_size != md_rec.file_size
|
|
36
|
+
raise PreservationServiceError.new("File size for #{file_path} does not match the expected value: registered = #{md_rec.file_size} bytes, actual = #{file_size} bytes")
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
last_modified = File.mtime(file_rec.path).utc.iso8601(3)
|
|
40
|
+
if last_modified != md_rec.last_modified
|
|
41
|
+
raise PreservationServiceError.new("Last modified timestamp for #{file_path} does not match the expected value: registered = #{md_rec.last_modified}, actual = #{last_modified}")
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Determine if this service is applicable for the provided event, given the configured service definition
|
|
46
|
+
#
|
|
47
|
+
# @param event [String] name of the event
|
|
48
|
+
# @return [Boolean] returns true if this service is applicable for the provided event
|
|
49
|
+
def is_applicable?(event)
|
|
50
|
+
case event
|
|
51
|
+
when EventNames::PRESERVE
|
|
52
|
+
true
|
|
53
|
+
else
|
|
54
|
+
false
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
require 'longleaf/events/event_names'
|
|
2
|
+
require 'longleaf/models/service_fields'
|
|
3
|
+
require 'longleaf/logging'
|
|
4
|
+
require 'longleaf/helpers/digest_helper'
|
|
5
|
+
require 'set'
|
|
6
|
+
|
|
7
|
+
module Longleaf
|
|
8
|
+
# Preservation service which performs one or more fixity checks on a file based on the configured list
|
|
9
|
+
# of digest algorithms. It currently supports 'md5', 'sha1', 'sha2', 'sha256', 'sha384', 'sha512' and 'rmd160'.
|
|
10
|
+
#
|
|
11
|
+
# If the service encounters a file which is missing any of the digest algorithms the service is configured
|
|
12
|
+
# to check, the outcome may be controlled with the 'absent_digest' property via the following values:
|
|
13
|
+
# * 'fail' - the service will raise a ChecksumMismatchError for the missing algorithm. This is the default.
|
|
14
|
+
# * 'ignore' - the service will skip calculating any algorithms not already present for the file.
|
|
15
|
+
# * 'generate' - the service will generate and store any missing digests from the set of configured algorithms.
|
|
16
|
+
class FixityCheckService
|
|
17
|
+
include Longleaf::Logging
|
|
18
|
+
|
|
19
|
+
SUPPORTED_ALGORITHMS = ['md5', 'sha1', 'sha2', 'sha256', 'sha384', 'sha512', 'rmd160']
|
|
20
|
+
|
|
21
|
+
# service configuration property indicating how to handle situations where a file does not
|
|
22
|
+
# have a digest for one of the expected algorithms on record.
|
|
23
|
+
ABSENT_DIGEST_PROPERTY = 'absent_digest'
|
|
24
|
+
FAIL_IF_ABSENT = 'fail'
|
|
25
|
+
GENERATE_IF_ABSENT = 'generate'
|
|
26
|
+
IGNORE_IF_ABSENT = 'ignore'
|
|
27
|
+
ABSENT_DIGEST_OPTIONS = [FAIL_IF_ABSENT, GENERATE_IF_ABSENT, IGNORE_IF_ABSENT]
|
|
28
|
+
|
|
29
|
+
# Initialize a FixityCheckService from the given service definition
|
|
30
|
+
#
|
|
31
|
+
# @param service_def [ServiceDefinition] the configuration for this service
|
|
32
|
+
# @param app_manager [ApplicationConfigManager] manager for configured storage locations
|
|
33
|
+
def initialize(service_def, app_manager)
|
|
34
|
+
@service_def = service_def
|
|
35
|
+
@absent_digest_behavior = @service_def.properties[ABSENT_DIGEST_PROPERTY] || FAIL_IF_ABSENT
|
|
36
|
+
unless ABSENT_DIGEST_OPTIONS.include?(@absent_digest_behavior)
|
|
37
|
+
raise ArgumentError.new("Invalid option '#{@absent_digest_behavior}' for property #{ABSENT_DIGEST_PROPERTY} in service #{service_def.name}")
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
service_algs = service_def.properties[ServiceFields::DIGEST_ALGORITHMS]
|
|
41
|
+
if service_algs.nil? || service_algs.empty?
|
|
42
|
+
raise ArgumentError.new("FixityCheckService from definition #{service_def.name} requires a list of one or more digest algorithms")
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
service_algs = [service_algs] if service_algs.is_a?(String)
|
|
46
|
+
|
|
47
|
+
# Store the list of digest algorithms to verify, using normalized algorithm names.
|
|
48
|
+
@digest_algs = Set.new
|
|
49
|
+
service_algs.each do |alg|
|
|
50
|
+
normalized_alg = alg.downcase.delete('-')
|
|
51
|
+
if SUPPORTED_ALGORITHMS.include?(normalized_alg)
|
|
52
|
+
@digest_algs << normalized_alg
|
|
53
|
+
else
|
|
54
|
+
raise ArgumentError.new("Unsupported checksum algorithm '#{alg}' in definition #{service_def.name}. Supported algorithms are: #{SUPPORTED_ALGORITHMS}")
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Perform all configured fixity checks on the provided file
|
|
60
|
+
#
|
|
61
|
+
# @param file_rec [FileRecord] record representing the file to perform the service on.
|
|
62
|
+
# @param event [String] name of the event this service is being invoked by.
|
|
63
|
+
# @raise [ChecksumMismatchError] if the checksum on record does not match the generated checksum
|
|
64
|
+
def perform(file_rec, event)
|
|
65
|
+
path = file_rec.path
|
|
66
|
+
md_rec = file_rec.metadata_record
|
|
67
|
+
|
|
68
|
+
# Get the list of existing checksums for the file and normalize algorithm names
|
|
69
|
+
file_digests = Hash.new
|
|
70
|
+
md_rec.checksums&.each do |alg, digest|
|
|
71
|
+
normalized_alg = alg.downcase.delete('-')
|
|
72
|
+
if @digest_algs.include?(normalized_alg)
|
|
73
|
+
file_digests[normalized_alg] = digest
|
|
74
|
+
else
|
|
75
|
+
logger.debug("Metadata for file #{path} contains unexpected '#{alg}' digest, it will be ignored.")
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
@digest_algs.each do |alg|
|
|
80
|
+
existing_digest = file_digests[alg]
|
|
81
|
+
|
|
82
|
+
if existing_digest.nil?
|
|
83
|
+
if @absent_digest_behavior == FAIL_IF_ABSENT
|
|
84
|
+
raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{path}: no existing digest of type '#{alg}' on record.")
|
|
85
|
+
elsif @absent_digest_behavior == IGNORE_IF_ABSENT
|
|
86
|
+
logger.debug("Skipping check of algorithm '#{alg}' for file #{path}: no digest on record.")
|
|
87
|
+
next
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
digest = DigestHelper::start_digest(alg)
|
|
92
|
+
digest.file(path)
|
|
93
|
+
generated_digest = digest.hexdigest
|
|
94
|
+
|
|
95
|
+
# Store the missing checksum if using the 'generate' behavior
|
|
96
|
+
if existing_digest.nil? && @absent_digest_behavior == GENERATE_IF_ABSENT
|
|
97
|
+
md_rec.checksums[alg] = generated_digest
|
|
98
|
+
logger.info("Generated and stored digest using algorithm '#{alg}' for file #{path}")
|
|
99
|
+
else
|
|
100
|
+
# Compare the new digest to the one on record
|
|
101
|
+
if existing_digest == generated_digest
|
|
102
|
+
logger.info("Fixity check using algorithm '#{alg}' succeeded for file #{path}")
|
|
103
|
+
else
|
|
104
|
+
raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{path}: expected '#{existing_digest}', calculated '#{generated_digest}.'")
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
# Determine if this service is applicable for the provided event, given the configured service definition
|
|
111
|
+
#
|
|
112
|
+
# @param event [String] name of the event
|
|
113
|
+
# @return [Boolean] returns true if this service is applicable for the provided event
|
|
114
|
+
def is_applicable?(event)
|
|
115
|
+
case event
|
|
116
|
+
when EventNames::PRESERVE
|
|
117
|
+
true
|
|
118
|
+
else
|
|
119
|
+
false
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
end
|