longleaf 0.1.0.pre.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +94 -0
- data/.editorconfig +13 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +44 -0
- data/.rubocop_todo.yml +834 -0
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +98 -12
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +139 -0
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +237 -46
- data/lib/longleaf/commands/deregister_command.rb +51 -0
- data/lib/longleaf/commands/preserve_command.rb +50 -0
- data/lib/longleaf/commands/register_command.rb +32 -43
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +33 -8
- data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
- data/lib/longleaf/errors.rb +26 -7
- data/lib/longleaf/events/deregister_event.rb +53 -0
- data/lib/longleaf/events/event_names.rb +9 -0
- data/lib/longleaf/events/event_status_tracking.rb +59 -0
- data/lib/longleaf/events/preserve_event.rb +81 -0
- data/lib/longleaf/events/register_event.rb +52 -51
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +56 -0
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
- data/lib/longleaf/helpers/service_date_helper.rb +78 -0
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
- data/lib/longleaf/logging.rb +5 -4
- data/lib/longleaf/logging/redirecting_logger.rb +26 -25
- data/lib/longleaf/models/app_fields.rb +7 -2
- data/lib/longleaf/models/file_record.rb +17 -8
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/md_fields.rb +2 -1
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +39 -15
- data/lib/longleaf/models/s3_storage_location.rb +133 -0
- data/lib/longleaf/models/service_definition.rb +7 -6
- data/lib/longleaf/models/service_fields.rb +7 -1
- data/lib/longleaf/models/service_record.rb +10 -6
- data/lib/longleaf/models/storage_location.rb +24 -19
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +123 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +182 -0
- data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
- data/lib/longleaf/services/application_config_deserializer.rb +81 -24
- data/lib/longleaf/services/application_config_manager.rb +20 -6
- data/lib/longleaf/services/application_config_validator.rb +19 -9
- data/lib/longleaf/services/configuration_validator.rb +67 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +113 -42
- data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
- data/lib/longleaf/services/metadata_serializer.rb +138 -25
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_class_cache.rb +112 -0
- data/lib/longleaf/services/service_definition_manager.rb +10 -7
- data/lib/longleaf/services/service_definition_validator.rb +25 -18
- data/lib/longleaf/services/service_manager.rb +86 -11
- data/lib/longleaf/services/service_mapping_manager.rb +13 -12
- data/lib/longleaf/services/service_mapping_validator.rb +36 -26
- data/lib/longleaf/services/storage_location_manager.rb +76 -15
- data/lib/longleaf/services/storage_location_validator.rb +49 -35
- data/lib/longleaf/specs/config_builder.rb +47 -23
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/custom_matchers.rb +9 -0
- data/lib/longleaf/specs/file_helpers.rb +61 -0
- data/lib/longleaf/specs/metadata_builder.rb +92 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +20 -7
- data/mkdocs.yml +21 -0
- metadata +306 -23
- data/.travis.yml +0 -4
- data/lib/longleaf/commands/abstract_command.rb +0 -37
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
require 'longleaf/events/event_names'
require 'longleaf/logging'
require 'longleaf/errors'
require 'longleaf/models/file_record'
require 'longleaf/models/service_fields'
require 'longleaf/events/register_event'
require 'longleaf/candidates/single_digest_provider'
require 'open3'
require 'shellwords'
|
|
9
|
+
|
|
10
|
+
module Longleaf
|
|
11
|
+
# Preservation service which performs replication of a file to one or more destinations using rsync.
|
|
12
|
+
#
|
|
13
|
+
# The service definition must contain one or more destinations, specified with the "to" property.
|
|
14
|
+
# Each destination must be a known storage location name, a remote path, or an absolute path.
|
|
15
|
+
#
|
|
16
|
+
# Optional service configuration properties:
|
|
17
|
+
# * replica_collision_policy = specifies the desired outcome if the service attempts to replicate
|
|
18
|
+
# a file which already exists at a destination. Default: "replace".
|
|
19
|
+
# * rsync_command = the command to invoke in order to execute rsync. Default: "rsync"
|
|
20
|
+
# * rsync_options = additional parameters that will be passed along to rsync. Cannot include options
|
|
21
|
+
# which change the target of the command or prevent its execution, such as "files-from", "dry-run",
|
|
22
|
+
# "help", etc. Command will always include "-R". Default "-a".
|
|
23
|
+
class RsyncReplicationService
|
|
24
|
+
include Longleaf::Logging
|
|
25
|
+
SF ||= Longleaf::ServiceFields
|
|
26
|
+
|
|
27
|
+
RSYNC_COMMAND_PROPERTY = "rsync_command"
|
|
28
|
+
DEFAULT_COMMAND = "rsync"
|
|
29
|
+
|
|
30
|
+
RSYNC_OPTIONS_PROPERTY = "rsync_options"
|
|
31
|
+
DEFAULT_OPTIONS = "-a"
|
|
32
|
+
DISALLOWED_OPTIONS = ["files-from", "n", "dry-run", "exclude", "exclude-from", "cvs-exclude",
|
|
33
|
+
"h", "help", "f", "F", "filter"]
|
|
34
|
+
|
|
35
|
+
attr_reader :command, :options, :collision_policy
|
|
36
|
+
|
|
37
|
+
# Initialize a RsyncReplicationService from the given service definition
|
|
38
|
+
#
|
|
39
|
+
# @param service_def [ServiceDefinition] the configuration for this service
|
|
40
|
+
# @param app_manager [ApplicationConfigManager] the application configuration
|
|
41
|
+
def initialize(service_def, app_manager)
  @service_def = service_def
  @app_manager = app_manager
  props = @service_def.properties

  @command = props[RSYNC_COMMAND_PROPERTY] || DEFAULT_COMMAND

  # Reject configured rsync options that appear in DISALLOWED_OPTIONS
  @options = props[RSYNC_OPTIONS_PROPERTY] || DEFAULT_OPTIONS
  if contains_disallowed_option?(@options)
    raise ArgumentError.new("Service #{service_def.name} specifies a disallowed rsync paramter," \
        " rsync_options may not include the following: #{DISALLOWED_OPTIONS.join(' ')}")
  end

  # -R (--relative) is always appended so that the full path gets replicated
  @options += " -R"

  # Resolve the replica collision policy, falling back to the default, and validate it
  @collision_policy = props[SF::COLLISION_PROPERTY] || SF::DEFAULT_COLLISION_POLICY
  unless SF::VALID_COLLISION_POLICIES.include?(@collision_policy)
    raise ArgumentError.new("Service #{service_def.name} received invalid #{SF::COLLISION_PROPERTY}" \
        " value #{@collision_policy}")
  end

  # At least one destination is required; a lone string is treated as a one-element list
  targets = props[SF::REPLICATE_TO]
  if targets.nil? || targets.empty?
    raise ArgumentError.new("Service #{service_def.name} must provide one or more replication destinations.")
  end
  targets = [targets] if targets.is_a?(String)

  loc_manager = app_manager.location_manager
  # Destinations are kept either as raw path strings or as storage location objects
  @destinations = targets.map do |target|
    # Anything containing ":" or "/" is assumed to be a path rather than a storage location name
    next target if target =~ /[:\/]/

    unless loc_manager.locations.key?(target)
      raise ArgumentError.new("Service #{service_def.name} specifies unknown storage location '#{target}'" \
          " as a replication destination")
    end
    loc_manager.locations[target]
  end
end
|
|
88
|
+
|
|
89
|
+
# During a replication event, perform replication of the specified file to all configured destinations
|
|
90
|
+
# as necessary.
|
|
91
|
+
#
|
|
92
|
+
# @param file_rec [FileRecord] record representing the file to perform the service on.
|
|
93
|
+
# @param event [String] name of the event this service is being invoked by.
|
|
94
|
+
# @raise [PreservationServiceError] if the rsync replication fails
|
|
95
|
+
def perform(file_rec, event)
  # The source is identical for every destination, so resolve it once up front.
  # Determine the path to the file being replicated relative to its storage location
  rel_path = file_rec.storage_location.relativize(file_rec.path)
  # The "./" marker makes rsync (run with -R) create destination directories
  # only from that point in the path onwards
  source_path = File.join(file_rec.storage_location.path, "./#{rel_path}")

  @destinations.each do |destination|
    dest_is_storage_loc = destination.is_a?(Longleaf::StorageLocation)
    dest_path = dest_is_storage_loc ? destination.path : destination

    # Check that the destination is available before attempting to write
    verify_destination_available(destination, file_rec)

    # Paths are shell-escaped rather than merely double-quoted, so that file names
    # containing quotes, backticks or "$" reach rsync literally instead of being
    # interpreted by the shell. The command and options are configuration values
    # and are still passed through to the shell unchanged.
    rsync_call = "#{@command} #{Shellwords.escape(source_path)} #{Shellwords.escape(dest_path)} #{@options}"
    logger.debug("Invoking rsync with command: #{rsync_call}")
    _stdout, stderr, status = Open3.capture3(rsync_call)
    unless status.success?
      raise PreservationServiceError.new("Failed to replicate #{file_rec.path} to #{dest_path}: #{stderr}")
    end

    logger.info("Replicated #{file_rec.path} to destination #{dest_path}")

    # For destinations which are storage locations, register the replica with longleaf
    register_replica(destination, rel_path, file_rec) if dest_is_storage_loc
  end
end
|
|
126
|
+
|
|
127
|
+
# Determine if this service is applicable for the provided event, given the configured service definition
|
|
128
|
+
#
|
|
129
|
+
# @param event [String] name of the event
|
|
130
|
+
# @return [Boolean] returns true if this service is applicable for the provided event
|
|
131
|
+
def is_applicable?(event)
  # Same `===` match that a case/when on the event name performs; only the
  # preserve event is applicable
  EventNames::PRESERVE === event ? true : false
end
|
|
139
|
+
|
|
140
|
+
private
|
|
141
|
+
# Checks an rsync options string against DISALLOWED_OPTIONS.
#
# @param options [String] the configured rsync options
# @return [Boolean] true if any disallowed option is present
def contains_disallowed_option?(options)
  DISALLOWED_OPTIONS.any? do |flag|
    pattern = if flag.length == 1
                # Short flags may be bundled with others behind a single dash, e.g. "-an"
                /(\A| )-[a-zA-Z0-9]*#{flag}[a-zA-Z0-9]*( |=|\z)/
              else
                # Long flags must match exactly, optionally followed by "=value"
                /(\A| )--#{flag}( |=|\z)/
              end
    options =~ pattern
  end
end
|
|
156
|
+
|
|
157
|
+
# Confirms that a replication destination can be written to before rsync is invoked.
#
# @param destination [StorageLocation, String] storage location or raw destination path
# @param file_rec [FileRecord] the file being replicated, used for error reporting
# @raise [StorageLocationUnavailableError] if the destination is unavailable
def verify_destination_available(destination, file_rec)
  if destination.is_a?(Longleaf::StorageLocation)
    begin
      return destination.available?
    rescue StorageLocationUnavailableError => unavailable
      # Re-raise with the file and destination added for context
      raise StorageLocationUnavailableError.new(
        "Cannot replicate #{file_rec.path} to destination #{destination.name}: " + unavailable.message)
    end
  end

  # Only paths starting with "/" are checked here; other string destinations are not verified
  return unless destination.start_with?("/")
  return if Dir.exist?(destination)

  raise StorageLocationUnavailableError.new("Cannot replicate #{file_rec.path} to destination" \
      " #{destination}, path does not exist.")
end
|
|
170
|
+
|
|
171
|
+
# Registers a freshly replicated file with longleaf at its destination storage location.
#
# @param destination [StorageLocation] the storage location the replica was written to
# @param rel_path [String] path of the file relative to its storage location
# @param file_rec [FileRecord] record of the original file
def register_replica(destination, rel_path, file_rec)
  replica_rec = FileRecord.new(File.join(destination.path, rel_path), destination)
  # The original file's checksums are handed to the registration event for the replica
  digests = SingleDigestProvider.new(file_rec.metadata_record.checksums)

  RegisterEvent.new(file_rec: replica_rec,
                    app_manager: @app_manager,
                    force: true,
                    digest_provider: digests).perform
end
|
|
181
|
+
end
|
|
182
|
+
end
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
require 'longleaf/events/event_names'
|
|
2
|
+
require 'longleaf/logging'
|
|
3
|
+
require 'longleaf/errors'
|
|
4
|
+
require 'longleaf/models/file_record'
|
|
5
|
+
require 'longleaf/models/service_fields'
|
|
6
|
+
require 'longleaf/events/register_event'
|
|
7
|
+
require 'longleaf/models/storage_types'
|
|
8
|
+
require 'aws-sdk-s3'
|
|
9
|
+
|
|
10
|
+
module Longleaf
|
|
11
|
+
# Preservation service which performs replication of a file to one or more s3 destinations.
|
|
12
|
+
#
|
|
13
|
+
# The service definition must contain one or more destinations, specified with the "to" property.
|
|
14
|
+
# Each destination must be a known s3 storage location. The s3 client configuration
|
|
15
|
+
# is controlled by the storage location.
|
|
16
|
+
#
|
|
17
|
+
# Optional service configuration properties:
|
|
18
|
+
# * replica_collision_policy = specifies the desired outcome if the service attempts to replicate
|
|
19
|
+
# a file which already exists at a destination. Default: "replace".
|
|
20
|
+
class S3ReplicationService
|
|
21
|
+
include Longleaf::Logging
|
|
22
|
+
ST ||= Longleaf::StorageTypes
|
|
23
|
+
SF ||= Longleaf::ServiceFields
|
|
24
|
+
|
|
25
|
+
attr_reader :collision_policy
|
|
26
|
+
|
|
27
|
+
# Initialize a S3ReplicationService from the given service definition
|
|
28
|
+
#
|
|
29
|
+
# @param service_def [ServiceDefinition] the configuration for this service
|
|
30
|
+
# @param app_manager [ApplicationConfigManager] the application configuration
|
|
31
|
+
def initialize(service_def, app_manager)
  @service_def = service_def
  @app_manager = app_manager
  props = @service_def.properties

  # Resolve the replica collision policy, falling back to the default, and validate it
  @collision_policy = props[SF::COLLISION_PROPERTY] || SF::DEFAULT_COLLISION_POLICY
  unless SF::VALID_COLLISION_POLICIES.include?(@collision_policy)
    raise ArgumentError.new("Service #{service_def.name} received invalid #{SF::COLLISION_PROPERTY}" \
        " value #{@collision_policy}")
  end

  # At least one destination is required; a lone string is treated as a one-element list
  targets = props[SF::REPLICATE_TO]
  if targets.nil? || targets.empty?
    raise ArgumentError.new("Service #{service_def.name} must provide one or more replication destinations.")
  end
  targets = [targets] if targets.is_a?(String)

  loc_manager = app_manager.location_manager
  # Every destination must name a configured storage location of type s3
  @destinations = targets.map do |target|
    unless loc_manager.locations.key?(target)
      raise ArgumentError.new("Service #{service_def.name} specifies unknown storage location '#{target}'" \
          " as a replication destination")
    end

    location = loc_manager.locations[target]
    if location.type != ST::S3_STORAGE_TYPE
      raise ArgumentError.new(
        "Service #{service_def.name} specifies destination #{target} which is not of type 's3'")
    end
    location
  end
end
|
|
66
|
+
|
|
67
|
+
# During a replication event, perform replication of the specified file to all configured destinations
|
|
68
|
+
# as necessary.
|
|
69
|
+
#
|
|
70
|
+
# @param file_rec [FileRecord] record representing the file to perform the service on.
|
|
71
|
+
# @param event [String] name of the event this service is being invoked by.
|
|
72
|
+
# @raise [PreservationServiceError] if the s3 replication fails
|
|
73
|
+
def perform(file_rec, event)
  source_type = file_rec.storage_location.type
  # Only filesystem storage locations are supported as the replication source
  unless source_type == ST::FILESYSTEM_STORAGE_TYPE
    raise PreservationServiceError.new("Replication from storage location of type " \
        "#{source_type} to s3 is not supported")
  end

  replicate_from_fs(file_rec)
end
|
|
81
|
+
|
|
82
|
+
# Uploads a file to every configured s3 destination.
#
# @param file_rec [FileRecord] record representing the file to replicate
# @raise [ChecksumMismatchError] if s3 reports a bad digest for the upload
# @raise [PreservationServiceError] if the transfer fails with any other AWS service error
def replicate_from_fs(file_rec)
  # Path of the file relative to its storage location; each destination maps it to an object key
  rel_path = file_rec.storage_location.relativize(file_rec.path)
  md5_header = get_content_md5(file_rec)

  @destinations.each do |destination|
    # Check that the destination is available before attempting to write
    verify_destination_available(destination, file_rec)

    object_key = destination.relative_to_bucket_path(rel_path)
    s3_object = destination.s3_bucket.object(object_key)
    begin
      s3_object.upload_file(file_rec.path, { :content_md5 => md5_header })
    rescue Aws::S3::Errors::BadDigest
      raise ChecksumMismatchError.new("Transfer to bucket '#{destination.s3_bucket.name}' failed, " \
          "MD5 provided did not match the received content for #{file_rec.path}")
    rescue Aws::Errors::ServiceError => e
      raise PreservationServiceError.new("Failed to transfer #{file_rec.path} to bucket " \
          "'#{destination.s3_bucket.name}': #{e.message}")
    end

    logger.info("Replicated #{file_rec.path} to destination #{s3_object.public_url}")

    # TODO register file in destination
  end
end
|
|
109
|
+
|
|
110
|
+
# Determine if this service is applicable for the provided event, given the configured service definition
|
|
111
|
+
#
|
|
112
|
+
# @param event [String] name of the event
|
|
113
|
+
# @return [Boolean] returns true if this service is applicable for the provided event
|
|
114
|
+
def is_applicable?(event)
  # Same `===` match that a case/when on the event name performs; only the
  # preserve event is applicable
  EventNames::PRESERVE === event ? true : false
end
|
|
122
|
+
|
|
123
|
+
private
|
|
124
|
+
# Confirms that a destination storage location is available before uploading to it.
#
# @param destination [StorageLocation] the destination storage location
# @param file_rec [FileRecord] the file being replicated, used for error reporting
# @raise [StorageLocationUnavailableError] if the destination reports itself unavailable
def verify_destination_available(destination, file_rec)
  destination.available?
rescue StorageLocationUnavailableError => unavailable
  # Re-raise with the file and destination added for context
  raise StorageLocationUnavailableError.new(
    "Cannot replicate #{file_rec.path} to destination #{destination.name}: " + unavailable.message)
end
|
|
132
|
+
|
|
133
|
+
# Builds the Content-MD5 header value for a file from its recorded md5 checksum.
#
# @param file_rec [FileRecord] record whose metadata holds the checksums
# @return [String, nil] base64 encoded md5 digest, or nil if no md5 checksum is recorded
def get_content_md5(file_rec)
  checksums = file_rec.metadata_record.checksums
  return nil unless checksums.key?('md5')

  # Hex digest -> raw bytes -> base64 without line breaks, as required by the Content-Md5 header
  raw_digest = [checksums['md5']].pack("H*")
  [raw_digest].pack("m0")
end
|
|
142
|
+
end
|
|
143
|
+
end
|
|
@@ -1,46 +1,103 @@
|
|
|
1
1
|
require 'longleaf/services/application_config_validator'
|
|
2
2
|
require 'longleaf/services/application_config_manager'
|
|
3
|
+
require 'digest/md5'
|
|
4
|
+
require 'pathname'
|
|
3
5
|
|
|
4
|
-
# Deserializer for application configuration files
|
|
5
6
|
module Longleaf
|
|
7
|
+
# Deserializer for application configuration files
|
|
6
8
|
class ApplicationConfigDeserializer
|
|
7
|
-
|
|
9
|
+
AF ||= Longleaf::AppFields
|
|
10
|
+
|
|
8
11
|
# Deserializes a valid application configuration file as an ApplicationConfigManager object
|
|
9
|
-
# @param config_path [String] file path to the
|
|
12
|
+
# @param config_path [String] file path to the service and storage mapping configuration file
|
|
10
13
|
# @param format [String] encoding format of the config file
|
|
11
|
-
# return [
|
|
14
|
+
# @return [ApplicationConfigManager] manager for the loaded configuration
|
|
12
15
|
def self.deserialize(config_path, format: 'yaml')
  raw_content = load_config_file(config_path)
  # The checksum is taken over the raw file text and handed to the config manager
  content_md5 = Digest::MD5.hexdigest(raw_content)

  settings = load(raw_content, format)
  # Paths are made absolute before validation runs
  make_paths_absolute(config_path, settings)

  ApplicationConfigValidator.new(settings).validate_config.raise_if_invalid
  ApplicationConfigManager.new(settings, content_md5)
end
|
|
18
|
-
|
|
26
|
+
|
|
27
|
+
# Reads the raw text of the configuration file.
#
# @param config_path [String] path to the configuration file
# @return [String] contents of the file
# @raise [ConfigurationError] if the file does not exist
def self.load_config_file(config_path)
  File.read(config_path)
rescue Errno::ENOENT
  raise Longleaf::ConfigurationError.new(
    "Configuration file #{config_path} does not exist.")
end
|
|
35
|
+
|
|
19
36
|
# Deserialize a configuration file into a hash
|
|
20
|
-
# @param
|
|
37
|
+
# @param content [String] the contents of the application configuration file
|
|
21
38
|
# @param format [String] encoding format of the config file
|
|
22
39
|
# @return [Hash] hash containing the configuration
|
|
23
|
-
def self.load(content, format)
  # yaml is the only supported encoding
  unless 'yaml' === format
    raise ArgumentError.new("Invalid deserialization format #{format} specified")
  end

  from_yaml(content)
end
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
# Parses YAML configuration text into a hash, allowing YAML aliases.
#
# @param content [String] the contents of the application configuration file
# @return [Hash] hash containing the configuration
# @raise [ConfigurationError] if the content cannot be parsed
def self.from_yaml(content)
  # Psych 4 (Ruby 3.1+) removed the positional form of safe_load, so prefer the
  # keyword form whenever the installed Psych supports it and only fall back to
  # positional arguments on old versions that predate the keywords.
  if YAML.method(:safe_load).parameters.include?([:key, :aliases])
    YAML.safe_load(content, aliases: true)
  else
    YAML.safe_load(content, [], [], true)
  end
rescue => err
  raise Longleaf::ConfigurationError.new(err)
end
|
|
56
|
+
|
|
57
|
+
# Rewrites the storage and metadata paths of every configured location so that
# relative paths are resolved against the directory containing the config file.
# The configuration hash is modified in place.
#
# @param config_path [String] path to the configuration file
# @param config [Hash] the deserialized configuration
def self.make_paths_absolute(config_path, config)
  base_pathname = Pathname.new(config_path).expand_path.parent

  # This runs before validation, so a missing or malformed locations section is
  # left untouched here rather than failing on it
  locations = config.is_a?(Hash) ? config[AF::LOCATIONS] : nil
  return unless locations.is_a?(Hash)

  locations.each do |_name, properties|
    next unless properties.is_a?(Hash)

    properties[AF::LOCATION_PATH] = make_file_paths_absolute(base_pathname, properties)

    md_config = properties[AF::METADATA_CONFIG]
    next if md_config.nil?

    # Resolve single field metadata location into expanded form. The expanded
    # hash is keyed on the path field so it can be resolved below, and is stored
    # back so the rest of the application sees the expanded, absolute form.
    if md_config.is_a?(String)
      md_config = { AF::LOCATION_PATH => md_config }
      properties[AF::METADATA_CONFIG] = md_config
    end
    md_config[AF::LOCATION_PATH] = make_file_paths_absolute(base_pathname, md_config)
  end
end
|
|
74
|
+
|
|
75
|
+
# Resolves the path field of a properties hash to an absolute file path when it
# refers to the local filesystem.
#
# @param base_pathname [Pathname] directory that relative paths are resolved against
# @param properties [Hash] properties containing the path field
# @return [String, nil] the absolute path, the untouched value for non-file URIs,
#   or nil if no path is set
def self.make_file_paths_absolute(base_pathname, properties)
  raw_path = properties[AF::LOCATION_PATH]
  return nil if raw_path.nil?

  parsed = URI(raw_path)
  scheme = parsed.scheme
  # Values with a scheme other than "file" are returned unchanged
  return raw_path unless scheme.nil? || scheme.casecmp("file") == 0

  absolution(base_pathname, parsed.path)
end
|
|
87
|
+
|
|
88
|
+
# Converts a file path into an absolute path string.
#
# @param base_pathname [Pathname] directory that relative paths are resolved against
# @param file_path [String, nil] the path to convert
# @return [String, nil] the absolute path, or nil if no path was given
def self.absolution(base_pathname, file_path)
  return nil if file_path.nil?

  candidate = Pathname.new(file_path)
  # Absolute paths are expanded; relative ones are appended to the base directory
  resolved = candidate.absolute? ? candidate.expand_path : base_pathname + candidate
  resolved.to_s
end
|
|
100
|
+
|
|
101
|
+
private_class_method :load_config_file
|
|
45
102
|
end
|
|
46
|
-
end
|
|
103
|
+
end
|